author    Bernat Vadell <hounter.caza@gmail.com>  2023-03-17 10:47:06 +0100
committer GitHub <noreply@github.com>             2023-03-17 10:47:06 +0100
commit    2af23d30434a677c6416812eea52ccc0af65119c (patch)
tree      900c5ac5ceef13f65194ca8334cde41ed3590c09
parent    904d2a8d6acd667c9633138d45a361d40fbf76d0 (diff)
🚀 Dockerize llamacpp (#132)
* feat: dockerize llamacpp
* feat: split build & runtime stages
* split dockerfile into main & tools
* add quantize into tool docker image
* Update .devops/tools.sh

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* add docker action pipeline
* change CI to publish at github docker registry
* fix name runs-on macOS-latest is macos-latest (lowercase)
* include docker versioned images
* fix github action docker
* fix docker.yml
* feat: include all-in-one command tool & update readme.md

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
-rw-r--r--   .devops/full.Dockerfile        17
-rw-r--r--   .devops/main.Dockerfile        18
-rwxr-xr-x   .devops/tools.sh               46
-rw-r--r--   .dockerignore                  24
-rw-r--r--   .github/workflows/build.yml     2
-rw-r--r--   .github/workflows/docker.yml   61
-rw-r--r--   README.md                      32
-rw-r--r--   convert-pth-to-ggml.py          6
-rw-r--r--   download-pth.py                66
9 files changed, 270 insertions, 2 deletions
diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile
new file mode 100644
index 0000000..618cddd
--- /dev/null
+++ b/.devops/full.Dockerfile
@@ -0,0 +1,17 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION as build
+
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install torch torchvision torchaudio sentencepiece numpy
+
+WORKDIR /app
+
+COPY . .
+
+RUN make
+
+ENTRYPOINT ["/app/.devops/tools.sh"] \ No newline at end of file
diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile
new file mode 100644
index 0000000..cd575ef
--- /dev/null
+++ b/.devops/main.Dockerfile
@@ -0,0 +1,18 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION as build
+
+RUN apt-get update && \
+    apt-get install -y build-essential
+
+WORKDIR /app
+
+COPY . .
+
+RUN make
+
+FROM ubuntu:$UBUNTU_VERSION as runtime
+
+COPY --from=build /app/main /main
+
+ENTRYPOINT [ "/main" ] \ No newline at end of file
diff --git a/.devops/tools.sh b/.devops/tools.sh
new file mode 100755
index 0000000..b5711c9
--- /dev/null
+++ b/.devops/tools.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+set -e
+
+# Read the first argument into a variable
+arg1="$1"
+
+# Shift the arguments to remove the first one
+shift
+
+# Join the remaining arguments into a single string
+arg2="$@"
+
+if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
+    python3 ./convert-pth-to-ggml.py $arg2
+elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
+    ./quantize $arg2
+elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
+    ./main $arg2
+elif [[ $arg1 == '--download' || $arg1 == '-d' ]]; then
+    python3 ./download-pth.py $arg2
+elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
+    echo "Downloading model..."
+    python3 ./download-pth.py "$1" "$2"
+    echo "Converting PTH to GGML..."
+    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+        if [ -f "${i/f16/q4_0}" ]; then
+            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+        else
+            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+            ./quantize "$i" "${i/f16/q4_0}" 2
+        fi
+    done
+else
+    echo "Unknown command: $arg1"
+    echo "Available commands: "
+    echo "  --run (-r): Run a model previously converted into ggml"
+    echo "      ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512"
+    echo "  --convert (-c): Convert a llama model into ggml"
+    echo "      ex: \"/models/7B/\" 1"
+    echo "  --quantize (-q): Optimize with quantization process ggml"
+    echo "      ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+    echo "  --download (-d): Download original llama model from CDN: https://agi.gpt4.org/llama/"
+    echo "      ex: \"/models/\" 7B"
+    echo "  --all-in-one (-a): Execute --download, --convert & --quantize"
+    echo "      ex: \"/models/\" 7B"
+fi
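Outside of Docker, the dispatcher can be exercised directly; a sketch of the four single-purpose commands, reusing the illustrative paths from the script's own help text. Note that `arg2="$@"` flattens the remaining arguments into one string that is later expanded unquoted, so paths containing spaces will be word-split:

```bash
./tools.sh --download /models/ 7B            # fetch the original weights
./tools.sh --convert /models/7B/ 1           # PTH -> ggml (f16)
./tools.sh --quantize /models/7B/ggml-model-f16.bin \
    /models/7B/ggml-model-q4_0.bin 2         # f16 -> 4-bit quantized
./tools.sh --run -m /models/7B/ggml-model-q4_0.bin -p "Hello" -n 64
```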
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..952990f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,24 @@
+*.o
+*.a
+.cache/
+.vs/
+.vscode/
+.DS_Store
+
+build/
+build-em/
+build-debug/
+build-release/
+build-static/
+build-no-accel/
+build-sanitize-addr/
+build-sanitize-thread/
+
+models/*
+
+/main
+/quantize
+
+arm_neon.h
+compile_commands.json
+Dockerfile
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1a068ae..94f199c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,7 +19,7 @@ jobs:
           make
 
   macOS-latest:
-    runs-on: macOS-latest
+    runs-on: macos-latest
 
     steps:
       - name: Clone
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000..bc9aff7
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,61 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Publish Docker image
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+    runs-on: ubuntu-latest
+    env:
+      COMMIT_SHA: ${{ github.sha }}
+    strategy:
+      matrix:
+        config:
+          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
+          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push Docker image (versioned)
+        if: github.event_name == 'push'
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+          file: ${{ matrix.config.dockerfile }}
+
+      - name: Build and push Docker image (tagged)
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: ${{ github.event_name == 'push' }}
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
+          file: ${{ matrix.config.dockerfile }}
\ No newline at end of file
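Each push to `master` therefore publishes both image flavors to the GitHub Container Registry twice: once under a stable tag and once suffixed with the commit SHA that built it. Assuming the workflow has run, the images can be pulled like this (the SHA below is this commit's, used purely as an illustration):

```bash
# Stable tags, overwritten on every push to master:
docker pull ghcr.io/ggerganov/llama.cpp:full
docker pull ghcr.io/ggerganov/llama.cpp:light

# Versioned tags pin the exact build (illustrative SHA):
docker pull ghcr.io/ggerganov/llama.cpp:light-2af23d30434a677c6416812eea52ccc0af65119c
```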
diff --git a/README.md b/README.md
index 15e1b9a..8cf59f4 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ Supported platforms:
- [X] Mac OS
- [X] Linux
- [X] Windows (via CMake)
+- [X] Docker
---
@@ -194,6 +195,37 @@ Finally, copy the `llama` binary and the model files to your device storage. Her
https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4
+### Docker
+
+#### Prerequisites
+* Docker must be installed and running on your system.
+* Create a folder to store big models & intermediate files (e.g. /llama/models; the examples below use this path)
+
+#### Images
+We have two Docker images available for this project:
+
+1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and to quantize them to 4-bit.
+2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file.
+
+#### Usage
+
+The easiest way to download the models, convert them to ggml, and optimize them is with the `--all-in-one` command, which is included in the full Docker image.
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
+```
+
+Once it completes, you are ready to play!
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
+```
+
+or with the light image:
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
+```
## Limitations
diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 5c36e9c..d0eb213 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -16,7 +16,7 @@
# At the start of the ggml file we write the model parameters
# and vocabulary.
#
-
+import os
import sys
import json
import struct
@@ -64,6 +64,10 @@ if len(sys.argv) > 2:
         sys.exit(1)
     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
+if os.path.exists(fname_out):
+    print(f"Skip conversion, it already exists: {fname_out}")
+    sys.exit(0)
+
 with open(fname_hparams, "r") as f:
     hparams = json.load(f)
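With this guard, re-running the converter over the same directory is a cheap no-op, which is what makes the `--all-in-one` flow in tools.sh safe to repeat. A sketch, using the illustrative `/models/7B/` path from the usage examples above (ftype 1 selects f16 output):

```bash
# First run writes /models/7B/ggml-model-f16.bin.
python3 convert-pth-to-ggml.py /models/7B/ 1

# A second run exits early:
#   Skip conversion, it already exists: /models/7B/ggml-model-f16.bin
python3 convert-pth-to-ggml.py /models/7B/ 1
```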
diff --git a/download-pth.py b/download-pth.py
new file mode 100644
index 0000000..129532c
--- /dev/null
+++ b/download-pth.py
@@ -0,0 +1,66 @@
+import os
+import sys
+from tqdm import tqdm
+import requests
+
+if len(sys.argv) < 3:
+    print("Usage: download-pth.py dir-model model-type\n")
+    print("  model-type: Available models 7B, 13B, 30B or 65B")
+    sys.exit(1)
+
+modelsDir = sys.argv[1]
+model = sys.argv[2]
+
+num = {
+    "7B": 1,
+    "13B": 2,
+    "30B": 4,
+    "65B": 8,
+}
+
+if model not in num:
+    print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B")
+    sys.exit(1)
+
+print(f"Downloading model {model}")
+
+files = ["checklist.chk", "params.json"]
+
+for i in range(num[model]):
+    files.append(f"consolidated.0{i}.pth")
+
+resolved_path = os.path.abspath(os.path.join(modelsDir, model))
+os.makedirs(resolved_path, exist_ok=True)
+
+for file in files:
+    dest_path = os.path.join(resolved_path, file)
+
+    if os.path.exists(dest_path):
+        print(f"Skip file download, it already exists: {file}")
+        continue
+
+    url = f"https://agi.gpt4.org/llama/LLaMA/{model}/{file}"
+    response = requests.get(url, stream=True)
+    with open(dest_path, 'wb') as f:
+        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+                    t.update(len(chunk))
+
+files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
+for file in files2:
+    dest_path = os.path.join(modelsDir, file)
+
+    if os.path.exists(dest_path):
+        print(f"Skip file download, it already exists: {file}")
+        continue
+
+    url = f"https://agi.gpt4.org/llama/LLaMA/{file}"
+    response = requests.get(url, stream=True)
+    with open(dest_path, 'wb') as f:
+        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+                    t.update(len(chunk))
\ No newline at end of file
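The downloader also works standalone. One caveat: it imports `requests` and `tqdm`, which are not among the packages full.Dockerfile installs via pip, so a run outside the image may need them installed first; a minimal sketch:

```bash
# Dependencies imported by the script but not installed by full.Dockerfile:
pip install requests tqdm

# Fetch the 7B weights into /models/7B/ plus the shared tokenizer files.
# Existing files are skipped (the check is existence-only, so a partially
# downloaded file would be skipped as well).
python3 download-pth.py /models/ 7B
```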