author    Henri Vasserman <henv@hot.ee>  2023-05-30 21:24:22 +0300
committer GitHub <noreply@github.com>    2023-05-30 21:24:22 +0300
commit    ffb06a345e3a9e30d39aaa5b46a23201a74be6de
tree      382e4e4538881565126bf0bb9232da116ba851cb
parent    7552ac586380f202b75b18aa216ecfefbd438d94
OpenLLaMA 3B support (#1588)
This adds support to llama.cpp for loading the model. Still missing are the changes to convert.py needed to convert the model correctly: it has to start reading the JSON configuration of HF models instead of deriving the values by guessing.

Co-authored-by: FNsi <125447286+FNsi@users.noreply.github.com>
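As a rough illustration of the convert.py change the message calls for, here is a minimal sketch of reading the hyperparameters from an HF checkpoint's config.json instead of guessing them from tensor shapes. The helper name is hypothetical; the config keys are the standard HF LLaMA ones, with the values OpenLLaMA 3B reports shown in comments:

```python
import json
from pathlib import Path

def read_hf_hparams(model_dir: str) -> dict:
    # Hypothetical helper: read hyperparameters straight from the
    # HF config.json rather than deriving them by guessing.
    config = json.loads((Path(model_dir) / "config.json").read_text())
    return {
        "n_vocab": config["vocab_size"],           # 32000
        "n_embd":  config["hidden_size"],          # 3200 for OpenLLaMA 3B
        "n_head":  config["num_attention_heads"],  # 32
        "n_layer": config["num_hidden_layers"],    # 26
    }
```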
 llama.cpp | 7 +++++++
 1 file changed, 7 insertions(+), 0 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 5a19316..47b4c8d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -42,6 +42,7 @@
 // available llama models
 enum e_model {
     MODEL_UNKNOWN,
+    MODEL_3B,
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
@@ -58,6 +59,7 @@ static const size_t MB = 1024*1024;
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,    128ull * MB },
         { MODEL_7B,    512ull * MB },
         { MODEL_13B,   512ull * MB },
         { MODEL_30B,   512ull * MB },
@@ -69,6 +71,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,    128ull * MB },
         { MODEL_7B,    512ull * MB },
         { MODEL_13B,   512ull * MB },
         { MODEL_30B,   512ull * MB },
@@ -81,6 +84,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,    682ull * MB },
         { MODEL_7B,   1026ull * MB },
         { MODEL_13B,  1608ull * MB },
         { MODEL_30B,  3124ull * MB },
@@ -94,6 +98,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 static const std::map<e_model, size_t> & MEM_REQ_EVAL()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,   512ull * MB },
         { MODEL_7B,   768ull * MB },
         { MODEL_13B, 1024ull * MB },
         { MODEL_30B, 1280ull * MB },
@@ -899,6 +904,7 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
 static const char *llama_model_type_name(e_model type) {
     switch (type) {
+        case MODEL_3B: return "3B";
         case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
@@ -932,6 +938,7 @@ static void llama_model_load_internal(
     {
         switch (hparams.n_layer) {
+            case 26: model.type = e_model::MODEL_3B; break;
             case 32: model.type = e_model::MODEL_7B; break;
             case 40: model.type = e_model::MODEL_13B; break;
             case 60: model.type = e_model::MODEL_30B; break;
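For context on the MEM_REQ_KV_SELF entry above: assuming OpenLLaMA 3B's hyperparameters (26 layers, 3200-dim embeddings) and llama.cpp's default 2048-token context with f16 K/V tensors, the self-attention cache works out to roughly 650 MB, so the 682 MB entry leaves a little headroom. A quick sanity check under those assumptions:

```python
# KV cache = 2 tensors (K and V) per layer, each n_ctx x n_embd, in f16.
n_layer, n_embd, n_ctx, f16_bytes = 26, 3200, 2048, 2
kv_bytes = 2 * n_layer * n_ctx * n_embd * f16_bytes
print(f"{kv_bytes / 1024**2:.0f} MB")  # -> 650 MB, vs. 682 MB reserved
```

The last hunk is also exactly the guessing the commit message mentions: the loader infers the model family purely from n_layer, so a 26-layer checkpoint is what identifies OpenLLaMA 3B.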