diff options
author | Rand Xie <randxiexyy29@gmail.com> | 2023-07-28 01:42:53 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-28 11:42:53 +0300 |
commit | 65cdf34bdc469fa86248e667a5880992684ef114 (patch) | |
tree | f58f467c3f33f93819d2d45e8fbdf7cf630a45ec | |
parent | edcc7ae7d26007bbf83136e9d33f863fcad9b871 (diff) |
llama : use n_embd_gqa instead of n_embd to handle llama-2 70B (#2433)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | examples/save-load-state/save-load-state.cpp | 1 |
| -rw-r--r-- | llama.cpp | 4 |

2 files changed, 3 insertions, 2 deletions
```diff
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 4c86885..61c71c3 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -26,6 +26,7 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx = params.n_ctx;
+    lparams.n_gqa = params.n_gqa;
     lparams.seed = params.seed;
     lparams.f16_kv = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -3663,7 +3663,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
     const auto & kv_self = ctx->kv_self;
     const auto & hparams = ctx->model.hparams;
     const int n_layer = hparams.n_layer;
-    const int n_embd = hparams.n_embd;
+    const int n_embd = hparams.n_embd_gqa();
     const int n_ctx = hparams.n_ctx;
 
     const size_t kv_size = kv_self.buf.size;
@@ -3766,7 +3766,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
     const auto & kv_self = ctx->kv_self;
     const auto & hparams = ctx->model.hparams;
     const int n_layer = hparams.n_layer;
-    const int n_embd = hparams.n_embd;
+    const int n_embd = hparams.n_embd_gqa();
     const int n_ctx = hparams.n_ctx;
 
     size_t kv_size;
```