Diffstat (limited to 'utils.h')
-rw-r--r--  utils.h  |  61
1 file changed, 3 insertions, 58 deletions
diff --git a/utils.h b/utils.h
index 3129038..3f970ea 100644
--- a/utils.h
+++ b/utils.h
@@ -2,8 +2,9 @@
#pragma once
+#include "llama.h"
+
#include <string>
-#include <unordered_map>
#include <vector>
#include <random>
#include <thread>
@@ -50,63 +51,7 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
std::string gpt_random_prompt(std::mt19937 & rng);
//
-// Model file parsing
-//
-
-#define FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
-#define FILE_MAGIC 0x67676d66 // 'ggmf' in hex
-#define FILE_VERSION 1
-
-//
// Vocab utils
//
-struct llama_vocab {
- using id = int32_t;
- using token = std::string;
-
- struct token_score {
- token tok;
- float score;
- };
-
- std::unordered_map<token, id> token_to_id;
- std::vector<token_score> id_to_token;
-};
-
-void replace(std::string & str, const std::string & needle, const std::string & replacement);
-
-// poor-man's JSON parsing
-std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
-
-// TODO: temporary until #77 is merged, need this now for some tokenizer tests
-bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
-
-// TODO: this is probably wrong, but I cannot figure out how this tokenizer works ..
-// ref: https://github.com/google/sentencepiece
-std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos);
-
-// sample next token given probabilities for each embedding
-//
-// - consider only the top K tokens
-// - from them, consider only the top tokens with cumulative probability > P
-//
-llama_vocab::id llama_sample_top_p_top_k(
- const llama_vocab & vocab,
- const float * logits,
- std::vector<llama_vocab::id> & last_n_tokens,
- double repeat_penalty,
- int top_k,
- double top_p,
- double temp,
- std::mt19937 & rng);
-
-// filer to top K tokens from list of logits
-void sample_top_k(std::vector<std::pair<double, llama_vocab::id>> & logits_id, int top_k);
-
-//
-// Quantization
-//
-
-size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist);
-size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist);
+std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
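Note: the added helper replaces the standalone llama_vocab utilities with a thin wrapper over the llama.h API, so callers tokenize through an initialized llama_context instead of loading a vocabulary file themselves. A minimal usage sketch follows; it assumes the llama_init_from_file, llama_context_default_params, llama_token_to_str, and llama_free entry points from llama.h at this revision, and the model path is a placeholder.

#include "llama.h"
#include "utils.h"

#include <cstdio>
#include <string>
#include <vector>

int main() {
    // load a model; the path and default parameters here are placeholders
    llama_context_params params = llama_context_default_params();
    llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // tokenize through the context rather than a standalone llama_vocab
    std::vector<llama_token> tokens = llama_tokenize(ctx, "Hello world", /*add_bos=*/true);

    // print each token id and its string form
    for (llama_token id : tokens) {
        printf("%d -> '%s'\n", id, llama_token_to_str(ctx, id));
    }

    llama_free(ctx);
    return 0;
}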