Diffstat (limited to 'examples')
-rw-r--r--  examples/benchmark/benchmark-matmult.cpp |  3
-rw-r--r--  examples/embedding/embedding.cpp         |  2
-rw-r--r--  examples/main/main.cpp                   |  3
-rw-r--r--  examples/perplexity/perplexity.cpp       |  2
-rw-r--r--  examples/quantize/quantize.cpp           | 21
5 files changed, 14 insertions, 17 deletions
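The diff below replaces the per-example ggml setup (ggml_time_init() and the throwaway ggml_context previously used only to build the f16 tables) with a single llama_init_backend() call, and switches the quantize timing code from ggml_time_us() to llama_time_us(). A minimal sketch of the resulting pattern is shown here; llama_init_backend() and llama_time_us() come from this diff, while llama_context_default_params(), llama_init_from_file() and llama_free() are assumed from the llama.cpp C API of this revision and are not part of the change itself.

    // sketch only: backend init first, then create the context, then time work
    #include "llama.h"

    #include <cstdint>
    #include <cstdio>

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s model.bin\n", argv[0]);
            return 1;
        }

        // one-time backend setup (replaces the ad-hoc ggml init used before)
        llama_init_backend();

        const int64_t t_start_us = llama_time_us();

        // assumed API of this era: default params + init-from-file
        llama_context_params lparams = llama_context_default_params();
        llama_context * ctx = llama_init_from_file(argv[1], lparams);
        if (ctx == NULL) {
            fprintf(stderr, "failed to load model '%s'\n", argv[1]);
            return 1;
        }

        fprintf(stderr, "load time = %8.2f ms\n", (llama_time_us() - t_start_us) / 1000.0);

        llama_free(ctx);
        return 0;
    }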
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 446b8e8..9f9ed9d 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -1,6 +1,7 @@
-#include <locale.h>
#include "ggml.h"
#include "build-info.h"
+
+#include <locale.h>
#include <assert.h>
#include <math.h>
#include <cstring>
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index c24f7f8..03603b1 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -31,6 +31,8 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
+ llama_init_backend();
+
llama_context * ctx;
// load the model
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 4d886f8..47b418d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -96,8 +96,7 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
-// params.prompt = R"(// this function checks if the number n is prime
-//bool is_prime(int n) {)";
+ llama_init_backend();
llama_context * ctx;
g_ctx = &ctx;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 9d38626..e19c682 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -143,6 +143,8 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
+ llama_init_backend();
+
llama_context * ctx;
// load the model and apply lora adapter, if any
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 115d8fb..769dd36 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -1,7 +1,7 @@
-#include "ggml.h"
-#include "llama.h"
#include "build-info.h"
+#include "llama.h"
+
#include <cstdio>
#include <map>
#include <string>
@@ -42,8 +42,6 @@ bool try_parse_ftype(const std::string & ftype_str, llama_ftype & ftype, std::st
// ./quantize models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
//
int main(int argc, char ** argv) {
- ggml_time_init();
-
if (argc < 3) {
fprintf(stderr, "usage: %s model-f32.bin [model-quant.bin] type [nthreads]\n", argv[0]);
for (auto it = LLAMA_FTYPE_MAP.begin(); it != LLAMA_FTYPE_MAP.end(); it++) {
@@ -52,12 +50,7 @@ int main(int argc, char ** argv) {
return 1;
}
- // needed to initialize f16 tables
- {
- struct ggml_init_params params = { 0, NULL, false };
- struct ggml_context * ctx = ggml_init(params);
- ggml_free(ctx);
- }
+ llama_init_backend();
// parse command line arguments
const std::string fname_inp = argv[1];
@@ -116,25 +109,25 @@ int main(int argc, char ** argv) {
}
fprintf(stderr, "\n");
- const int64_t t_main_start_us = ggml_time_us();
+ const int64_t t_main_start_us = llama_time_us();
int64_t t_quantize_us = 0;
// load the model
{
- const int64_t t_start_us = ggml_time_us();
+ const int64_t t_start_us = llama_time_us();
if (llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), ftype, nthread)) {
fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
return 1;
}
- t_quantize_us = ggml_time_us() - t_start_us;
+ t_quantize_us = llama_time_us() - t_start_us;
}
// report timing
{
- const int64_t t_main_end_us = ggml_time_us();
+ const int64_t t_main_end_us = llama_time_us();
printf("\n");
printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0);