16 files changed, 89 insertions, 38 deletions
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 0add6ad..50e14c4 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -4,6 +4,10 @@
 #include <random>
 #include <cstring>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 float frand() {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -1470,7 +1474,7 @@ struct ggml_tensor * square_error_loss(struct ggml_context * ctx, struct ggml_te
 }
 
 struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
-    const float eps = 1e-3;
+    const float eps = 1e-3f;
     return
         ggml_sum(ctx,
             ggml_neg(ctx,
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 9f9ed9d..39d15ca 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -16,6 +16,10 @@
 #include <iterator>
 #include <algorithm>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 float tensor_sum_elements(const ggml_tensor * tensor) {
     float sum = 0;
     if (tensor->type==GGML_TYPE_F32) {
@@ -29,9 +33,9 @@ float tensor_sum_elements(const ggml_tensor * tensor) {
 }
 
 void tensor_dump(const ggml_tensor * tensor, const char * name) {
-    printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", name,
+    printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi) - ", name,
         tensor->type, ggml_type_name(tensor->type),
-        (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
+        tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
     float sum = tensor_sum_elements(tensor);
     printf("Sum of tensor %s is %6.2f\n", name, sum);
 }
@@ -120,7 +124,7 @@ int main(int argc, char ** argv)  {
     ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
     ctx_size += 1024*1024*16;
 
-    printf("Allocating Memory of size %li bytes, %li MB\n",ctx_size, (ctx_size/1024/1024));
+    printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
 
     struct ggml_init_params params = {
         /*.mem_size   =*/ ctx_size,
diff --git a/examples/common.cpp b/examples/common.cpp
index b47f062..055383b 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -28,6 +28,10 @@
 #include <wchar.h>
 #endif
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 int32_t get_num_physical_cores() {
 #ifdef __linux__
     // enumerate the set of thread siblings, num entries is num cores
@@ -373,7 +377,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 } else {
                     throw std::exception();
                 }
-            } catch (const std::exception &e) {
+            } catch (const std::exception&) {
                 invalid_param = true;
                 break;
             }
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 03603b1..860f99f 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -4,6 +4,10 @@
 
 #include <ctime>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 int main(int argc, char ** argv) {
     gpt_params params;
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index efa913e..ef9e75f 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -28,6 +28,10 @@
 #include <signal.h>
 #endif
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 static console_state con_st;
 static llama_context ** g_ctx;
 
@@ -348,7 +352,7 @@ int main(int argc, char ** argv) {
             if ((int)embd.size() > max_embd_size) {
                 auto skipped_tokens = embd.size() - max_embd_size;
                 console_set_color(con_st, CONSOLE_COLOR_ERROR);
-                printf("<<input too long: skipped %ld token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
+                printf("<<input too long: skipped %" PRIu64 "  token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
                 console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
                 fflush(stdout);
                 embd.resize(max_embd_size);
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index e19c682..ae8cfe0 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -5,6 +5,10 @@
 #include <cmath>
 #include <ctime>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 std::vector<float> softmax(const std::vector<float>& logits) {
     std::vector<float> probs(logits.size());
     float max_logit = logits[0];
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6e4f7e1..6b8018e 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -19,6 +19,10 @@
 #include <thread>
 #include <mutex>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 struct quantize_stats_params {
     std::string model = "models/7B/ggml-model-f16.bin";
     bool verbose = false;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 91f04b6..da4d37a 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -37,7 +37,7 @@ int main(int argc, char ** argv) {
     // init
     auto ctx = llama_init_from_file(params.model.c_str(), lparams);
     auto tokens = std::vector<llama_token>(params.n_ctx);
-    auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), tokens.size(), true);
+    auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true);
 
     if (n_prompt_tokens < 1) {
         fprintf(stderr, "%s : failed to tokenize prompt\n", __func__);
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 51271b4..7ec8595 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -12,6 +12,9 @@
 #include <algorithm>
 #include <string>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
 
 struct random_normal_distribution {
     std::mt19937 gen;
@@ -20,7 +23,6 @@ struct random_normal_distribution {
     float max;
 };
 
-
 struct random_uniform_distribution {
     std::mt19937 gen;
     std::uniform_real_distribution<float> rd;
@@ -2366,7 +2368,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
         file->write_u32(0);
         file->write_u32(0);
         file->write_u32(GGML_TYPE_F32);
-        file->seek(-file->tell() & 31, SEEK_CUR);
+        file->seek(0-file->tell() & 31, SEEK_CUR);
         return;
     }
     const char * name = ggml_get_name(tensor);
@@ -2381,7 +2383,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_u32(tensor->type);
     file->write_raw(ne, sizeof(ne[0]) * nd);
     file->write_raw(name, name_len);
-    file->seek(-file->tell() & 31, SEEK_CUR);
+    file->seek(0-file->tell() & 31, SEEK_CUR);
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
@@ -2402,7 +2404,7 @@ void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     std::string name = file->read_string(name_len);
     GGML_ASSERT(strncmp(ggml_get_name(tensor), name.c_str(), sizeof(tensor->name)-1) == 0);
 
-    file->seek(-file->tell() & 31, SEEK_CUR);
+    file->seek(0-file->tell() & 31, SEEK_CUR);
     file->read_raw(tensor->data, ggml_nbytes(tensor));
 }
 
@@ -2756,8 +2758,8 @@ struct train_params get_default_train_params() {
 
     params.lbfgs_n_iter      = 16;
     params.adam_n_iter       = 16;
-    params.adam_alpha        = 1e-3;
-    params.adam_decay        = 1e-3;
+    params.adam_alpha        = 1e-3f;
+    params.adam_decay        = 1e-3f;
 
     params.mem_model_gb   = 2;
     params.mem_compute_gb = 24;
@@ -3331,8 +3333,8 @@ int main(int argc, char ** argv) {
         int n_gen = params.n_predict;
         int sample_ctx = n_tokens - n_tokens/8;
 
-        sampler.params.temp = 0.2;
-        sampler.params.repeat_penalty = 1.1;
+        sampler.params.temp = 0.2f;
+        sampler.params.repeat_penalty = 1.1f;
         sampler.params.mirostat = 2;
         init_sampler(&sampler, lctx);
 
diff --git a/ggml.c b/ggml.c
index c0efa19..0eda7f3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -35,6 +35,12 @@
 #define static_assert(cond, msg) struct global_scope_noop_trick
 #endif
 
+#if defined(_MSC_VER)
+// disable "possible loss of data" to avoid hundreds of casts
+// we should just be careful :)
+#pragma warning(disable: 4244 4267)
+#endif
+
 #if defined(_WIN32)
 
 #include <windows.h>
diff --git a/llama.cpp b/llama.cpp
index b8bc0d8..a904388 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -40,6 +40,10 @@
 #include <sstream>
 #include <numeric>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 #define LLAMA_USE_SCRATCH
 #define LLAMA_MAX_SCRATCH_BUFFERS 16
 
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp
index 26bf50c..7b18090 100644
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -10,6 +10,10 @@
 
 #include <ggml.h>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 constexpr int kVecSize = 1 << 18;
 
 float drawFromGaussianPdf(std::mt19937& rndm) {
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 728460b..c40f1b2 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -9,12 +9,15 @@
 #include <string>
 #include <vector>
 
-
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040;
-const float MAX_DOT_PRODUCT_ERROR = 0.02;
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+const float MAX_DOT_PRODUCT_ERROR = 0.02f;
 
 const char* RESULT_STR[] = {"ok", "FAILED"};
 
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index d551445..6003757 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -13,6 +13,10 @@
 #include <string>
 #include <vector>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 #define MAX_ALIGNMENT 64
 #define QK 32
 #define WARMUP 5
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 0e67512..5d693f7 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -176,27 +176,27 @@ void test_frequency_presence_penalty(
 int main(void) {
     ggml_time_init();
 
-    test_top_k({0.1, 0.2, 0.3, 0.4}, {0.4}, 1);
-    test_top_k({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3, 0.2}, 3);
+    test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 1);
+    test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f}, 3);
 
-    test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4}, 0);
-    test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3}, 0.7);
-    test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3, 0.2, 0.1}, 1);
+    test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 0);
+    test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f}, 0.7f);
+    test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1);
 
-    test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3}, 0.25);
-    test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3, 0.25}, 0.75);
-    test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3, 0.25}, 0.99);
+    test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f}, 0.25f);
+    test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.75f);
+    test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.99f);
 
-    test_typical({0.97, 0.01, 0.01, 0.01}, {0.97}, 0.5);
-    test_typical({0.4, 0.2, 0.2, 0.2}, {0.2, 0.2, 0.2}, 0.5);
+    test_typical({0.97f, 0.01f, 0.01f, 0.01f}, {0.97f}, 0.5f);
+    test_typical({0.4f, 0.2f, 0.2f, 0.2f}, {0.2f, 0.2f, 0.2f}, 0.5f);
 
-    test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0}, {0.25, 0.25, 0.25, 0.25, 0}, 50.0);
-    test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2}, {0.5, 0.5, 0, 0, 0}, 50.0);
-    test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2, 0, 0}, {0.5, 0.5, 0, 0, 0}, 50.0);
+    test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0}, {0.25f, 0.25f, 0.25f, 0.25f, 0}, 50.0f);
+    test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2}, {0.5f, 0.5f, 0, 0, 0}, 50.0f);
+    test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 0}, {0.5f, 0.5f, 0, 0, 0}, 50.0f);
 
-    test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0},             {0.249997, 0.249997, 0.249997, 0.249997, 0.000011}, 5.0, 5.0);
-    test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2},       {0.499966, 0.499966, 0.000023, 0.000023, 0.000023}, 5.0, 5.0);
-    test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2, 0, 0}, {0.499977, 0.499977, 0.000023, 0.000023, 0.000000}, 5.0, 5.0);
+    test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0},             {0.249997f, 0.249997f, 0.249997f, 0.249997f, 0.000011f}, 5.0f, 5.0f);
+    test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2},       {0.499966f, 0.499966f, 0.000023f, 0.000023f, 0.000023f}, 5.0f, 5.0f);
+    test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 0}, {0.499977f, 0.499977f, 0.000023f, 0.000023f, 0.000000f}, 5.0f, 5.0f);
 
     printf("OK\n");
 }
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index b089845..ab1538a 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -53,7 +53,7 @@ int main(int argc, char **argv) {
 
     for (const auto & test_kv : k_tests()) {
         std::vector<llama_token> res(test_kv.first.size());
-        const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), res.size(), true);
+        const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), int(res.size()), true);
         res.resize(n);
 
         bool correct = res.size() == test_kv.second.size();