1 files changed, 3 insertions, 1 deletions
diff --git a/llama.h b/llama.h
index 011e34c..e95ff73 100644
--- a/llama.h
+++ b/llama.h
@@ -93,10 +93,12 @@ extern "C" {
 
     // TODO: not great API - very likely to change
     // Returns 0 on success
+    // nthread - number of threads to use; if <= 0, std::thread::hardware_concurrency() is used instead
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
             const char * fname_out,
-      enum llama_ftype   ftype);
+      enum llama_ftype   ftype,
+            int          nthread);
 
     // Apply a LoRA adapter to a loaded model
     // path_base_model is the path to a higher quality model to use as a base for