about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Slaren <2141330+slaren@users.noreply.github.com> 2023-03-29 08:31:26 +0200
committer Justine Tunney <jtunney@gmail.com> 2023-03-30 12:28:25 -0700
commit 276e5b781155e3bbe6834472c58f03dfe62efabe (patch)
tree 9c6c5126211a43cfe4b04b4a340e7de0d7ada830
parent d68c5dc4356c8f49e933df210f2ceca5002a8118 (diff)
Unmap the file in llama_free
-rw-r--r-- llama.cpp 41
1 files changed, 31 insertions, 10 deletions
diff --git a/llama.cpp b/llama.cpp
index 096735c..0c220e4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -149,6 +149,10 @@ struct llama_model {
// the model memory buffer
std::vector<uint8_t> buf;
+ // model memory mapped file
+ void * mm_addr;
+ size_t mm_length;
+
// tensors
int n_loaded;
std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() {
// model loading
//
-static void * mmap_file(const char* fname) {
+static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) {
#if defined(MAP_FAILED)
- // POSIX mmap
+ // POSIX
int fd = open(fname, O_RDONLY);
- size_t len = lseek(fd, 0, SEEK_END);
- void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+ mm_length = lseek(fd, 0, SEEK_END);
+ mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0);
+ close(fd);
if (mm_addr == MAP_FAILED) {
perror("mmap failed");
mm_addr = NULL;
+ mm_length = 0;
}
- close(fd);
- return mm_addr;
#else
// TODO: windows support
(void)(fname); // suppress warnings
- return NULL;
+#endif
+}
+
+static void munmap_file(void * addr, size_t length) {
+#if defined(MAP_FAILED)
+ // POSIX
+ munmap(addr, length);
+#else
+ // TODO: windows support
+ (void)(addr); // suppress warnings
+ (void)(length);
#endif
}
@@ -480,12 +494,15 @@ static bool llama_model_load(
bool use_mmap = (n_parts == 1);
// try to memory map the model file
- void* mm_addr = NULL;
+ void * mm_addr = NULL;
if (use_mmap) {
- mm_addr = mmap_file(fname.c_str());
- if (mm_addr == NULL) {
+ mmap_file(fname.c_str(), model.mm_addr, model.mm_length);
+ if (model.mm_addr == NULL) {
use_mmap = false;
}
+ else {
+ mm_addr = model.mm_addr;
+ }
}
auto & ctx = model.ctx;
@@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) {
ggml_free(ctx->model.ctx);
}
+ if (ctx->model.mm_addr) {
+ munmap_file(ctx->model.mm_addr, ctx->model.mm_length);
+ }
+
delete ctx;
}