From d01bccde9f759b24449fdaa16306b406a50eb367 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 18 Jul 2023 14:24:43 +0300 Subject: ci : integrate with ggml-org/ci (#2250) * ci : run ctest ggml-ci * ci : add open llama 3B-v2 tests ggml-ci * ci : disable wget progress output ggml-ci * ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations ggml-ci * tests : try to fix tail free sampling test ggml-ci * ci : add K-quants ggml-ci * ci : add short perplexity tests ggml-ci * ci : add README.md * ppl : add --chunks argument to limit max number of chunks ggml-ci * ci : update README --- examples/perplexity/perplexity.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'examples/perplexity') diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 7e120ff..bfad999 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -32,13 +32,15 @@ void perplexity(llama_context * ctx, const gpt_params & params) { // BOS tokens will be added for each chunk before eval auto tokens = ::llama_tokenize(ctx, params.prompt, true); - int count = 0; + const int n_chunk_max = tokens.size() / params.n_ctx; - const int n_chunk = tokens.size() / params.n_ctx; + const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max); const int n_vocab = llama_n_vocab(ctx); const int n_batch = params.n_batch; + int count = 0; double nll = 0.0; + fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch); for (int i = 0; i < n_chunk; ++i) { -- cgit v1.2.3 From b1f429095328a34556c0e9a7a2fefced3db3368c Mon Sep 17 00:00:00 2001 From: wzy <32936898+Freed-Wu@users.noreply.github.com> Date: Wed, 19 Jul 2023 15:01:11 +0800 Subject: cmake : install targets (#2256) fix #2252 --- CMakeLists.txt | 25 +++++++++++++++++++++++++ convert-lora-to-ggml.py | 1 + convert.py | 1 + examples/baby-llama/CMakeLists.txt | 1 + examples/benchmark/CMakeLists.txt | 1 + examples/embd-input/CMakeLists.txt | 2 ++ examples/embedding/CMakeLists.txt | 1 + examples/main/CMakeLists.txt | 1 + examples/metal/CMakeLists.txt | 1 + examples/perplexity/CMakeLists.txt | 1 + examples/quantize-stats/CMakeLists.txt | 1 + examples/quantize/CMakeLists.txt | 1 + examples/save-load-state/CMakeLists.txt | 1 + examples/server/CMakeLists.txt | 1 + examples/simple/CMakeLists.txt | 1 + examples/train-text-from-scratch/CMakeLists.txt | 1 + tests/CMakeLists.txt | 1 + 17 files changed, 42 insertions(+) mode change 100644 => 100755 convert-lora-to-ggml.py mode change 100644 => 100755 convert.py (limited to 'examples/perplexity') diff --git a/CMakeLists.txt b/CMakeLists.txt index d9381da..abc9681 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,6 +512,7 @@ if (BUILD_SHARED_LIBS) set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) add_library(ggml_shared SHARED $) target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) + install(TARGETS ggml_shared LIBRARY) endif() add_library(llama @@ -533,8 +534,32 @@ if (BUILD_SHARED_LIBS) if (LLAMA_METAL) set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") endif() + install(TARGETS llama LIBRARY) endif() +include(GNUInstallDirs) +install( + FILES convert.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) +install( + FILES convert-lora-to-ggml.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) # # programs, examples and tests diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py old mode 100644 new mode 100755 index f43c836..b4999ff --- a/convert-lora-to-ggml.py +++ b/convert-lora-to-ggml.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import json import os import re diff --git a/convert.py b/convert.py old mode 100644 new mode 100755 index 7a2705e..e3f1096 --- a/convert.py +++ b/convert.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import argparse import concurrent.futures import copy diff --git a/examples/baby-llama/CMakeLists.txt b/examples/baby-llama/CMakeLists.txt index d2ce363..7b70227 100644 --- a/examples/baby-llama/CMakeLists.txt +++ b/examples/baby-llama/CMakeLists.txt @@ -1,4 +1,5 @@ set(TARGET baby-llama) add_executable(${TARGET} baby-llama.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/benchmark/CMakeLists.txt b/examples/benchmark/CMakeLists.txt index 0376961..3f34153 100644 --- a/examples/benchmark/CMakeLists.txt +++ b/examples/benchmark/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET benchmark) add_executable(${TARGET} benchmark-matmult.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/embd-input/CMakeLists.txt b/examples/embd-input/CMakeLists.txt index 2b62395..5bbb1ea 100644 --- a/examples/embd-input/CMakeLists.txt +++ b/examples/embd-input/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET embdinput) add_library(${TARGET} embd-input-lib.cpp embd-input.h) +install(TARGETS ${TARGET} LIBRARY) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) @@ -8,6 +9,7 @@ endif() set(TARGET embd-input-test) add_executable(${TARGET} embd-input-test.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama embdinput ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/embedding/CMakeLists.txt b/examples/embedding/CMakeLists.txt index db73b6b..0c752c7 100644 --- a/examples/embedding/CMakeLists.txt +++ b/examples/embedding/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET embedding) add_executable(${TARGET} embedding.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/main/CMakeLists.txt b/examples/main/CMakeLists.txt index c364242..cc18889 100644 --- a/examples/main/CMakeLists.txt +++ b/examples/main/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET main) add_executable(${TARGET} main.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/metal/CMakeLists.txt b/examples/metal/CMakeLists.txt index a8c4284..f16d491 100644 --- a/examples/metal/CMakeLists.txt +++ b/examples/metal/CMakeLists.txt @@ -1,3 +1,4 @@ set(TEST_TARGET metal) add_executable(${TEST_TARGET} metal.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE ggml) diff --git a/examples/perplexity/CMakeLists.txt b/examples/perplexity/CMakeLists.txt index 61b17b8..af00b4e 100644 --- a/examples/perplexity/CMakeLists.txt +++ b/examples/perplexity/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET perplexity) add_executable(${TARGET} perplexity.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/quantize-stats/CMakeLists.txt b/examples/quantize-stats/CMakeLists.txt index 7bebc11..c5c3940 100644 --- a/examples/quantize-stats/CMakeLists.txt +++ b/examples/quantize-stats/CMakeLists.txt @@ -1,4 +1,5 @@ set(TARGET quantize-stats) add_executable(${TARGET} quantize-stats.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/quantize/CMakeLists.txt b/examples/quantize/CMakeLists.txt index 475fc8b..47d0be7 100644 --- a/examples/quantize/CMakeLists.txt +++ b/examples/quantize/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET quantize) add_executable(${TARGET} quantize.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/save-load-state/CMakeLists.txt b/examples/save-load-state/CMakeLists.txt index 08dbe5c..eadd13c 100644 --- a/examples/save-load-state/CMakeLists.txt +++ b/examples/save-load-state/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET save-load-state) add_executable(${TARGET} save-load-state.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 07ba76a..812a24b 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -2,6 +2,7 @@ set(TARGET server) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_executable(${TARGET} server.cpp json.hpp httplib.h) +install(TARGETS ${TARGET} RUNTIME) target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$ ) diff --git a/examples/simple/CMakeLists.txt b/examples/simple/CMakeLists.txt index 1568f73..0ac9cb0 100644 --- a/examples/simple/CMakeLists.txt +++ b/examples/simple/CMakeLists.txt @@ -1,5 +1,6 @@ set(TARGET simple) add_executable(${TARGET} simple.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) diff --git a/examples/train-text-from-scratch/CMakeLists.txt b/examples/train-text-from-scratch/CMakeLists.txt index 1a44c49..4459516 100644 --- a/examples/train-text-from-scratch/CMakeLists.txt +++ b/examples/train-text-from-scratch/CMakeLists.txt @@ -1,4 +1,5 @@ set(TARGET train-text-from-scratch) add_executable(${TARGET} train-text-from-scratch.cpp) +install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1acf050..11ec6c7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,7 @@ function(llama_add_test source) get_filename_component(TEST_TARGET ${source} NAME_WE) add_executable(${TEST_TARGET} ${source}) + install(TARGETS ${TEST_TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE llama) add_test(NAME ${TEST_TARGET} COMMAND $ ${ARGN}) endfunction() -- cgit v1.2.3 From b5fe67f8c69113bd9354bc1adcfe2df6be323740 Mon Sep 17 00:00:00 2001 From: klosax <131523366+klosax@users.noreply.github.com> Date: Sat, 22 Jul 2023 14:21:24 +0200 Subject: Perplexity: Compute scores correlated to HellaSwag (#2312) * Add parameter --perplexity-lines to perplexity.cpp --- examples/common.cpp | 5 ++- examples/common.h | 1 + examples/perplexity/perplexity.cpp | 78 +++++++++++++++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 2 deletions(-) (limited to 'examples/perplexity') diff --git a/examples/common.cpp b/examples/common.cpp index 0990195..730b28b 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -387,6 +387,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.antiprompt.push_back(argv[i]); } else if (arg == "--perplexity") { params.perplexity = true; + } else if (arg == "--perplexity-lines") { + params.perplexity_lines = true; } else if (arg == "--ignore-eos") { params.logit_bias[llama_token_eos()] = -INFINITY; } else if (arg == "--no-penalize-nl") { @@ -512,7 +514,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp); fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); - fprintf(stderr, " --perplexity compute perplexity over the prompt\n"); + fprintf(stderr, " --perplexity compute perplexity over each ctx window of the prompt\n"); + fprintf(stderr, " --perplexity-lines compute perplexity over each line of the prompt\n"); fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); fprintf(stderr, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); if (llama_mlock_supported()) { diff --git a/examples/common.h b/examples/common.h index 69170df..c936de6 100644 --- a/examples/common.h +++ b/examples/common.h @@ -82,6 +82,7 @@ struct gpt_params { bool instruct = false; // instruction mode (used for Alpaca models) bool penalize_nl = true; // consider newlines as a repeatable token bool perplexity = false; // compute perplexity over the prompt + bool perplexity_lines = false; // compute perplexity over each line of the prompt bool use_mmap = true; // use mmap for faster loads bool use_mlock = false; // use mlock to keep model in memory bool mem_test = false; // compute maximum memory usage diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index bfad999..d23b7e7 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -4,6 +4,7 @@ #include #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -120,6 +121,77 @@ void perplexity(llama_context * ctx, const gpt_params & params) { printf("\n"); } +void perplexity_lines(llama_context * ctx, const gpt_params & params) { + // Calculates perplexity over each line of the prompt + + std::vector prompt_lines; + std::istringstream strstream(params.prompt); + std::string line; + + while (std::getline(strstream,line,'\n')) { + prompt_lines.push_back(line); + } + + const int n_vocab = llama_n_vocab(ctx); + + int counttotal = 0; + size_t n_lines = prompt_lines.size(); + + double nll = 0.0; + + fprintf(stderr, "%s: calculating perplexity over %lu lines\n", __func__, n_lines); + + printf("\nLine\tPPL line\tPPL cumulative\n"); + + for (size_t i = 0; i < n_lines; ++i) { + + // Tokenize and insert BOS at start + std::vector batch_embd = ::llama_tokenize(ctx, prompt_lines[i], true); + + size_t batch_size = batch_embd.size(); + + // Stop if line is too long + if( batch_size > (size_t)params.n_ctx ) { + fprintf(stderr, "%s : tokens in line %lu > n_ctxl\n", __func__, i); + return; + } + + if (llama_eval(ctx, batch_embd.data(), batch_size, 0, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return; + } + + const auto batch_logits = llama_get_logits(ctx); + std::vector logits; + logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab); + + double nllline = 0.0; + int countline = 0; + + // Perplexity over second half of the line + for (size_t j = batch_size/2; j < batch_size - 1; ++j) { + // Calculate probability of next token, given the previous ones. + const std::vector tok_logits( + logits.begin() + (j + 0) * n_vocab, + logits.begin() + (j + 1) * n_vocab); + + const float prob = softmax(tok_logits)[batch_embd[ j + 1]]; + + nllline += -std::log(prob); + ++countline; + } + + nll += nllline; + counttotal += countline; + + // perplexity is e^(average negative log-likelihood) + printf("%lu\t%.8lf\t%.8lf\n", i + 1, std::exp(nllline/countline), std::exp(nll / counttotal) ); + fflush(stdout); + } + + printf("\n"); +} + int main(int argc, char ** argv) { gpt_params params; @@ -168,7 +240,11 @@ int main(int argc, char ** argv) { params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); } - perplexity(ctx, params); + if (params.perplexity_lines) { + perplexity_lines(ctx, params); + } else { + perplexity(ctx, params); + } llama_print_timings(ctx); llama_free(ctx); -- cgit v1.2.3 From 8a88e5855c3b93024be0f93290b01a4206b65b38 Mon Sep 17 00:00:00 2001 From: klosax <131523366+klosax@users.noreply.github.com> Date: Fri, 28 Jul 2023 20:25:36 +0200 Subject: perplexity : add Hellaswag calculation (#2389) * common.h : add hellaswag / remove perplexity-lines * common.cpp : add hellaswag / remove perplexity-lines * perplexity.cpp : add hellswag scores / remove perplexity-lines * perplexity.cpp : clean up * common.h : change default param value * common.cpp : Change default param * perplexity.cpp : alter wording * common.h : alter wording * common.cpp : alter wording --- examples/common.cpp | 15 +++- examples/common.h | 6 +- examples/perplexity/perplexity.cpp | 179 +++++++++++++++++++++++++++++-------- 3 files changed, 155 insertions(+), 45 deletions(-) (limited to 'examples/perplexity') diff --git a/examples/common.cpp b/examples/common.cpp index dd964c8..fe7308b 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -402,8 +402,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.antiprompt.push_back(argv[i]); } else if (arg == "--perplexity") { params.perplexity = true; - } else if (arg == "--perplexity-lines") { - params.perplexity_lines = true; + } else if (arg == "--hellaswag") { + params.hellaswag = true; + } else if (arg == "--hellaswag-tasks") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.hellaswag_tasks = std::stoi(argv[i]); } else if (arg == "--ignore-eos") { params.logit_bias[llama_token_eos()] = -INFINITY; } else if (arg == "--no-penalize-nl") { @@ -559,8 +565,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stdout, " not recommended: doubles context memory required and no measurable increase in quality\n"); fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp); fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n"); - fprintf(stdout, " --perplexity-lines compute perplexity over each line of the prompt\n"); - fprintf(stdout, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); + fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n"); + fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %d)\n", params.hellaswag_tasks); + fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); if (llama_mlock_supported()) { fprintf(stdout, " --mlock force system to keep model in RAM rather than swapping or compressing\n"); diff --git a/examples/common.h b/examples/common.h index 672dcf7..1184f32 100644 --- a/examples/common.h +++ b/examples/common.h @@ -70,7 +70,10 @@ struct gpt_params { std::string lora_adapter = ""; // lora adapter path std::string lora_base = ""; // base model path for the lora adapter - bool low_vram = false; // if true, reduce VRAM usage at the cost of performance + bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score + + bool low_vram = false; // if true, reduce VRAM usage at the cost of performance bool memory_f16 = true; // use f16 instead of f32 for memory kv bool random_prompt = false; // do not randomize prompt if none provided bool use_color = false; // use color to distinguish generations and inputs @@ -86,7 +89,6 @@ struct gpt_params { bool instruct = false; // instruction mode (used for Alpaca models) bool penalize_nl = true; // consider newlines as a repeatable token bool perplexity = false; // compute perplexity over the prompt - bool perplexity_lines = false; // compute perplexity over each line of the prompt bool use_mmap = true; // use mmap for faster loads bool use_mlock = false; // use mlock to keep model in memory bool mem_test = false; // compute maximum memory usage diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index d23b7e7..6870a11 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -121,8 +121,23 @@ void perplexity(llama_context * ctx, const gpt_params & params) { printf("\n"); } -void perplexity_lines(llama_context * ctx, const gpt_params & params) { - // Calculates perplexity over each line of the prompt +void hellaswag_score(llama_context * ctx, const gpt_params & params) { + // Calculates hellaswag score (acc_norm) from prompt + // + // Data extracted from the HellaSwag validation dataset (MIT license) https://github.com/rowanz/hellaswag/blob/master/data/hellaswag_val.jsonl + // All used data fields are preprocessed as in https://github.com/EleutherAI/lm-evaluation-harness/blob/df3da98c5405deafd519c2ddca52bb7c3fe36bef/lm_eval/tasks/hellaswag.py#L62-L68 + // + // All 10042 tasks should be extracted to keep the results standardized like other implementations. + // + // Datafile layout: + // ['??'] denotes json fields + // 6 lines per task: + // ['activity_label'] + ": " +['ctx'] - The first part of the query, the context + // ['label'] - The index the best common sense ending aka gold ending + // ['endings'][0] - Endings added to the first part of the query + // ['endings'][1] + // ['endings'][2] + // ['endings'][3] std::vector prompt_lines; std::istringstream strstream(params.prompt); @@ -132,63 +147,149 @@ void perplexity_lines(llama_context * ctx, const gpt_params & params) { prompt_lines.push_back(line); } - const int n_vocab = llama_n_vocab(ctx); + if( prompt_lines.size() % 6 != 0) { + fprintf(stderr, "%s : number of lines in prompt not a multiple of 6.\n", __func__); + return; + } - int counttotal = 0; - size_t n_lines = prompt_lines.size(); + size_t hs_task_count = prompt_lines.size()/6; + fprintf(stderr, "%s : loaded %lu tasks from prompt.\n", __func__, hs_task_count); - double nll = 0.0; + // This is needed as usual for LLaMA models + bool prepend_bos = true; + + // Number of tasks to use when computing the score + if ( params.hellaswag_tasks < hs_task_count ) { + hs_task_count = params.hellaswag_tasks; + } - fprintf(stderr, "%s: calculating perplexity over %lu lines\n", __func__, n_lines); + // The tasks should be randomized so the score stabilizes quickly. + bool randomize_tasks = true; - printf("\nLine\tPPL line\tPPL cumulative\n"); + // The random seed should not impact the final result if the computation is done over enough tasks, so kept hardcoded for now + std::mt19937 rng(1); - for (size_t i = 0; i < n_lines; ++i) { + // Dataholder for hellaswag tasks + struct hs_data_t { + std::string context; + size_t gold_ending_idx; + std::string ending[4]; + size_t ending_logprob_count[4]; + double ending_logprob[4]; + }; - // Tokenize and insert BOS at start - std::vector batch_embd = ::llama_tokenize(ctx, prompt_lines[i], true); + fprintf(stderr, "%s : selecting %lu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") ); - size_t batch_size = batch_embd.size(); + // Select and read data from prompt lines + hs_data_t *hs_data = new hs_data_t[hs_task_count]; + for (size_t i=0; i < hs_task_count; i++) { + size_t idx = i; - // Stop if line is too long - if( batch_size > (size_t)params.n_ctx ) { - fprintf(stderr, "%s : tokens in line %lu > n_ctxl\n", __func__, i); - return; + // Select a random example of those left in the prompt + if (randomize_tasks) { + std::uniform_int_distribution dist(0, prompt_lines.size()/6-1 ) ; + idx = dist(rng); } - if (llama_eval(ctx, batch_embd.data(), batch_size, 0, params.n_threads)) { - fprintf(stderr, "%s : failed to eval\n", __func__); - return; + hs_data[i].context = prompt_lines[idx*6]; + hs_data[i].gold_ending_idx = std::stoi( prompt_lines[idx*6+1] ); + for (size_t j=0; j < 4; j++) { + hs_data[i].ending[j] = " " + prompt_lines[idx*6+2+j]; } - const auto batch_logits = llama_get_logits(ctx); - std::vector logits; - logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab); + // Delete the selected random example from the prompt + if (randomize_tasks) { + prompt_lines.erase( std::next(prompt_lines.begin(),idx*6) , std::next(prompt_lines.begin(),idx*6+6) ); + } + } - double nllline = 0.0; - int countline = 0; + fprintf(stderr, "%s : calculating hellaswag score over selected tasks.\n", __func__); + printf("\ntask\tacc_norm\n"); - // Perplexity over second half of the line - for (size_t j = batch_size/2; j < batch_size - 1; ++j) { - // Calculate probability of next token, given the previous ones. - const std::vector tok_logits( - logits.begin() + (j + 0) * n_vocab, - logits.begin() + (j + 1) * n_vocab); + double acc = 0.0f; + const int n_vocab = llama_n_vocab(ctx); + + for (size_t task_idx = 0; task_idx < hs_task_count; task_idx++) { + + // Tokenize the context to count tokens + std::vector context_embd = ::llama_tokenize(ctx, hs_data[task_idx].context, prepend_bos); + size_t context_size = context_embd.size(); + + for (size_t ending_idx=0;ending_idx<4;ending_idx++) { + + // Tokenize the query + std::vector query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[ending_idx], prepend_bos); + size_t query_size = query_embd.size(); + + // Stop if query wont fit the ctx window + if (query_size > (size_t)params.n_ctx) { + fprintf(stderr, "%s : number of tokens in query %lu > n_ctxl\n", __func__, query_size); + return; + } - const float prob = softmax(tok_logits)[batch_embd[ j + 1]]; + // Speedup small evaluations by evaluating atleast 32 tokens + if (query_size < 32) { + query_embd.resize(32); + } + + // Evaluate the query + if (llama_eval(ctx, query_embd.data(), query_embd.size(), 0, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return; + } + + const auto query_logits = llama_get_logits(ctx); + std::vector logits; + logits.insert(logits.end(), query_logits, query_logits + query_size * n_vocab); + + hs_data[task_idx].ending_logprob_count[ending_idx] = 0; + hs_data[task_idx].ending_logprob[ending_idx] = 0.0f; + + // Calculate the logprobs over the ending + for (size_t j = context_size-1; j < query_size - 1; j++) { + // Calculate probability of next token, given the previous ones. + const std::vector tok_logits( + logits.begin() + (j + 0) * n_vocab, + logits.begin() + (j + 1) * n_vocab); + + const float prob = softmax(tok_logits)[query_embd[ j + 1]]; + + hs_data[task_idx].ending_logprob[ending_idx] += std::log(prob); + hs_data[task_idx].ending_logprob_count[ending_idx]++; + } + + // Calculate the mean token logprob for acc_norm + hs_data[task_idx].ending_logprob[ending_idx] /= hs_data[task_idx].ending_logprob_count[ending_idx]; + + +// printf("task %lu, ending %lu, whole_len %lu, context_len %lu, ending_logprob_count %lu, ending_logprob %.4f\n", +// task_idx,ending_idx,whole_size,context_size, hs_data[task_idx].ending_logprob_count[ending_idx], hs_data[task_idx].ending_logprob[ending_idx] ); + } - nllline += -std::log(prob); - ++countline; + // Find the ending with maximum logprob + size_t ending_logprob_max_idx = -1; + double ending_logprob_max_val = -INFINITY; + for (size_t j=0; j < 4; j++) { + if (hs_data[task_idx].ending_logprob[j] > ending_logprob_max_val) { + ending_logprob_max_idx = j; + ending_logprob_max_val = hs_data[task_idx].ending_logprob[j]; + } } - nll += nllline; - counttotal += countline; +// printf("max logprob ending idx %lu, gold ending idx %lu\n", ending_logprob_max_idx, hs_data[task_idx].gold_ending_idx); - // perplexity is e^(average negative log-likelihood) - printf("%lu\t%.8lf\t%.8lf\n", i + 1, std::exp(nllline/countline), std::exp(nll / counttotal) ); + // If the gold ending got the maximum logprobe add one accuracy point + if (ending_logprob_max_idx == hs_data[task_idx].gold_ending_idx) { + acc += 1.0; + } + + // Print the accumulated accuracy mean x 100 + printf("%li\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0); fflush(stdout); } + delete [] hs_data; + printf("\n"); } @@ -240,8 +341,8 @@ int main(int argc, char ** argv) { params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); } - if (params.perplexity_lines) { - perplexity_lines(ctx, params); + if (params.hellaswag) { + hellaswag_score(ctx, params); } else { perplexity(ctx, params); } -- cgit v1.2.3 From ff966e7ca6af127c9405523cdb07ef8fa01bf6d6 Mon Sep 17 00:00:00 2001 From: Borislav Stanimirov Date: Fri, 4 Aug 2023 13:07:21 +0300 Subject: build : fix several cast and printf warnings (#2499) --- examples/embd-input/embd-input-lib.cpp | 2 +- examples/grammar-parser.cpp | 2 +- examples/perplexity/perplexity.cpp | 8 ++++---- examples/simple/simple.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'examples/perplexity') diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp index 2656382..2185b9b 100644 --- a/examples/embd-input/embd-input-lib.cpp +++ b/examples/embd-input/embd-input-lib.cpp @@ -30,7 +30,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) { fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); if (params.seed == LLAMA_DEFAULT_SEED) { - params.seed = time(NULL); + params.seed = uint32_t(time(NULL)); } fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); diff --git a/examples/grammar-parser.cpp b/examples/grammar-parser.cpp index 019d5e1..e76bd11 100644 --- a/examples/grammar-parser.cpp +++ b/examples/grammar-parser.cpp @@ -405,7 +405,7 @@ namespace grammar_parser { for (size_t i = 0, end = state.rules.size(); i < end; i++) { // fprintf(file, "%zu: ", i); // print_rule_binary(file, state.rules[i]); - print_rule(file, i, state.rules[i], symbol_id_names); + print_rule(file, uint32_t(i), state.rules[i], symbol_id_names); // fprintf(file, "\n"); } } catch (const std::exception & err) { diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 6870a11..62433e9 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -153,7 +153,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) { } size_t hs_task_count = prompt_lines.size()/6; - fprintf(stderr, "%s : loaded %lu tasks from prompt.\n", __func__, hs_task_count); + fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count); // This is needed as usual for LLaMA models bool prepend_bos = true; @@ -178,7 +178,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) { double ending_logprob[4]; }; - fprintf(stderr, "%s : selecting %lu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") ); + fprintf(stderr, "%s : selecting %zu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") ); // Select and read data from prompt lines hs_data_t *hs_data = new hs_data_t[hs_task_count]; @@ -223,7 +223,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) { // Stop if query wont fit the ctx window if (query_size > (size_t)params.n_ctx) { - fprintf(stderr, "%s : number of tokens in query %lu > n_ctxl\n", __func__, query_size); + fprintf(stderr, "%s : number of tokens in query %zu > n_ctxl\n", __func__, query_size); return; } @@ -284,7 +284,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) { } // Print the accumulated accuracy mean x 100 - printf("%li\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0); + printf("%zu\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0); fflush(stdout); } diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index aa2c435..97137a6 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -123,7 +123,7 @@ int main(int argc, char ** argv) // Evaluate the tokens : //--------------------------------- - if ( llama_eval( ctx , tokens_list.data() , tokens_list.size() , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) ) + if ( llama_eval( ctx , tokens_list.data() , int(tokens_list.size()) , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) ) { fprintf( stderr, "%s : failed to eval\n" , __func__ ); return 1; -- cgit v1.2.3