diff options
author | Tobias Lütke <tobi@shopify.com> | 2023-07-05 16:51:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-05 16:51:13 -0400 |
commit | 31cfbb1013a482e89c72146e2063ac4362becae7 (patch) | |
tree | b6479860979f90d185a132efe883ae7fb62b07f5 /examples/server/public/index.html | |
parent | 983b555e9ddb36703cee4d22642afe958de093b7 (diff) |
Expose generation timings from server & update completions.js (#2116)
* use JavaScript generators as a much cleaner API
Also add ways to access the completion as a Promise and as an EventSource
* export llama_timings as struct and expose them in server
* update readme, update baked includes
* llama : uniform variable names + struct init
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/public/index.html')
-rw-r--r-- | examples/server/public/index.html | 135 |
1 files changed, 78 insertions, 57 deletions
diff --git a/examples/server/public/index.html b/examples/server/public/index.html index 6393e2e..8ace0b0 100644 --- a/examples/server/public/index.html +++ b/examples/server/public/index.html @@ -6,7 +6,6 @@ <title>llama.cpp - chat</title> <style> - body { background-color: #fff; color: #000; @@ -22,10 +21,6 @@ height: 100%; } - header, footer { - text-align: center; - } - main { margin: 3px; display: flex; @@ -99,6 +94,15 @@ margin: 0.5em 0; display: block; } + + header, footer { + text-align: center; + } + + footer { + font-size: 80%; + color: #888; + } </style> <script type="module"> @@ -106,10 +110,10 @@ html, h, signal, effect, computed, render, useSignal, useEffect, useRef } from '/index.js'; - import { llamaComplete } from '/completion.js'; + import { llama } from '/completion.js'; const session = signal({ - prompt: "This is a conversation between user and llama, a friendly chatbot. respond in markdown.", + prompt: "This is a conversation between user and llama, a friendly chatbot. 
respond in simple markdown.", template: "{{prompt}}\n\n{{history}}\n{{char}}:", historyTemplate: "{{name}}: {{message}}", transcript: [], @@ -118,15 +122,6 @@ user: "User", }) - const transcriptUpdate = (transcript) => { - session.value = { - ...session.value, - transcript - } - } - - const chatStarted = computed(() => session.value.transcript.length > 0) - const params = signal({ n_predict: 400, temperature: 0.7, @@ -136,8 +131,18 @@ top_p: 0.5, }) + const llamaStats = signal(null) const controller = signal(null) + const generating = computed(() => controller.value == null ) + const chatStarted = computed(() => session.value.transcript.length > 0) + + const transcriptUpdate = (transcript) => { + session.value = { + ...session.value, + transcript + } + } // simple template replace const template = (str, extraSettings) => { @@ -158,7 +163,7 @@ transcriptUpdate([...session.value.transcript, ["{{user}}", msg]]) - const payload = template(session.value.template, { + const prompt = template(session.value.template, { message: msg, history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"), }); @@ -168,22 +173,26 @@ const llamaParams = { ...params.value, - prompt: payload, stop: ["</s>", template("{{char}}:"), template("{{user}}:")], } - await llamaComplete(llamaParams, controller.value, (message) => { - const data = message.data; + for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) { + const data = chunk.data; currentMessage += data.content; + // remove leading whitespace currentMessage = currentMessage.replace(/^\s+/, "") transcriptUpdate([...history, ["{{char}}", currentMessage]]) if (data.stop) { - console.log("-->", data, ' response was:', currentMessage, 'transcript state:', session.value.transcript); + console.log("Completion finished: '", currentMessage, "', summary: ", data); } - }) + + if (data.timings) { + llamaStats.value = data.timings; + } + } 
controller.value = null; } @@ -219,13 +228,12 @@ return html` <form onsubmit=${submit}> <div> - <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/> - + <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/> </div> <div class="right"> - <button type="submit" disabled=${!generating.value} >Send</button> - <button onclick=${stop} disabled=${generating}>Stop</button> - <button onclick=${reset}>Reset</button> + <button type="submit" disabled=${!generating.value} >Send</button> + <button onclick=${stop} disabled=${generating}>Stop</button> + <button onclick=${reset}>Reset</button> </div> </form> ` @@ -243,7 +251,7 @@ }, [messages]) const chatLine = ([user, msg]) => { - return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdown} text=${template(msg)} /></p>` + return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>` }; return html` @@ -313,39 +321,52 @@ </form> ` } -const Markdown = (params) => { - const md = params.text - .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>') - .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>') - .replace(/__(.*?)__/g, '<strong>$1</strong>') - .replace(/\*(.*?)\*/g, '<em>$1</em>') - .replace(/_(.*?)_/g, '<em>$1</em>') - .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>') - .replace(/`(.*?)`/g, '<code>$1</code>') - .replace(/\n/gim, '<br />'); - return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`; -}; + // poor mans markdown replacement + const Markdownish = (params) => { + const md = params.text + .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>') + .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>') + .replace(/__(.*?)__/g, '<strong>$1</strong>') + .replace(/\*(.*?)\*/g, '<em>$1</em>') + .replace(/_(.*?)_/g, '<em>$1</em>') + .replace(/```.*?\n([\s\S]*?)```/g, 
'<pre><code>$1</code></pre>') + .replace(/`(.*?)`/g, '<code>$1</code>') + .replace(/\n/gim, '<br />'); + return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`; + }; + + const ModelGenerationInfo = (params) => { + if (!llamaStats.value) { + return html`<span/>` + } + return html` + <span> + ${llamaStats.value.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.predicted_per_second.toFixed(2)} tokens per second + </span> + ` + } function App(props) { return html` - <div id="container"> - <header> - <h1>llama.cpp</h1> - </header> - - <main id="content"> - <${chatStarted.value ? ChatLog : ConfigForm} /> - </main> - - <footer id="write"> - <${MessageInput} /> - </footer> - - <footer> - <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p> - </footer> - </div> + <div id="container"> + <header> + <h1>llama.cpp</h1> + </header> + + <main id="content"> + <${chatStarted.value ? ChatLog : ConfigForm} /> + </main> + + <section id="write"> + <${MessageInput} /> + </section> + + <footer> + <p><${ModelGenerationInfo} /></p> + <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p> + </footer> + </div> `; } |