Diffstat (limited to 'examples/server/public')
-rw-r--r--   examples/server/public/completion.js   119
-rw-r--r--   examples/server/public/index.html       135
2 files changed, 181 insertions, 73 deletions
diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js
index 4f5005c..a43d5a7 100644
--- a/examples/server/public/completion.js
+++ b/examples/server/public/completion.js
@@ -5,20 +5,29 @@ const paramDefaults = {
   stop: ["</s>"]
 };
 
-/**
- * This function completes the input text using a llama dictionary.
- * @param {object} params - The parameters for the completion request.
- * @param {object} controller - an instance of AbortController if you need one, or null.
- * @param {function} callback - The callback function to call when the completion is done.
- * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
- */
-export const llamaComplete = async (params, controller, callback) => {
+let generation_settings = null;
+
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+
   if (!controller) {
     controller = new AbortController();
   }
-  const completionParams = { ...paramDefaults, ...params };
 
-  // we use fetch directly here becasue the built in fetchEventSource does not support POST
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
   const response = await fetch("/completion", {
     method: 'POST',
     body: JSON.stringify(completionParams),
@@ -36,7 +45,6 @@ export const llamaComplete = async (params, controller, callback) => {
   let content = "";
 
   try {
-
     let cont = true;
 
     while (cont) {
@@ -59,18 +67,21 @@ export const llamaComplete = async (params, controller, callback) => {
       result.data = JSON.parse(result.data);
       content += result.data.content;
 
-      // callack
-      if (callback) {
-        cont = callback(result) != false;
-      }
+      // yield
+      yield result;
 
       // if we got a stop token from server, we will break here
       if (result.data.stop) {
+        if (result.data.generation_settings) {
+          generation_settings = result.data.generation_settings;
+        }
         break;
       }
     }
   } catch (e) {
-    console.error("llama error: ", e);
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
     throw e;
   }
   finally {
@@ -79,3 +90,79 @@ export const llamaComplete = async (params, controller, callback) => {
 
   return content;
 }
+
+// Call llama, return an event target that you can subcribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
+      }
+      if (chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
+      }
+      if (chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
+  })();
+  return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//     llamaPromise(prompt).then((content) => {
+//       document.write(content)
+//     })
+//
+//     or
+//
+//     const content = await llamaPromise(prompt)
+//     document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, { controller })) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async () => {
+  if (!generation_settings) {
+    generation_settings = await fetch("/model.json").then(r => r.json());
+  }
+  return generation_settings;
+}
diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index 6393e2e..8ace0b0 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -6,7 +6,6 @@
   <title>llama.cpp - chat</title>
 
   <style>
-
     body {
       background-color: #fff;
       color: #000;
@@ -22,10 +21,6 @@
       height: 100%;
     }
 
-    header, footer {
-      text-align: center;
-    }
-
     main {
      margin: 3px;
      display: flex;
@@ -99,6 +94,15 @@
      margin: 0.5em 0;
      display: block;
    }
+
+    header, footer {
+      text-align: center;
+    }
+
+    footer {
+      font-size: 80%;
+      color: #888;
+    }
   </style>
 
   <script type="module">
@@ -106,10 +110,10 @@
       html, h, signal, effect, computed, render, useSignal, useEffect, useRef
     } from '/index.js';
 
-    import { llamaComplete } from '/completion.js';
+    import { llama } from '/completion.js';
 
     const session = signal({
-      prompt: "This is a conversation between user and llama, a friendly chatbot. respond in markdown.",
+      prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
       template: "{{prompt}}\n\n{{history}}\n{{char}}:",
       historyTemplate: "{{name}}: {{message}}",
       transcript: [],
@@ -118,15 +122,6 @@
       user: "User",
     })
 
-    const transcriptUpdate = (transcript) => {
-      session.value = {
-        ...session.value,
-        transcript
-      }
-    }
-
-    const chatStarted = computed(() => session.value.transcript.length > 0)
-
     const params = signal({
       n_predict: 400,
       temperature: 0.7,
@@ -136,8 +131,18 @@
       top_p: 0.5,
     })
 
+    const llamaStats = signal(null)
     const controller = signal(null)
 
+    const generating = computed(() => controller.value == null )
+    const chatStarted = computed(() => session.value.transcript.length > 0)
+
+    const transcriptUpdate = (transcript) => {
+      session.value = {
+        ...session.value,
+        transcript
+      }
+    }
 
     // simple template replace
     const template = (str, extraSettings) => {
@@ -158,7 +163,7 @@
 
       transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 
-      const payload = template(session.value.template, {
+      const prompt = template(session.value.template, {
        message: msg,
        history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
      });
@@ -168,22 +173,26 @@
 
      const llamaParams = {
        ...params.value,
-        prompt: payload,
        stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
      }
 
-      await llamaComplete(llamaParams, controller.value, (message) => {
-        const data = message.data;
+      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+        const data = chunk.data;
 
        currentMessage += data.content;
+
        // remove leading whitespace
        currentMessage = currentMessage.replace(/^\s+/, "")
 
        transcriptUpdate([...history, ["{{char}}", currentMessage]])
 
        if (data.stop) {
-          console.log("-->", data, ' response was:', currentMessage, 'transcript state:', session.value.transcript);
+          console.log("Completion finished: '", currentMessage, "', summary: ", data);
        }
-      })
+
+        if (data.timings) {
+          llamaStats.value = data.timings;
+        }
+      }
 
      controller.value = null;
    }
@@ -219,13 +228,12 @@
      return html`
        <form onsubmit=${submit}>
          <div>
-            <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
-
+            <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
          </div>
          <div class="right">
-            <button type="submit" disabled=${!generating.value} >Send</button>
-            <button onclick=${stop} disabled=${generating}>Stop</button>
-            <button onclick=${reset}>Reset</button>
+            <button type="submit" disabled=${!generating.value} >Send</button>
+            <button onclick=${stop} disabled=${generating}>Stop</button>
+            <button onclick=${reset}>Reset</button>
          </div>
        </form>
      `
@@ -243,7 +251,7 @@
      }, [messages])
 
      const chatLine = ([user, msg]) => {
-        return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdown} text=${template(msg)} /></p>`
+        return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>`
      };
 
      return html`
@@ -313,39 +321,52 @@
        </form>
      `
    }
-const Markdown = (params) => {
-  const md = params.text
-    .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
-    .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
-    .replace(/__(.*?)__/g, '<strong>$1</strong>')
-    .replace(/\*(.*?)\*/g, '<em>$1</em>')
-    .replace(/_(.*?)_/g, '<em>$1</em>')
-    .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
-    .replace(/`(.*?)`/g, '<code>$1</code>')
-    .replace(/\n/gim, '<br />');
-  return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
-};
+    // poor mans markdown replacement
+    const Markdownish = (params) => {
+      const md = params.text
+        .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
+        .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
+        .replace(/__(.*?)__/g, '<strong>$1</strong>')
+        .replace(/\*(.*?)\*/g, '<em>$1</em>')
+        .replace(/_(.*?)_/g, '<em>$1</em>')
+        .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
+        .replace(/`(.*?)`/g, '<code>$1</code>')
+        .replace(/\n/gim, '<br />');
+      return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
+    };
+
+    const ModelGenerationInfo = (params) => {
+      if (!llamaStats.value) {
+        return html`<span/>`
+      }
+      return html`
+        <span>
+          ${llamaStats.value.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.predicted_per_second.toFixed(2)} tokens per second
+        </span>
+      `
+    }
 
    function App(props) {
 
      return html`
-        <div id="container">
-          <header>
-            <h1>llama.cpp</h1>
-          </header>
-
-          <main id="content">
-            <${chatStarted.value ? ChatLog : ConfigForm} />
-          </main>
-
-          <footer id="write">
-            <${MessageInput} />
-          </footer>
-
-          <footer>
-            <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
-          </footer>
-        </div>
+        <div id="container">
+          <header>
+            <h1>llama.cpp</h1>
+          </header>
+
+          <main id="content">
+            <${chatStarted.value ? ChatLog : ConfigForm} />
+          </main>
+
+          <section id="write">
+            <${MessageInput} />
+          </section>
+
+          <footer>
+            <p><${ModelGenerationInfo} /></p>
+            <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
+          </footer>
+        </div>
      `;
    }
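
For reference, a minimal sketch of how a page might consume the llama() generator introduced by this change, including early cancellation through an AbortController passed as config.controller. The prompt text, n_predict value, and the #out element are illustrative and not part of the diff:

    import { llama } from '/completion.js';

    // Passing our own controller lets a Stop button abort the request;
    // llama() skips logging the resulting AbortError but still re-throws it.
    const controller = new AbortController();

    try {
      for await (const chunk of llama("Hello, llama!", { n_predict: 64 }, { controller })) {
        // each chunk carries the server's parsed SSE payload in chunk.data
        document.querySelector('#out').textContent += chunk.data.content;
      }
    } catch (e) {
      if (e.name !== 'AbortError') throw e; // a deliberate stop is not an error
    }

    // elsewhere, e.g. in a Stop button handler:
    // controller.abort();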
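
Similarly, a sketch of the event-target wrapper. The event names ("message", "timings", "done") and their detail shapes come from the CustomEvents dispatched by llamaEventTarget in this diff; the prompt and the #out element are again illustrative:

    import { llamaEventTarget } from '/completion.js';

    const conn = llamaEventTarget("Tell me a joke", { n_predict: 128 });

    // streamed tokens
    conn.addEventListener("message", (e) => {
      document.querySelector('#out').textContent += e.detail.content;
    });

    // per-request timing stats, as used by the new ModelGenerationInfo component
    conn.addEventListener("timings", (e) => {
      console.log(`${e.detail.predicted_per_second.toFixed(2)} tokens per second`);
    });

    // fired once with the full concatenated text
    conn.addEventListener("done", (e) => {
      console.log("full response:", e.detail.content);
    });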
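
Finally, a small sketch of llamaModelInfo(): per the diff it fetches /model.json once and caches the result in generation_settings, which is also refreshed from the final streamed chunk after a completion finishes, so repeated calls avoid an extra round trip. The exact fields of the returned object are not shown in the diff, so only the whole object is logged here:

    import { llamaModelInfo } from '/completion.js';

    // First call hits /model.json; later calls return the cached settings.
    const settings = await llamaModelInfo();
    console.log("generation settings (context window etc.):", settings);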