author    Tobias Lütke <tobi@shopify.com>    2023-07-05 16:51:13 -0400
committer GitHub <noreply@github.com>        2023-07-05 16:51:13 -0400
commit    31cfbb1013a482e89c72146e2063ac4362becae7 (patch)
tree      b6479860979f90d185a132efe883ae7fb62b07f5 /examples/server/public
parent    983b555e9ddb36703cee4d22642afe958de093b7 (diff)
Expose generation timings from server & update completions.js (#2116)
* use javascript generators as a much cleaner API; also add ways to access the completion as a promise and as an EventSource
* export llama_timings as struct and expose them in server
* update readme, update baked includes
* llama : uniform variable names + struct init
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/public')
-rw-r--r--  examples/server/public/completion.js  119
-rw-r--r--  examples/server/public/index.html      135
2 files changed, 181 insertions, 73 deletions
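
As a quick orientation before the diff: the commit replaces the callback-based llamaComplete with an async generator, llama, and streams per-request timings back to the client. A minimal usage sketch, assuming it runs inside a <script type="module"> on a page served by the example server (field names such as chunk.data.content and chunk.data.timings are taken from the code in the diff below):

    import { llama } from '/completion.js'

    let text = ""
    const request = llama("Tell me a joke", { n_predict: 400 })
    for await (const chunk of request) {
      text += chunk.data.content          // streamed tokens
      if (chunk.data.timings) {
        console.log(chunk.data.timings)   // generation timings sent by the server with the final chunk
      }
    }
    // the loop ends once the server sends a chunk with data.stop set
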
diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js
index 4f5005c..a43d5a7 100644
--- a/examples/server/public/completion.js
+++ b/examples/server/public/completion.js
@@ -5,20 +5,29 @@ const paramDefaults = {
stop: ["</s>"]
};
-/**
- * This function completes the input text using a llama dictionary.
- * @param {object} params - The parameters for the completion request.
- * @param {object} controller - an instance of AbortController if you need one, or null.
- * @param {function} callback - The callback function to call when the completion is done.
- * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
- */
-export const llamaComplete = async (params, controller, callback) => {
+let generation_settings = null;
+
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+// import { llama } from '/completion.js'
+//
+// const request = llama("Tell me a joke", {n_predict: 800})
+// for await (const chunk of request) {
+// document.write(chunk.data.content)
+// }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+ let controller = config.controller;
+
if (!controller) {
controller = new AbortController();
}
- const completionParams = { ...paramDefaults, ...params };
- // we use fetch directly here becasue the built in fetchEventSource does not support POST
+ const completionParams = { ...paramDefaults, ...params, prompt };
+
const response = await fetch("/completion", {
method: 'POST',
body: JSON.stringify(completionParams),
@@ -36,7 +45,6 @@ export const llamaComplete = async (params, controller, callback) => {
let content = "";
try {
-
let cont = true;
while (cont) {
@@ -59,18 +67,21 @@ export const llamaComplete = async (params, controller, callback) => {
result.data = JSON.parse(result.data);
content += result.data.content;
- // callack
- if (callback) {
- cont = callback(result) != false;
- }
+ // yield
+ yield result;
// if we got a stop token from server, we will break here
if (result.data.stop) {
+ if (result.data.generation_settings) {
+ generation_settings = result.data.generation_settings;
+ }
break;
}
}
} catch (e) {
- console.error("llama error: ", e);
+ if (e.name !== 'AbortError') {
+ console.error("llama error: ", e);
+ }
throw e;
}
finally {
@@ -79,3 +90,79 @@ export const llamaComplete = async (params, controller, callback) => {
return content;
}
+
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+// import { llamaEventTarget } from '/completion.js'
+//
+// const conn = llamaEventTarget(prompt)
+// conn.addEventListener("message", (chunk) => {
+// document.write(chunk.detail.content)
+// })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+ const eventTarget = new EventTarget();
+ (async () => {
+ let content = "";
+ for await (const chunk of llama(prompt, params, config)) {
+ if (chunk.data) {
+ content += chunk.data.content;
+ eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
+ }
+ if (chunk.data.generation_settings) {
+ eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
+ }
+ if (chunk.data.timings) {
+ eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
+ }
+ }
+ eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
+ })();
+ return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+// llamaPromise(prompt).then((content) => {
+// document.write(content)
+// })
+//
+// or
+//
+// const content = await llamaPromise(prompt)
+// document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+ return new Promise(async (resolve, reject) => {
+ let content = "";
+ try {
+ for await (const chunk of llama(prompt, params, config)) {
+ content += chunk.data.content;
+ }
+ resolve(content);
+ } catch (error) {
+ reject(error);
+ }
+ });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+ for await (const chunk of llama(params.prompt, params, { controller })) {
+ callback(chunk);
+ }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async () => {
+ if (!generation_settings) {
+ generation_settings = await fetch("/model.json").then(r => r.json());
+ }
+ return generation_settings;
+}
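
The deprecated llamaComplete wrapper kept at the end of the file makes the migration path explicit. A rough before/after sketch (prompt and controller are assumed to be defined by the caller, and handleChunk is a hypothetical helper used only for illustration):

    // before (callback API, now deprecated)
    await llamaComplete({ prompt, n_predict: 400 }, controller, (message) => {
      handleChunk(message.data)
    })

    // after (generator API)
    for await (const chunk of llama(prompt, { n_predict: 400 }, { controller })) {
      handleChunk(chunk.data)
    }
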
diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index 6393e2e..8ace0b0 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -6,7 +6,6 @@
<title>llama.cpp - chat</title>
<style>
-
body {
background-color: #fff;
color: #000;
@@ -22,10 +21,6 @@
height: 100%;
}
- header, footer {
- text-align: center;
- }
-
main {
margin: 3px;
display: flex;
@@ -99,6 +94,15 @@
margin: 0.5em 0;
display: block;
}
+
+ header, footer {
+ text-align: center;
+ }
+
+ footer {
+ font-size: 80%;
+ color: #888;
+ }
</style>
<script type="module">
@@ -106,10 +110,10 @@
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
} from '/index.js';
- import { llamaComplete } from '/completion.js';
+ import { llama } from '/completion.js';
const session = signal({
- prompt: "This is a conversation between user and llama, a friendly chatbot. respond in markdown.",
+ prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
template: "{{prompt}}\n\n{{history}}\n{{char}}:",
historyTemplate: "{{name}}: {{message}}",
transcript: [],
@@ -118,15 +122,6 @@
user: "User",
})
- const transcriptUpdate = (transcript) => {
- session.value = {
- ...session.value,
- transcript
- }
- }
-
- const chatStarted = computed(() => session.value.transcript.length > 0)
-
const params = signal({
n_predict: 400,
temperature: 0.7,
@@ -136,8 +131,18 @@
top_p: 0.5,
})
+ const llamaStats = signal(null)
const controller = signal(null)
+
const generating = computed(() => controller.value == null )
+ const chatStarted = computed(() => session.value.transcript.length > 0)
+
+ const transcriptUpdate = (transcript) => {
+ session.value = {
+ ...session.value,
+ transcript
+ }
+ }
// simple template replace
const template = (str, extraSettings) => {
@@ -158,7 +163,7 @@
transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
- const payload = template(session.value.template, {
+ const prompt = template(session.value.template, {
message: msg,
history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
});
@@ -168,22 +173,26 @@
const llamaParams = {
...params.value,
- prompt: payload,
stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
}
- await llamaComplete(llamaParams, controller.value, (message) => {
- const data = message.data;
+ for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+ const data = chunk.data;
currentMessage += data.content;
+
// remove leading whitespace
currentMessage = currentMessage.replace(/^\s+/, "")
transcriptUpdate([...history, ["{{char}}", currentMessage]])
if (data.stop) {
- console.log("-->", data, ' response was:', currentMessage, 'transcript state:', session.value.transcript);
+ console.log("Completion finished: '", currentMessage, "', summary: ", data);
}
- })
+
+ if (data.timings) {
+ llamaStats.value = data.timings;
+ }
+ }
controller.value = null;
}
@@ -219,13 +228,12 @@
return html`
<form onsubmit=${submit}>
<div>
- <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
-
+ <textarea type="text" rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
</div>
<div class="right">
- <button type="submit" disabled=${!generating.value} >Send</button>
- <button onclick=${stop} disabled=${generating}>Stop</button>
- <button onclick=${reset}>Reset</button>
+ <button type="submit" disabled=${!generating.value} >Send</button>
+ <button onclick=${stop} disabled=${generating}>Stop</button>
+ <button onclick=${reset}>Reset</button>
</div>
</form>
`
@@ -243,7 +251,7 @@
}, [messages])
const chatLine = ([user, msg]) => {
- return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdown} text=${template(msg)} /></p>`
+ return html`<p key=${msg}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>`
};
return html`
@@ -313,39 +321,52 @@
</form>
`
}
-const Markdown = (params) => {
- const md = params.text
- .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
- .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
- .replace(/__(.*?)__/g, '<strong>$1</strong>')
- .replace(/\*(.*?)\*/g, '<em>$1</em>')
- .replace(/_(.*?)_/g, '<em>$1</em>')
- .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
- .replace(/`(.*?)`/g, '<code>$1</code>')
- .replace(/\n/gim, '<br />');
- return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
-};
+ // poor man's markdown replacement
+ const Markdownish = (params) => {
+ const md = params.text
+ .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
+ .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
+ .replace(/__(.*?)__/g, '<strong>$1</strong>')
+ .replace(/\*(.*?)\*/g, '<em>$1</em>')
+ .replace(/_(.*?)_/g, '<em>$1</em>')
+ .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
+ .replace(/`(.*?)`/g, '<code>$1</code>')
+ .replace(/\n/gim, '<br />');
+ return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
+ };
+
+ const ModelGenerationInfo = (params) => {
+ if (!llamaStats.value) {
+ return html`<span/>`
+ }
+ return html`
+ <span>
+ ${llamaStats.value.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.predicted_per_second.toFixed(2)} tokens per second
+ </span>
+ `
+ }
function App(props) {
return html`
- <div id="container">
- <header>
- <h1>llama.cpp</h1>
- </header>
-
- <main id="content">
- <${chatStarted.value ? ChatLog : ConfigForm} />
- </main>
-
- <footer id="write">
- <${MessageInput} />
- </footer>
-
- <footer>
- <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
- </footer>
- </div>
+ <div id="container">
+ <header>
+ <h1>llama.cpp</h1>
+ </header>
+
+ <main id="content">
+ <${chatStarted.value ? ChatLog : ConfigForm} />
+ </main>
+
+ <section id="write">
+ <${MessageInput} />
+ </section>
+
+ <footer>
+ <p><${ModelGenerationInfo} /></p>
+ <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
+ </footer>
+ </div>
`;
}
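
The UI above reads data.timings directly from the generator loop and stores it in the llamaStats signal for the footer. Pages that prefer an event-driven style can get the same information from llamaEventTarget, which re-emits the server fields as DOM events; a small sketch based on the events dispatched in completion.js (the "Tell me a joke" prompt is just a placeholder):

    import { llamaEventTarget } from '/completion.js'

    const conn = llamaEventTarget("Tell me a joke")
    conn.addEventListener("message", (e) => document.write(e.detail.content))
    conn.addEventListener("timings", (e) => {
      // e.detail is the timings struct used by ModelGenerationInfo above
      console.log(`${e.detail.predicted_per_second.toFixed(2)} tokens per second`)
    })
    conn.addEventListener("done", (e) => console.log("full response:", e.detail.content))
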