diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js
index 6cb1bf023..2841c0aba 100644
--- a/examples/server/public/completion.js
+++ b/examples/server/public/completion.js
@@ -7,20 +7,27 @@ const paramDefaults = {
 
 let generation_settings = null;
 
 
-/**
- * This function completes the input text using a llama dictionary.
- * @param {object} params - The parameters for the completion request.
- * @param {object} controller - an instance of AbortController if you need one, or null.
- * @param {function} callback - The callback function to call when the completion is done.
- * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
- */
-export const llamaComplete = async (params, controller, callback) => {
-  if (!controller) {
-    controller = new AbortController();
-  }
-  const completionParams = { ...paramDefaults, ...params };
-  // we use fetch directly here becasue the built in fetchEventSource does not support POST
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+
+  if (!controller) {
+    controller = new AbortController();
+  }
+
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
   const response = await fetch("/completion", {
     method: 'POST',
     body: JSON.stringify(completionParams),
@@ -38,7 +45,6 @@ export const llamaComplete = async (params, controller, callback) => {
   let content = "";
 
   try {
-
     let cont = true;
 
     while (cont) {
@@ -61,10 +67,8 @@ export const llamaComplete = async (params, controller, callback) => {
       result.data = JSON.parse(result.data);
       content += result.data.content;
 
-      // callack
-      if (callback) {
-        cont = callback(result) != false;
-      }
+      // yield
+      yield result;
 
       // if we got a stop token from server, we will break here
      if (result.data.stop) {
@@ -75,7 +79,9 @@ export const llamaComplete = async (params, controller, callback) => {
       }
     }
   } catch (e) {
-    console.error("llama error: ", e);
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
     throw e;
   }
   finally {
@@ -85,10 +91,78 @@ export const llamaComplete = async (params, controller, callback) => {
   return content;
 }
 
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", {detail: chunk.data}));
+      }
+      if(chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", {detail: chunk.data.generation_settings}));
+      }
+      if(chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", {detail: chunk.data.timings}));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", {detail: {content}}));
+  })();
+  return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//    llamaPromise(prompt).then((content) => {
+//      document.write(content)
+//    })
+//
+//    or
+//
+//    const content = await llamaPromise(prompt)
+//    document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, {controller})) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
 export const llamaModelInfo = async () => {
   if (!generation_settings) {
     generation_settings = await fetch("/model.json").then(r => r.json());
   }
   return generation_settings;
 }
-
diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index e65f468fb..8ace0b0af 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -110,7 +110,7 @@
       html, h, signal, effect, computed, render, useSignal, useEffect, useRef
     } from '/index.js';
 
-    import { llamaComplete } from '/completion.js';
+    import { llama } from '/completion.js';
 
     const session = signal({
       prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
@@ -163,7 +163,7 @@
 
      transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 
-      const payload = template(session.value.template, {
+      const prompt = template(session.value.template, {
        message: msg,
        history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
      });
@@ -173,13 +173,13 @@
 
      const llamaParams = {
        ...params.value,
-        prompt: payload,
        stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
      }
 
-      await llamaComplete(llamaParams, controller.value, (message) => {
-        const data = message.data;
+      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+        const data = chunk.data;
 
        currentMessage += data.content;
+        // remove leading whitespace
        currentMessage = currentMessage.replace(/^\s+/, "")
 
@@ -192,7 +192,7 @@
        if (data.timings) {
          llamaStats.value = data.timings;
        }
-      })
+      }
 
      controller.value = null;
    }
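
For reference, a short usage sketch of the generator API introduced above; it is not part of the patch. It assumes the page is served by the example server (so '/completion.js' resolves), and the 'output' element id, prompt, and 10-second timeout are hypothetical, chosen only to show how config.controller and the AbortError handling added in this change fit together.

// Usage sketch (illustrative, not part of the patch): stream a completion
// into the page and cancel it by aborting the underlying fetch.
import { llama } from '/completion.js';

const controller = new AbortController();
setTimeout(() => controller.abort(), 10000); // hypothetical 10s cutoff

const output = document.getElementById('output'); // hypothetical element

(async () => {
  try {
    for await (const chunk of llama("Tell me a joke", { n_predict: 256 }, { controller })) {
      output.textContent += chunk.data.content; // chunk.data is the parsed SSE payload
    }
  } catch (e) {
    // With this patch, llama() no longer logs AbortError, but it still rethrows it,
    // so an expected cancellation is swallowed here.
    if (e.name !== 'AbortError') throw e;
  }
})();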