jbilcke-hf (HF staff) committed
Commit faf4ba4 • 1 Parent(s): 9bcdb59

added suggestion from user + work on the progressive loader
.env CHANGED
@@ -53,6 +53,9 @@ RENDERING_HF_INFERENCE_API_BASE_MODEL="stabilityai/stable-diffusion-xl-base-1.0"
 # If you decided to use a Hugging Face Inference API model for the RENDERING engine
 RENDERING_HF_INFERENCE_API_REFINER_MODEL="stabilityai/stable-diffusion-xl-refiner-1.0"
 
+# If your model returns a different file type (eg. jpg or webp) change it here
+RENDERING_HF_INFERENCE_API_FILE_TYPE="image/png"
+
 # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
 RENDERING_VIDEOCHAIN_API_URL="http://localhost:7860"
 
src/app/engine/render.ts CHANGED
@@ -15,6 +15,7 @@ const serverHuggingfaceApiUrl = `${process.env.RENDERING_HF_INFERENCE_ENDPOINT_U
 const serverHuggingfaceInferenceApiModel = `${process.env.RENDERING_HF_INFERENCE_API_BASE_MODEL || ""}`
 const serverHuggingfaceInferenceApiModelRefinerModel = `${process.env.RENDERING_HF_INFERENCE_API_REFINER_MODEL || ""}`
 const serverHuggingfaceInferenceApiModelTrigger = `${process.env.RENDERING_HF_INFERENCE_API_MODEL_TRIGGER || ""}`
+const serverHuggingfaceInferenceApiFileType = `${process.env.RENDERING_HF_INFERENCE_API_FILE_TYPE || ""}`
 
 const serverReplicateApiKey = `${process.env.AUTH_REPLICATE_API_TOKEN || ""}`
 const serverReplicateApiModel = `${process.env.RENDERING_REPLICATE_API_MODEL || ""}`
@@ -77,6 +78,7 @@ export async function newRender({
   let huggingfaceApiUrl = serverHuggingfaceApiUrl
   let huggingfaceInferenceApiModelRefinerModel = serverHuggingfaceInferenceApiModelRefinerModel
   let huggingfaceInferenceApiModelTrigger = serverHuggingfaceInferenceApiModelTrigger
+  let huggingfaceInferenceApiFileType = serverHuggingfaceInferenceApiFileType
 
   const placeholder = "<USE YOUR OWN TOKEN>"
 
@@ -116,6 +118,7 @@ export async function newRender({
     huggingfaceApiKey = settings.huggingfaceApiKey
     huggingfaceInferenceApiModel = settings.huggingfaceInferenceApiModel
     huggingfaceInferenceApiModelTrigger = settings.huggingfaceInferenceApiModelTrigger
+    huggingfaceInferenceApiFileType = settings.huggingfaceInferenceApiFileType
   }
 
   try {
@@ -258,6 +261,7 @@ export async function newRender({
       method: "POST",
      headers: {
        "Content-Type": "application/json",
+        Accept: huggingfaceInferenceApiFileType,
        Authorization: `Bearer ${huggingfaceApiKey}`,
      },
      body: JSON.stringify({
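The new Accept header is what makes the file type setting take effect: when the server-side render call hits the Inference API, it now advertises which MIME type it expects back instead of always assuming PNG. A minimal sketch of that request shape, assuming a plain fetch-based caller; the function name, URL, token and body fields below are illustrative placeholders, not the project's actual code:

// Sketch: request an image from an inference endpoint, negotiating the
// response MIME type via the Accept header (mirrors the change in newRender()).
// apiUrl, apiKey and the `inputs` body shape are assumptions, not project values.
async function fetchPanelImage(
  apiUrl: string,
  apiKey: string,
  prompt: string,
  fileType: string = "image/png" // e.g. "image/jpeg" or "image/webp" if the model emits those
): Promise<Blob> {
  const res = await fetch(apiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: fileType,                      // tell the endpoint which file type we expect back
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({ inputs: prompt }),
  })
  if (!res.ok) {
    throw new Error(`render request failed: ${res.status}`)
  }
  return res.blob()                          // binary image payload in the negotiated format
}

Since the default stays "image/png" (both in .env and in defaultSettings below), existing setups keep working; only models that return jpg or webp need to set RENDERING_HF_INFERENCE_API_FILE_TYPE.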
src/app/interface/settings-dialog/defaultSettings.ts CHANGED
@@ -6,6 +6,7 @@ export const defaultSettings: Settings = {
   huggingfaceApiKey: "",
   huggingfaceInferenceApiModel: "stabilityai/stable-diffusion-xl-base-1.0",
   huggingfaceInferenceApiModelTrigger: "",
+  huggingfaceInferenceApiFileType: "image/png",
   replicateApiKey: "",
   replicateApiModel: "stabilityai/sdxl",
   replicateApiModelVersion: "da77bc59ee60423279fd632efb4795ab731d9e3ca9705ef3341091fb989b7eaf",
src/app/interface/settings-dialog/getSettings.ts CHANGED
@@ -13,6 +13,7 @@ export function getSettings(): Settings {
   huggingfaceApiKey: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceApiKey), defaultSettings.huggingfaceApiKey),
   huggingfaceInferenceApiModel: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiModel), defaultSettings.huggingfaceInferenceApiModel),
   huggingfaceInferenceApiModelTrigger: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiModelTrigger), defaultSettings.huggingfaceInferenceApiModelTrigger),
+  huggingfaceInferenceApiFileType: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiFileType), defaultSettings.huggingfaceInferenceApiFileType),
   replicateApiKey: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiKey), defaultSettings.replicateApiKey),
   replicateApiModel: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiModel), defaultSettings.replicateApiModel),
   replicateApiModelVersion: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiModelVersion), defaultSettings.replicateApiModelVersion),
src/app/interface/settings-dialog/index.tsx CHANGED
@@ -43,6 +43,10 @@ export function SettingsDialog() {
     localStorageKeys.huggingfaceInferenceApiModelTrigger,
     defaultSettings.huggingfaceInferenceApiModelTrigger
   )
+  const [huggingfaceInferenceApiFileType, setHuggingfaceInferenceApiFileType] = useLocalStorage<string>(
+    localStorageKeys.huggingfaceInferenceApiFileType,
+    defaultSettings.huggingfaceInferenceApiFileType
+  )
   const [replicateApiKey, setReplicateApiKey] = useLocalStorage<string>(
     localStorageKeys.replicateApiKey,
     defaultSettings.replicateApiKey
@@ -148,6 +152,17 @@ export function SettingsDialog() {
     value={huggingfaceInferenceApiModel}
   />
 </Field>
+<Field>
+  <Label>The file type supported by the model (jpg, webp..):</Label>
+  <Input
+    className="font-mono"
+    placeholder="Inference API file type"
+    onChange={(x) => {
+      setHuggingfaceInferenceApiFileType(x.target.value)
+    }}
+    value={huggingfaceInferenceApiFileType}
+  />
+</Field>
 <p className="text-sm text-zinc-700">
   Using a LoRA? Don&apos;t forget the trigger keyword! Also you will want to use the &quot;Neutral&quot; style.
 </p>
src/app/interface/settings-dialog/localStorageKeys.ts CHANGED
@@ -6,6 +6,7 @@ export const localStorageKeys: Record<keyof Settings, string> = {
   huggingfaceApiKey: "CONF_AUTH_HF_API_TOKEN",
   huggingfaceInferenceApiModel: "CONF_RENDERING_HF_INFERENCE_API_BASE_MODEL",
   huggingfaceInferenceApiModelTrigger: "CONF_RENDERING_HF_INFERENCE_API_BASE_MODEL_TRIGGER",
+  huggingfaceInferenceApiFileType: "CONF_RENDERING_HF_INFERENCE_API_FILE_TYPE",
   replicateApiKey: "CONF_AUTH_REPLICATE_API_TOKEN",
   replicateApiModel: "CONF_RENDERING_REPLICATE_API_MODEL",
   replicateApiModelVersion: "CONF_RENDERING_REPLICATE_API_MODEL_VERSION",
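Taken together, the settings changes above plumb one new string field through the whole stack: a default value, a localStorage key, a getter, and a dialog input. A condensed sketch of the read path, assuming a simplified getValidString (the project's real helper may differ):

// Sketch: resolve a Settings field such as huggingfaceInferenceApiFileType
// from localStorage, falling back to the default when unset or empty.
type SettingsSlice = { huggingfaceInferenceApiFileType: string }

const defaults: SettingsSlice = { huggingfaceInferenceApiFileType: "image/png" }
const keys = { huggingfaceInferenceApiFileType: "CONF_RENDERING_HF_INFERENCE_API_FILE_TYPE" }

// simplified stand-in for the project's getValidString()
function getValidString(value: unknown, defaultValue: string): string {
  return typeof value === "string" && value.length > 0 ? value : defaultValue
}

function readFileTypeSetting(): string {
  const stored = typeof localStorage !== "undefined"
    ? localStorage.getItem(keys.huggingfaceInferenceApiFileType)
    : null
  return getValidString(stored, defaults.huggingfaceInferenceApiFileType)
}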
src/app/main.tsx CHANGED
@@ -10,8 +10,9 @@ import { Zoom } from "./interface/zoom"
 import { getStory } from "./queries/getStory"
 import { BottomBar } from "./interface/bottom-bar"
 import { Page } from "./interface/page"
-import { LLMResponse } from "@/types"
+import { GeneratedPanel } from "@/types"
 import { joinWords } from "@/lib/joinWords"
+import { getStoryContinuation } from "./queries/getStoryContinuation"
 
 export default function Main() {
   const [_isPending, startTransition] = useTransition()
@@ -44,82 +45,93 @@ export default function Main() {
       // I don't think we are going to need a rate limiter on the LLM part anymore
       const enableRateLimiter = false // `${process.env.NEXT_PUBLIC_ENABLE_RATE_LIMITER}` === "true"
 
-      let llmResponse: LLMResponse = []
-
       const [stylePrompt, userStoryPrompt] = prompt.split("||").map(x => x.trim())
 
-      try {
-        llmResponse = await getStory({
-          preset,
-          prompt: joinWords([ userStoryPrompt ]),
-          nbTotalPanels
-        })
-        console.log("LLM responded:", llmResponse)
-
-      } catch (err) {
-        console.log("LLM step failed due to:", err)
-        console.log("we are now switching to a degraded mode, using 4 similar panels")
-
-        llmResponse = []
-        for (let p = 0; p < nbTotalPanels; p++) {
-          llmResponse.push({
-            panel: p,
-            instructions: joinWords([
-              stylePrompt,
-              userStoryPrompt,
-              `${".".repeat(p)}`,
-            ]),
-            caption: "(Sorry, LLM generation failed: using degraded mode)"
-          })
-        }
-        console.error(err)
-      }
-
       // we have to limit the size of the prompt, otherwise the rest of the style won't be followed
 
       let limitedStylePrompt = stylePrompt.trim().slice(0, 77).trim()
       if (limitedStylePrompt.length !== stylePrompt.length) {
         console.log("Sorry folks, the style prompt was cut to:", limitedStylePrompt)
       }
 
       // new experimental prompt: let's drop the user prompt, and only use the style
       const lightPanelPromptPrefix = joinWords(preset.imagePrompt(limitedStylePrompt))
 
       // this prompt will be used if the LLM generation failed
       const degradedPanelPromptPrefix = joinWords([
         ...preset.imagePrompt(limitedStylePrompt),
 
         // we re-inject the story, then
         userStoryPrompt
       ])
 
-      const newPanels: string[] = []
+      let existingPanels: GeneratedPanel[] = []
+      const newPanelsPrompts: string[] = []
       const newCaptions: string[] = []
-      setWaitABitMore(true)
-      console.log("Panel prompts for SDXL:")
-      for (let p = 0; p < nbTotalPanels; p++) {
-        newCaptions.push(llmResponse[p]?.caption.trim() || "...")
-        const newPanel = joinWords([
-
-          // what we do here is that ideally we give full control to the LLM for prompting,
-          // unless there was a catastrophic failure, in that case we preserve the original prompt
-          llmResponse[p]?.instructions
-            ? lightPanelPromptPrefix
-            : degradedPanelPromptPrefix,
-
-          llmResponse[p]?.instructions
-        ])
-        newPanels.push(newPanel)
-        console.log(newPanel)
+
+      const nbPanelsToGenerate = 2
+
+      for (
+        let currentPanel = 0;
+        currentPanel < nbTotalPanels;
+        currentPanel += nbPanelsToGenerate
+      ) {
+        if (currentPanel > (nbTotalPanels / 2)) {
+          console.log("good, we are half way there, hold tight!")
+          // setWaitABitMore(true)
+        }
+        try {
+          const candidatePanels = await getStoryContinuation({
+            preset,
+            stylePrompt,
+            userStoryPrompt,
+            nbPanelsToGenerate,
+            existingPanels,
+          })
+          console.log("LLM generated some new panels:", candidatePanels)
+
+          existingPanels.push(...candidatePanels)
+
+          console.log(`Converting the ${nbPanelsToGenerate} new panels into image prompts..`)
+
+          const startAt = currentPanel
+          const endAt = currentPanel + nbPanelsToGenerate
+          for (let p = startAt; p < endAt; p++) {
+            newCaptions.push(existingPanels[p]?.caption.trim() || "...")
+            const newPanel = joinWords([
+
+              // what we do here is that ideally we give full control to the LLM for prompting,
+              // unless there was a catastrophic failure, in that case we preserve the original prompt
+              existingPanels[p]?.instructions
+                ? lightPanelPromptPrefix
+                : degradedPanelPromptPrefix,
+
+              existingPanels[p]?.instructions
+            ])
+            newPanelsPrompts.push(newPanel)
+
+            console.log(`Image prompt for panel ${p} => "${newPanel}"`)
+          }
+
+          // update the frontend
+          console.log("updating the frontend..")
+          setCaptions(newCaptions)
+          setPanels(newPanelsPrompts)
+
+          setGeneratingStory(false)
+        } catch (err) {
+          console.log("failed to generate the story, aborting here")
+          setGeneratingStory(false)
+          break
+        }
       }
 
-      setCaptions(newCaptions)
-      setPanels(newPanels)
-
+      /*
       setTimeout(() => {
         setGeneratingStory(false)
         setWaitABitMore(false)
       }, enableRateLimiter ? 12000 : 0)
+      */
 
     })
   }, [prompt, preset?.label, nbTotalPanels]) // important: we need to react to preset changes too
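The rewritten main loop is the "progressive loader" mentioned in the commit message: instead of asking the LLM for every panel up front, it requests the story in fixed-size batches, feeds the accumulated panels back in as context, and refreshes the UI after each batch. A sketch of that pattern stripped of the React and project specifics; the callback names here are placeholders, not project APIs:

// Sketch of a progressive, batch-by-batch generation loop.
type Panel = { panel: number; instructions: string; caption: string }

async function generateProgressively(
  nbTotalPanels: number,
  generateBatch: (existing: Panel[], count: number) => Promise<Panel[]>, // assumed generator
  onBatchReady: (panelsSoFar: Panel[]) => void,                          // e.g. a UI update
  batchSize = 2
): Promise<Panel[]> {
  const existing: Panel[] = []
  for (let current = 0; current < nbTotalPanels; current += batchSize) {
    try {
      // each batch sees everything generated so far, so the story stays coherent
      const batch = await generateBatch(existing, batchSize)
      existing.push(...batch)
      // surface partial results as soon as a batch is available
      onBatchReady([...existing])
    } catch (err) {
      console.error("batch generation failed, stopping early:", err)
      break
    }
  }
  return existing
}

The payoff is latency hiding: the first two panels can be rendered while the later ones are still being written, which is why the old setWaitABitMore/setTimeout machinery is commented out above.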
src/app/queries/getStory.ts CHANGED
@@ -1,13 +1,13 @@
 
-import { dirtyLLMJsonParser } from "@/lib/dirtyLLMJsonParser"
-import { dirtyCaptionCleaner } from "@/lib/dirtyCaptionCleaner"
-
 import { predict } from "./predict"
 import { Preset } from "../engine/presets"
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 import { cleanJson } from "@/lib/cleanJson"
 import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
 
+import { dirtyGeneratedPanelCleaner } from "@/lib/dirtyGeneratedPanelCleaner"
+import { dirtyGeneratedPanelsParser } from "@/lib/dirtyGeneratedPanelsParser"
+
 export const getStory = async ({
   preset,
   prompt = "",
@@ -16,19 +16,19 @@ export const getStory = async ({
   preset: Preset;
   prompt: string;
   nbTotalPanels: number;
-}): Promise<LLMResponse> => {
+}): Promise<GeneratedPanels> => {
   // throw new Error("Planned maintenance")
 
   // In case you need to quickly debug the RENDERING engine you can uncomment this:
-  // return mockLLMResponse
+  // return mockGeneratedPanels
 
   const query = createZephyrPrompt([
     {
       role: "system",
       content: [
         `You are a writer specialized in ${preset.llmPrompt}`,
-        `Please write detailed drawing instructions and a short (2-3 sentences long) speech caption for the ${nbTotalPanels} panels of a new story. Please make sure each of the ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
-        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
+        `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the ${nbTotalPanels} panels of a new story. Please make sure each of the ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
+        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
         // `Give your response as Markdown bullet points.`,
         `Be brief in your ${nbTotalPanels} instructions and narrative captions, don't add your own comments. The whole story must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
       ].filter(item => item).join("\n")
@@ -37,7 +37,7 @@ export const getStory = async ({
       role: "user",
       content: `The story is: ${prompt}`,
     }
-  ]) + "\n[{"
+  ]) + "\n```[{"
 
 
   let result = ""
@@ -64,10 +64,10 @@ export const getStory = async ({
   // console.log("Raw response from LLM:", result)
   const tmp = cleanJson(result)
 
-  let llmResponse: LLMResponse = []
+  let GeneratedPanels: GeneratedPanels = []
 
   try {
-    llmResponse = dirtyLLMJsonParser(tmp)
+    GeneratedPanels = dirtyGeneratedPanelsParser(tmp)
   } catch (err) {
     // console.log(`failed to read LLM response: ${err}`)
     // console.log(`original response was:`, result)
@@ -75,7 +75,7 @@ export const getStory = async ({
     // in case of failure here, it might be because the LLM hallucinated a completely different response,
     // such as markdown. There is no real solution.. but we can try a fallback:
 
-    llmResponse = (
+    GeneratedPanels = (
       tmp.split("*")
         .map(item => item.trim())
         .map((cap, i) => ({
@@ -86,5 +86,5 @@ export const getStory = async ({
     )
   }
 
-  return llmResponse.map(res => dirtyCaptionCleaner(res))
+  return GeneratedPanels.map(res => dirtyGeneratedPanelCleaner(res))
 }
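One small but load-bearing change here is the suffix appended to the query: it now ends with a newline, a Markdown fence, and "[{", which primes the model to continue an already-started JSON array instead of chatting. The stripped prefix then has to be accounted for before parsing, which is what cleanJson and dirtyGeneratedPanelsParser handle. A toy illustration of the priming idea only; the project's own cleanup helpers differ in detail, and callLlm is a placeholder:

// Sketch: prime the model to continue a JSON array, then restore the
// pre-filled opening characters before parsing.
async function getPanelsJson(
  callLlm: (prompt: string) => Promise<string>, // assumed LLM call
  systemAndUserPrompt: string
): Promise<unknown[]> {
  // the completion is expected to start mid-array, e.g. `"panel": 1, ... }]`
  const primedPrompt = systemAndUserPrompt + "\n```[{"
  const completion = await callLlm(primedPrompt)

  // drop anything after a closing fence, then re-attach the "[{" we pre-filled
  const body = completion.split("```")[0]
  const jsonText = "[{" + body

  return JSON.parse(jsonText) as unknown[]
}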
src/app/queries/getStoryContinuation.ts CHANGED
@@ -1,96 +1,68 @@
-
-import { dirtyLLMJsonParser } from "@/lib/dirtyLLMJsonParser"
-import { dirtyCaptionCleaner } from "@/lib/dirtyCaptionCleaner"
-
-import { predict } from "./predict"
 import { Preset } from "../engine/presets"
-import { LLMResponse } from "@/types"
-import { cleanJson } from "@/lib/cleanJson"
-import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
+import { GeneratedPanel } from "@/types"
+import { predictNextPanels } from "./predictNextPanels"
+import { joinWords } from "@/lib/joinWords"
 
 export const getStoryContinuation = async ({
   preset,
-  prompt = "",
-  nbTotalPanels = 2,
-  previousCaptions = [],
+  stylePrompt = "",
+  userStoryPrompt = "",
+  nbPanelsToGenerate = 2,
+  existingPanels = [],
 }: {
   preset: Preset;
-  prompt: string;
-  nbTotalPanels: number;
-  previousCaptions: string[];
-}): Promise<LLMResponse> => {
-  // throw new Error("Planned maintenance")
-
-  // In case you need to quickly debug the RENDERING engine you can uncomment this:
-  // return mockLLMResponse
+  stylePrompt: string;
+  userStoryPrompt: string;
+  nbPanelsToGenerate: number;
+  existingPanels: GeneratedPanel[];
+}): Promise<GeneratedPanel[]> => {
 
-  const previousCaptionsTemplate = previousCaptions.length
-    ? `To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(previousCaptions, null, 2)}`
-    : ''
+  let panels: GeneratedPanel[] = []
+  const startAt: number = existingPanels.at(-1)?.panel || existingPanels.length || 0
+  const endAt: number = startAt + nbPanelsToGenerate
 
-  const query = createZephyrPrompt([
-    {
-      role: "system",
-      content: [
-        `You are a writer specialized in ${preset.llmPrompt}`,
-        `Please write detailed drawing instructions and a short (2-3 sentences long) speech caption for the next ${nbTotalPanels} panels of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
-        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
-        // `Give your response as Markdown bullet points.`,
-        `Be brief in your ${nbTotalPanels} instructions and narrative captions, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
-      ].filter(item => item).join("\n")
-    },
-    {
-      role: "user",
-      content: `The story is about: ${prompt}.${previousCaptionsTemplate}`,
-    }
-  ]) + "\n[{"
+  try {
 
-  let result = ""
+    const prompt = joinWords([ userStoryPrompt ])
 
-  try {
-    // console.log(`calling predict(${query}, ${nbTotalPanels})`)
-    result = `${await predict(query, nbTotalPanels) || ""}`.trim()
-    if (!result.length) {
-      throw new Error("empty result!")
-    }
-  } catch (err) {
-    // console.log(`prediction of the story failed, trying again..`)
-    try {
-      result = `${await predict(query+".", nbTotalPanels) || ""}`.trim()
-      if (!result.length) {
-        throw new Error("empty result!")
-      }
-    } catch (err) {
-      console.error(`prediction of the story failed again 💩`)
-      throw new Error(`failed to generate the story ${err}`)
-    }
-  }
+    const panelCandidates: GeneratedPanel[] = await predictNextPanels({
+      preset,
+      prompt,
+      nbPanelsToGenerate,
+      existingPanels,
+    })
 
-  // console.log("Raw response from LLM:", result)
-  const tmp = cleanJson(result)
-
-  let llmResponse: LLMResponse = []
+    console.log("LLM responded with panels:", panelCandidates)
 
-  try {
-    llmResponse = dirtyLLMJsonParser(tmp)
+    // we clean the output from the LLM
+    // most importantly, we need to adjust the panel index,
+    // to start from where we last finished
+    for (let p = startAt; p < endAt; p++) {
+      panels.push({
+        panel: p,
+        instructions: `${panelCandidates[p]?.instructions || ""}`,
+        caption: `${panelCandidates[p]?.caption || ""}`,
+      })
+    }
+
   } catch (err) {
-    // console.log(`failed to read LLM response: ${err}`)
-    // console.log(`original response was:`, result)
-
-    // in case of failure here, it might be because the LLM hallucinated a completely different response,
-    // such as markdown. There is no real solution.. but we can try a fallback:
-
-    llmResponse = (
-      tmp.split("*")
-        .map(item => item.trim())
-        .map((cap, i) => ({
-          panel: i,
-          caption: cap,
-          instructions: cap,
-        }))
-    )
+    console.log("LLM step failed due to:", err)
+    console.log("we are now switching to a degraded mode, using 4 similar panels")
+    panels = []
+    for (let p = startAt; p < endAt; p++) {
+      panels.push({
+        panel: p,
+        instructions: joinWords([
+          stylePrompt,
+          userStoryPrompt,
+          `${".".repeat(p)}`,
+        ]),
+        caption: "(Sorry, LLM generation failed: using degraded mode)"
+      })
+    }
+    console.error(err)
+  } finally {
+    return panels
   }
-
-  return llmResponse.map(res => dirtyCaptionCleaner(res))
-}
+}
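The two interesting details in the rewritten continuation are the start index and the fallback: new panels are numbered from the last existing panel, and if the LLM call throws, the function still returns something usable built from the style and story prompts. A generic sketch of that same shape; it indexes the candidate batch from zero, which differs slightly from the diff above, so treat it as illustrative rather than a drop-in:

// Sketch: continue a numbered panel sequence from where it last stopped,
// with a degraded fallback when generation fails.
type Panel = { panel: number; instructions: string; caption: string }

async function continuePanels(
  existing: Panel[],
  count: number,
  generate: () => Promise<Panel[]>,   // assumed generator, may throw
  fallbackInstructions: string
): Promise<Panel[]> {
  const startAt = existing.at(-1)?.panel ?? -1
  const out: Panel[] = []
  try {
    const candidates = await generate()
    for (let i = 0; i < count; i++) {
      out.push({
        panel: startAt + 1 + i,                        // keep numbering continuous
        instructions: candidates[i]?.instructions || "",
        caption: candidates[i]?.caption || "",
      })
    }
  } catch (err) {
    console.error("generation failed, using degraded panels:", err)
    for (let i = 0; i < count; i++) {
      out.push({
        panel: startAt + 1 + i,
        instructions: fallbackInstructions,
        caption: "(generation failed: degraded mode)",
      })
    }
  }
  return out
}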
 
 
src/app/queries/mockLLMResponse.ts CHANGED
@@ -1,6 +1,6 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 
-export const mockLLMResponse: LLMResponse = [{
+export const mockGeneratedPanels: GeneratedPanels = [{
   "panel": 1,
   "instructions": "wide shot of detective walking towards a UFO crash site",
   "caption": "Detective Jameson investigates a UFO crash in the desert"
src/app/queries/predictNextPanels.ts ADDED
@@ -0,0 +1,95 @@
+
+import { predict } from "./predict"
+import { Preset } from "../engine/presets"
+import { GeneratedPanel } from "@/types"
+import { cleanJson } from "@/lib/cleanJson"
+import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
+import { dirtyGeneratedPanelCleaner } from "@/lib/dirtyGeneratedPanelCleaner"
+import { dirtyGeneratedPanelsParser } from "@/lib/dirtyGeneratedPanelsParser"
+
+export const predictNextPanels = async ({
+  preset,
+  prompt = "",
+  nbPanelsToGenerate = 2,
+  existingPanels = [],
+}: {
+  preset: Preset;
+  prompt: string;
+  nbPanelsToGenerate: number;
+  existingPanels: GeneratedPanel[];
+}): Promise<GeneratedPanel[]> => {
+  // throw new Error("Planned maintenance")
+
+  // In case you need to quickly debug the RENDERING engine you can uncomment this:
+  // return mockGeneratedPanels
+
+  const existingPanelsTemplate = existingPanels.length
+    ? ` To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
+    : ''
+
+  const query = createZephyrPrompt([
+    {
+      role: "system",
+      content: [
+        `You are a writer specialized in ${preset.llmPrompt}`,
+        `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the next ${nbPanelsToGenerate} panels of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
+        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
+        // `Give your response as Markdown bullet points.`,
+        `Be brief in your ${nbPanelsToGenerate} instructions and narrative captions, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
+      ].filter(item => item).join("\n")
+    },
+    {
+      role: "user",
+      content: `The story is about: ${prompt}.${existingPanelsTemplate}`,
+    }
+  ]) + "\n```[{"
+
+
+  let result = ""
+
+  try {
+    // console.log(`calling predict(${query}, ${nbTotalPanels})`)
+    result = `${await predict(query, nbPanelsToGenerate) || ""}`.trim()
+    if (!result.length) {
+      throw new Error("empty result!")
+    }
+  } catch (err) {
+    // console.log(`prediction of the story failed, trying again..`)
+    try {
+      result = `${await predict(query+".", nbPanelsToGenerate) || ""}`.trim()
+      if (!result.length) {
+        throw new Error("empty result!")
+      }
+    } catch (err) {
+      console.error(`prediction of the story failed again 💩`)
+      throw new Error(`failed to generate the story ${err}`)
+    }
+  }
+
+  // console.log("Raw response from LLM:", result)
+  const tmp = cleanJson(result)
+
+  let generatedPanels: GeneratedPanel[] = []
+
+  try {
+    generatedPanels = dirtyGeneratedPanelsParser(tmp)
+  } catch (err) {
+    // console.log(`failed to read LLM response: ${err}`)
+    // console.log(`original response was:`, result)
+
+    // in case of failure here, it might be because the LLM hallucinated a completely different response,
+    // such as markdown. There is no real solution.. but we can try a fallback:
+
+    generatedPanels = (
+      tmp.split("*")
+        .map(item => item.trim())
+        .map((cap, i) => ({
+          panel: i,
+          caption: cap,
+          instructions: cap,
+        }))
+    )
+  }
+
+  return generatedPanels.map(res => dirtyGeneratedPanelCleaner(res))
+}
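predictNextPanels keeps getStory's double-call behavior: if the first predict() call throws or comes back empty, it retries once with the query nudged by a trailing ".", presumably so the second attempt is not byte-identical to the failed one. A generic version of that retry-once-with-nudge pattern; the predictor parameter is an assumption, not the project's predict():

// Sketch: call a text-generation function, retrying once with a slightly
// modified prompt if the first attempt fails or returns an empty string.
async function predictWithRetry(
  predict: (prompt: string) => Promise<string | undefined>, // assumed predictor
  prompt: string
): Promise<string> {
  const attempt = async (p: string) => {
    const result = `${(await predict(p)) || ""}`.trim()
    if (!result.length) {
      throw new Error("empty result!")
    }
    return result
  }
  try {
    return await attempt(prompt)
  } catch {
    // nudge the prompt so the retry differs slightly from the first attempt
    return await attempt(prompt + ".")
  }
}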
src/lib/cleanJson.ts CHANGED
@@ -1,6 +1,6 @@
 import { dirtyLLMResponseCleaner } from "./dirtyLLMResponseCleaner"
 
-export function cleanJson(input: string) {
+export function cleanJson(input: string): string {
 
   if (input.includes('```')) {
     input = input.split('```')[0]
src/lib/{dirtyCaptionCleaner.ts → dirtyGeneratedPanelCleaner.ts} RENAMED
@@ -1,16 +1,14 @@
-export function dirtyCaptionCleaner({
+import { GeneratedPanel } from "@/types"
+
+export function dirtyGeneratedPanelCleaner({
   panel,
   instructions,
   caption
-}: {
-  panel: number;
-  instructions: string;
-  caption: string
-}) {
-  let newCaption = caption.split(":").pop()?.trim() || ""
+}: GeneratedPanel): GeneratedPanel {
+  let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
   let newInstructions = (
     // need to remove from LLM garbage here, too
-    (instructions.split(":").pop() || "")
+    (`${instructions || ""}`.split(":").pop() || "")
     .replaceAll("Draw a", "")
     .replaceAll("Draw the", "")
     .replaceAll("Draw", "")
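The cleaner now also defends against missing fields (the `${caption || ""}` wrapping) while still stripping leading labels and "Draw ..." verbs the LLM likes to prepend. A small standalone illustration of that cleanup idea, not the project's full implementation:

// Sketch: drop a leading "Label:" prefix and the "Draw ..." verbs from a field.
function cleanLabelledField(raw: string | undefined): string {
  const afterLabel = `${raw || ""}`.split(":").pop() || ""
  return afterLabel
    .replaceAll("Draw a", "")
    .replaceAll("Draw the", "")
    .replaceAll("Draw", "")
    .trim()
}

// cleanLabelledField("Instructions: Draw a detective near a crashed UFO")
//   => "detective near a crashed UFO"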
src/lib/{dirtyLLMJsonParser.ts → dirtyGeneratedPanelsParser.ts} RENAMED
@@ -1,8 +1,8 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanel } from "@/types"
 import { cleanJson } from "./cleanJson"
 import { parseBadJSON } from "./parseBadJSON"
 
-export function dirtyLLMJsonParser(input: string): LLMResponse {
+export function dirtyGeneratedPanelsParser(input: string): GeneratedPanel[] {
 
   if (input.includes("```")) {
     input = input.split("```")[0]
@@ -10,7 +10,7 @@ export function dirtyLLMJsonParser(input: string): LLMResponse {
   // we only keep what's after the first [
   let jsonOrNot = cleanJson(input)
 
-  const jsonData = parseBadJSON(jsonOrNot) as LLMResponse
+  const jsonData = parseBadJSON(jsonOrNot) as GeneratedPanel[]
 
   const results = jsonData.map((item, i) => {
     let panel = i
src/lib/dirtyLLMResponseCleaner.ts CHANGED
@@ -1,4 +1,4 @@
-export function dirtyLLMResponseCleaner(input: string) {
+export function dirtyLLMResponseCleaner(input: string): string {
   let str = (
     `${input || ""}`
     // a summary of all the weird hallucinations I saw it make..
src/lib/parseBadJSON.ts CHANGED
@@ -1,9 +1,9 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 
-export function parseBadJSON(jsonLikeString: string): LLMResponse {
+export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
 
   try {
-    return JSON.parse(jsonLikeString) as LLMResponse
+    return JSON.parse(jsonLikeString) as GeneratedPanels
   } catch (err) {
     var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
 
@@ -19,6 +19,6 @@ export function parseBadJSON(jsonLikeString: string): LLMResponse {
     results.push(json);
   }
 
-  return results as LLMResponse
+  return results as GeneratedPanels
   }
 }
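parseBadJSON's fallback is a regex that fishes { panel, instructions, caption } objects out of almost-JSON whenever JSON.parse refuses the input. A small standalone demonstration using the same regex; the broken input string below is invented for the example:

// Sketch: salvage panel objects from malformed JSON-ish text.
const panelRegex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;

const broken = `Sure! Here are the panels:
{ "panel": 1, "instructions": "wide shot of a detective", "caption": "A quiet desert road" },
{ "panel": 2, "instructions": "close-up of a glowing crater", "caption": "Something landed here" },
`;

const salvaged: { panel: number; instructions: string; caption: string }[] = [];
let match: RegExpExecArray | null;
while ((match = panelRegex.exec(broken)) !== null) {
  salvaged.push({
    panel: Number(match[1]),
    instructions: match[2],
    caption: match[3],
  });
}
console.log(salvaged); // two recovered panel objects despite the text not being valid JSON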
src/types.ts CHANGED
@@ -87,7 +87,13 @@ export interface ImageAnalysisResponse {
   error?: string
 }
 
-export type LLMResponse = Array<{panel: number; instructions: string; caption: string }>
+export type GeneratedPanel = {
+  panel: number
+  instructions: string
+  caption: string
+}
+
+export type GeneratedPanels = GeneratedPanel[]
 
 export type LLMEngine =
   | "INFERENCE_API"
@@ -154,6 +160,7 @@ export type Settings = {
   huggingfaceApiKey: string
   huggingfaceInferenceApiModel: string
   huggingfaceInferenceApiModelTrigger: string
+  huggingfaceInferenceApiFileType: string
   replicateApiKey: string
   replicateApiModel: string
   replicateApiModelVersion: string