jbilcke-hf (HF staff) committed
Commit faf4ba4 • 1 Parent(s): 9bcdb59

added suggestion from user + work on the progressive loader
.env CHANGED
@@ -53,6 +53,9 @@ RENDERING_HF_INFERENCE_API_BASE_MODEL="stabilityai/stable-diffusion-xl-base-1.0"
 # If you decided to use a Hugging Face Inference API model for the RENDERING engine
 RENDERING_HF_INFERENCE_API_REFINER_MODEL="stabilityai/stable-diffusion-xl-refiner-1.0"
 
+# If your model returns a different file type (eg. jpg or webp) change it here
+RENDERING_HF_INFERENCE_API_FILE_TYPE="image/png"
+
 # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
 RENDERING_VIDEOCHAIN_API_URL="http://localhost:7860"
 
src/app/engine/render.ts CHANGED
@@ -15,6 +15,7 @@ const serverHuggingfaceApiUrl = `${process.env.RENDERING_HF_INFERENCE_ENDPOINT_U
 const serverHuggingfaceInferenceApiModel = `${process.env.RENDERING_HF_INFERENCE_API_BASE_MODEL || ""}`
 const serverHuggingfaceInferenceApiModelRefinerModel = `${process.env.RENDERING_HF_INFERENCE_API_REFINER_MODEL || ""}`
 const serverHuggingfaceInferenceApiModelTrigger = `${process.env.RENDERING_HF_INFERENCE_API_MODEL_TRIGGER || ""}`
+const serverHuggingfaceInferenceApiFileType = `${process.env.RENDERING_HF_INFERENCE_API_FILE_TYPE || ""}`
 
 const serverReplicateApiKey = `${process.env.AUTH_REPLICATE_API_TOKEN || ""}`
 const serverReplicateApiModel = `${process.env.RENDERING_REPLICATE_API_MODEL || ""}`
@@ -77,6 +78,7 @@ export async function newRender({
   let huggingfaceApiUrl = serverHuggingfaceApiUrl
   let huggingfaceInferenceApiModelRefinerModel = serverHuggingfaceInferenceApiModelRefinerModel
   let huggingfaceInferenceApiModelTrigger = serverHuggingfaceInferenceApiModelTrigger
+  let huggingfaceInferenceApiFileType = serverHuggingfaceInferenceApiFileType
 
   const placeholder = "<USE YOUR OWN TOKEN>"
 
@@ -116,6 +118,7 @@ export async function newRender({
     huggingfaceApiKey = settings.huggingfaceApiKey
     huggingfaceInferenceApiModel = settings.huggingfaceInferenceApiModel
     huggingfaceInferenceApiModelTrigger = settings.huggingfaceInferenceApiModelTrigger
+    huggingfaceInferenceApiFileType = settings.huggingfaceInferenceApiFileType
   }
 
   try {
@@ -258,6 +261,7 @@ export async function newRender({
       method: "POST",
      headers: {
        "Content-Type": "application/json",
+        Accept: huggingfaceInferenceApiFileType,
        Authorization: `Bearer ${huggingfaceApiKey}`,
      },
      body: JSON.stringify({
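The new Accept header is what makes the file type setting take effect: when the server-side render call hits the Inference API, it now advertises which MIME type it expects back instead of always assuming PNG. A minimal sketch of that request shape, assuming a plain fetch-based caller; the function name, URL, token and body fields below are illustrative placeholders, not the project's actual code:

// Sketch: request an image from an inference endpoint, negotiating the
// response MIME type via the Accept header (mirrors the change in newRender()).
// apiUrl, apiKey and the `inputs` body shape are assumptions, not project values.
async function fetchPanelImage(
  apiUrl: string,
  apiKey: string,
  prompt: string,
  fileType: string = "image/png" // e.g. "image/jpeg" or "image/webp" if the model emits those
): Promise<Blob> {
  const res = await fetch(apiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: fileType,                      // tell the endpoint which file type we expect back
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({ inputs: prompt }),
  })
  if (!res.ok) {
    throw new Error(`render request failed: ${res.status}`)
  }
  return res.blob()                          // binary image payload in the negotiated format
}

Since the default stays "image/png" (both in .env and in defaultSettings below), existing setups keep working; only models that return jpg or webp need to set RENDERING_HF_INFERENCE_API_FILE_TYPE.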
src/app/interface/settings-dialog/defaultSettings.ts CHANGED
@@ -6,6 +6,7 @@ export const defaultSettings: Settings = {
   huggingfaceApiKey: "",
   huggingfaceInferenceApiModel: "stabilityai/stable-diffusion-xl-base-1.0",
   huggingfaceInferenceApiModelTrigger: "",
+  huggingfaceInferenceApiFileType: "image/png",
   replicateApiKey: "",
   replicateApiModel: "stabilityai/sdxl",
   replicateApiModelVersion: "da77bc59ee60423279fd632efb4795ab731d9e3ca9705ef3341091fb989b7eaf",
src/app/interface/settings-dialog/getSettings.ts CHANGED
@@ -13,6 +13,7 @@ export function getSettings(): Settings {
   huggingfaceApiKey: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceApiKey), defaultSettings.huggingfaceApiKey),
   huggingfaceInferenceApiModel: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiModel), defaultSettings.huggingfaceInferenceApiModel),
   huggingfaceInferenceApiModelTrigger: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiModelTrigger), defaultSettings.huggingfaceInferenceApiModelTrigger),
+  huggingfaceInferenceApiFileType: getValidString(localStorage?.getItem?.(localStorageKeys.huggingfaceInferenceApiFileType), defaultSettings.huggingfaceInferenceApiFileType),
   replicateApiKey: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiKey), defaultSettings.replicateApiKey),
   replicateApiModel: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiModel), defaultSettings.replicateApiModel),
   replicateApiModelVersion: getValidString(localStorage?.getItem?.(localStorageKeys.replicateApiModelVersion), defaultSettings.replicateApiModelVersion),
src/app/interface/settings-dialog/index.tsx CHANGED
@@ -43,6 +43,10 @@ export function SettingsDialog() {
     localStorageKeys.huggingfaceInferenceApiModelTrigger,
     defaultSettings.huggingfaceInferenceApiModelTrigger
   )
+  const [huggingfaceInferenceApiFileType, setHuggingfaceInferenceApiFileType] = useLocalStorage<string>(
+    localStorageKeys.huggingfaceInferenceApiFileType,
+    defaultSettings.huggingfaceInferenceApiFileType
+  )
   const [replicateApiKey, setReplicateApiKey] = useLocalStorage<string>(
     localStorageKeys.replicateApiKey,
     defaultSettings.replicateApiKey
@@ -148,6 +152,17 @@ export function SettingsDialog() {
     value={huggingfaceInferenceApiModel}
   />
 </Field>
+<Field>
+  <Label>The file type supported by the model (jpg, webp..):</Label>
+  <Input
+    className="font-mono"
+    placeholder="Inference API file type"
+    onChange={(x) => {
+      setHuggingfaceInferenceApiFileType(x.target.value)
+    }}
+    value={huggingfaceInferenceApiFileType}
+  />
+</Field>
 <p className="text-sm text-zinc-700">
   Using a LoRA? Don&apos;t forget the trigger keyword! Also you will want to use the &quot;Neutral&quot; style.
 </p>
src/app/interface/settings-dialog/localStorageKeys.ts CHANGED
@@ -6,6 +6,7 @@ export const localStorageKeys: Record<keyof Settings, string> = {
   huggingfaceApiKey: "CONF_AUTH_HF_API_TOKEN",
   huggingfaceInferenceApiModel: "CONF_RENDERING_HF_INFERENCE_API_BASE_MODEL",
   huggingfaceInferenceApiModelTrigger: "CONF_RENDERING_HF_INFERENCE_API_BASE_MODEL_TRIGGER",
+  huggingfaceInferenceApiFileType: "CONF_RENDERING_HF_INFERENCE_API_FILE_TYPE",
   replicateApiKey: "CONF_AUTH_REPLICATE_API_TOKEN",
   replicateApiModel: "CONF_RENDERING_REPLICATE_API_MODEL",
   replicateApiModelVersion: "CONF_RENDERING_REPLICATE_API_MODEL_VERSION",
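Taken together, the settings changes above plumb one new string field through the whole stack: a default value, a localStorage key, a getter, and a dialog input. A condensed sketch of the read path, assuming a simplified getValidString (the project's real helper may differ):

// Sketch: resolve a Settings field such as huggingfaceInferenceApiFileType
// from localStorage, falling back to the default when unset or empty.
type SettingsSlice = { huggingfaceInferenceApiFileType: string }

const defaults: SettingsSlice = { huggingfaceInferenceApiFileType: "image/png" }
const keys = { huggingfaceInferenceApiFileType: "CONF_RENDERING_HF_INFERENCE_API_FILE_TYPE" }

// simplified stand-in for the project's getValidString()
function getValidString(value: unknown, defaultValue: string): string {
  return typeof value === "string" && value.length > 0 ? value : defaultValue
}

function readFileTypeSetting(): string {
  const stored = typeof localStorage !== "undefined"
    ? localStorage.getItem(keys.huggingfaceInferenceApiFileType)
    : null
  return getValidString(stored, defaults.huggingfaceInferenceApiFileType)
}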
src/app/main.tsx CHANGED
@@ -10,8 +10,9 @@ import { Zoom } from "./interface/zoom"
 import { getStory } from "./queries/getStory"
 import { BottomBar } from "./interface/bottom-bar"
 import { Page } from "./interface/page"
-import { LLMResponse } from "@/types"
+import { GeneratedPanel } from "@/types"
 import { joinWords } from "@/lib/joinWords"
+import { getStoryContinuation } from "./queries/getStoryContinuation"
 
 export default function Main() {
   const [_isPending, startTransition] = useTransition()
@@ -44,82 +45,93 @@ export default function Main() {
       // I don't think we are going to need a rate limiter on the LLM part anymore
       const enableRateLimiter = false // `${process.env.NEXT_PUBLIC_ENABLE_RATE_LIMITER}` === "true"
 
-      let llmResponse: LLMResponse = []
-
       const [stylePrompt, userStoryPrompt] = prompt.split("||").map(x => x.trim())
 
-      try {
-        llmResponse = await getStory({
-          preset,
-          prompt: joinWords([ userStoryPrompt ]),
-          nbTotalPanels
-        })
-        console.log("LLM responded:", llmResponse)
-
-      } catch (err) {
-        console.log("LLM step failed due to:", err)
-        console.log("we are now switching to a degraded mode, using 4 similar panels")
-
-        llmResponse = []
-        for (let p = 0; p < nbTotalPanels; p++) {
-          llmResponse.push({
-            panel: p,
-            instructions: joinWords([
-              stylePrompt,
-              userStoryPrompt,
-              `${".".repeat(p)}`,
-            ]),
-            caption: "(Sorry, LLM generation failed: using degraded mode)"
-          })
-        }
-        console.error(err)
-      }
-
       // we have to limit the size of the prompt, otherwise the rest of the style won't be followed
 
       let limitedStylePrompt = stylePrompt.trim().slice(0, 77).trim()
       if (limitedStylePrompt.length !== stylePrompt.length) {
         console.log("Sorry folks, the style prompt was cut to:", limitedStylePrompt)
       }
 
       // new experimental prompt: let's drop the user prompt, and only use the style
       const lightPanelPromptPrefix = joinWords(preset.imagePrompt(limitedStylePrompt))
 
       // this prompt will be used if the LLM generation failed
       const degradedPanelPromptPrefix = joinWords([
         ...preset.imagePrompt(limitedStylePrompt),
 
         // we re-inject the story, then
         userStoryPrompt
       ])
 
-      const newPanels: string[] = []
+      let existingPanels: GeneratedPanel[] = []
+      const newPanelsPrompts: string[] = []
       const newCaptions: string[] = []
-      setWaitABitMore(true)
-      console.log("Panel prompts for SDXL:")
-      for (let p = 0; p < nbTotalPanels; p++) {
-        newCaptions.push(llmResponse[p]?.caption.trim() || "...")
-        const newPanel = joinWords([
-
-          // what we do here is that ideally we give full control to the LLM for prompting,
-          // unless there was a catastrophic failure, in that case we preserve the original prompt
-          llmResponse[p]?.instructions
-            ? lightPanelPromptPrefix
-            : degradedPanelPromptPrefix,
-
-          llmResponse[p]?.instructions
-        ])
-        newPanels.push(newPanel)
-        console.log(newPanel)
+
+      const nbPanelsToGenerate = 2
+
+      for (
+        let currentPanel = 0;
+        currentPanel < nbTotalPanels;
+        currentPanel += nbPanelsToGenerate
+      ) {
+        if (currentPanel > (nbTotalPanels / 2)) {
+          console.log("good, we are half way there, hold tight!")
+          // setWaitABitMore(true)
+        }
+        try {
+          const candidatePanels = await getStoryContinuation({
+            preset,
+            stylePrompt,
+            userStoryPrompt,
+            nbPanelsToGenerate,
+            existingPanels,
+          })
+          console.log("LLM generated some new panels:", candidatePanels)
+
+          existingPanels.push(...candidatePanels)
+
+          console.log(`Converting the ${nbPanelsToGenerate} new panels into image prompts..`)
+
+          const startAt = currentPanel
+          const endAt = currentPanel + nbPanelsToGenerate
+          for (let p = startAt; p < endAt; p++) {
+            newCaptions.push(existingPanels[p]?.caption.trim() || "...")
+            const newPanel = joinWords([
+
+              // what we do here is that ideally we give full control to the LLM for prompting,
+              // unless there was a catastrophic failure, in that case we preserve the original prompt
+              existingPanels[p]?.instructions
+                ? lightPanelPromptPrefix
+                : degradedPanelPromptPrefix,
+
+              existingPanels[p]?.instructions
+            ])
+            newPanelsPrompts.push(newPanel)
+
+            console.log(`Image prompt for panel ${p} => "${newPanel}"`)
+          }
+
+          // update the frontend
+          console.log("updating the frontend..")
+          setCaptions(newCaptions)
+          setPanels(newPanelsPrompts)
+
+          setGeneratingStory(false)
+        } catch (err) {
+          console.log("failed to generate the story, aborting here")
+          setGeneratingStory(false)
+          break
+        }
       }
 
-      setCaptions(newCaptions)
-      setPanels(newPanels)
-
+      /*
       setTimeout(() => {
         setGeneratingStory(false)
         setWaitABitMore(false)
       }, enableRateLimiter ? 12000 : 0)
+      */
 
     })
   }, [prompt, preset?.label, nbTotalPanels]) // important: we need to react to preset changes too
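The rewritten main loop is the "progressive loader" mentioned in the commit message: instead of asking the LLM for every panel up front, it requests the story in fixed-size batches, feeds the accumulated panels back in as context, and refreshes the UI after each batch. A sketch of that pattern stripped of the React and project specifics; the callback names here are placeholders, not project APIs:

// Sketch of a progressive, batch-by-batch generation loop.
type Panel = { panel: number; instructions: string; caption: string }

async function generateProgressively(
  nbTotalPanels: number,
  generateBatch: (existing: Panel[], count: number) => Promise<Panel[]>, // assumed generator
  onBatchReady: (panelsSoFar: Panel[]) => void,                          // e.g. a UI update
  batchSize = 2
): Promise<Panel[]> {
  const existing: Panel[] = []
  for (let current = 0; current < nbTotalPanels; current += batchSize) {
    try {
      // each batch sees everything generated so far, so the story stays coherent
      const batch = await generateBatch(existing, batchSize)
      existing.push(...batch)
      // surface partial results as soon as a batch is available
      onBatchReady([...existing])
    } catch (err) {
      console.error("batch generation failed, stopping early:", err)
      break
    }
  }
  return existing
}

The payoff is latency hiding: the first two panels can be rendered while the later ones are still being written, which is why the old setWaitABitMore/setTimeout machinery is commented out above.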
src/app/queries/getStory.ts CHANGED
@@ -1,13 +1,13 @@
 
-import { dirtyLLMJsonParser } from "@/lib/dirtyLLMJsonParser"
-import { dirtyCaptionCleaner } from "@/lib/dirtyCaptionCleaner"
-
 import { predict } from "./predict"
 import { Preset } from "../engine/presets"
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 import { cleanJson } from "@/lib/cleanJson"
 import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
 
+import { dirtyGeneratedPanelCleaner } from "@/lib/dirtyGeneratedPanelCleaner"
+import { dirtyGeneratedPanelsParser } from "@/lib/dirtyGeneratedPanelsParser"
+
 export const getStory = async ({
   preset,
   prompt = "",
@@ -16,19 +16,19 @@ export const getStory = async ({
   preset: Preset;
   prompt: string;
   nbTotalPanels: number;
-}): Promise<LLMResponse> => {
+}): Promise<GeneratedPanels> => {
   // throw new Error("Planned maintenance")
 
   // In case you need to quickly debug the RENDERING engine you can uncomment this:
-  // return mockLLMResponse
+  // return mockGeneratedPanels
 
   const query = createZephyrPrompt([
     {
       role: "system",
       content: [
         `You are a writer specialized in ${preset.llmPrompt}`,
-        `Please write detailed drawing instructions and a short (2-3 sentences long) speech caption for the ${nbTotalPanels} panels of a new story. Please make sure each of the ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
-        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
+        `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the ${nbTotalPanels} panels of a new story. Please make sure each of the ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
+        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
         // `Give your response as Markdown bullet points.`,
         `Be brief in your ${nbTotalPanels} instructions and narrative captions, don't add your own comments. The whole story must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
       ].filter(item => item).join("\n")
@@ -37,7 +37,7 @@ export const getStory = async ({
       role: "user",
       content: `The story is: ${prompt}`,
     }
-  ]) + "\n[{"
+  ]) + "\n```[{"
 
 
   let result = ""
@@ -64,10 +64,10 @@ export const getStory = async ({
   // console.log("Raw response from LLM:", result)
   const tmp = cleanJson(result)
 
-  let llmResponse: LLMResponse = []
+  let GeneratedPanels: GeneratedPanels = []
 
   try {
-    llmResponse = dirtyLLMJsonParser(tmp)
+    GeneratedPanels = dirtyGeneratedPanelsParser(tmp)
   } catch (err) {
     // console.log(`failed to read LLM response: ${err}`)
     // console.log(`original response was:`, result)
@@ -75,7 +75,7 @@ export const getStory = async ({
     // in case of failure here, it might be because the LLM hallucinated a completely different response,
     // such as markdown. There is no real solution.. but we can try a fallback:
 
-    llmResponse = (
+    GeneratedPanels = (
       tmp.split("*")
         .map(item => item.trim())
         .map((cap, i) => ({
@@ -86,5 +86,5 @@ export const getStory = async ({
     )
   }
 
-  return llmResponse.map(res => dirtyCaptionCleaner(res))
+  return GeneratedPanels.map(res => dirtyGeneratedPanelCleaner(res))
 }
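One small but load-bearing change here is the suffix appended to the query: it now ends with a newline, a Markdown fence, and "[{", which primes the model to continue an already-started JSON array instead of chatting. The stripped prefix then has to be accounted for before parsing, which is what cleanJson and dirtyGeneratedPanelsParser handle. A toy illustration of the priming idea only; the project's own cleanup helpers differ in detail, and callLlm is a placeholder:

// Sketch: prime the model to continue a JSON array, then restore the
// pre-filled opening characters before parsing.
async function getPanelsJson(
  callLlm: (prompt: string) => Promise<string>, // assumed LLM call
  systemAndUserPrompt: string
): Promise<unknown[]> {
  // the completion is expected to start mid-array, e.g. `"panel": 1, ... }]`
  const primedPrompt = systemAndUserPrompt + "\n```[{"
  const completion = await callLlm(primedPrompt)

  // drop anything after a closing fence, then re-attach the "[{" we pre-filled
  const body = completion.split("```")[0]
  const jsonText = "[{" + body

  return JSON.parse(jsonText) as unknown[]
}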
src/app/queries/getStoryContinuation.ts CHANGED
@@ -1,96 +1,68 @@
-
-import { dirtyLLMJsonParser } from "@/lib/dirtyLLMJsonParser"
-import { dirtyCaptionCleaner } from "@/lib/dirtyCaptionCleaner"
-
-import { predict } from "./predict"
 import { Preset } from "../engine/presets"
-import { LLMResponse } from "@/types"
-import { cleanJson } from "@/lib/cleanJson"
-import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
+import { GeneratedPanel } from "@/types"
+import { predictNextPanels } from "./predictNextPanels"
+import { joinWords } from "@/lib/joinWords"
 
 export const getStoryContinuation = async ({
   preset,
-  prompt = "",
-  nbTotalPanels = 2,
-  previousCaptions = [],
+  stylePrompt = "",
+  userStoryPrompt = "",
+  nbPanelsToGenerate = 2,
+  existingPanels = [],
 }: {
   preset: Preset;
-  prompt: string;
-  nbTotalPanels: number;
-  previousCaptions: string[];
-}): Promise<LLMResponse> => {
-  // throw new Error("Planned maintenance")
-
-  // In case you need to quickly debug the RENDERING engine you can uncomment this:
-  // return mockLLMResponse
+  stylePrompt: string;
+  userStoryPrompt: string;
+  nbPanelsToGenerate: number;
+  existingPanels: GeneratedPanel[];
+}): Promise<GeneratedPanel[]> => {
 
-  const previousCaptionsTemplate = previousCaptions.length
-    ? `To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(previousCaptions, null, 2)}`
-    : ''
+  let panels: GeneratedPanel[] = []
+  const startAt: number = existingPanels.at(-1)?.panel || existingPanels.length || 0
+  const endAt: number = startAt + nbPanelsToGenerate
 
-  const query = createZephyrPrompt([
-    {
-      role: "system",
-      content: [
-        `You are a writer specialized in ${preset.llmPrompt}`,
-        `Please write detailed drawing instructions and a short (2-3 sentences long) speech caption for the next ${nbTotalPanels} panels of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
-        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
-        // `Give your response as Markdown bullet points.`,
-        `Be brief in your ${nbTotalPanels} instructions and narrative captions, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
-      ].filter(item => item).join("\n")
-    },
-    {
-      role: "user",
-      content: `The story is about: ${prompt}.${previousCaptionsTemplate}`,
-    }
-  ]) + "\n[{"
+  try {
 
-  let result = ""
+    const prompt = joinWords([ userStoryPrompt ])
 
-  try {
-    // console.log(`calling predict(${query}, ${nbTotalPanels})`)
-    result = `${await predict(query, nbTotalPanels) || ""}`.trim()
-    if (!result.length) {
-      throw new Error("empty result!")
-    }
-  } catch (err) {
-    // console.log(`prediction of the story failed, trying again..`)
-    try {
-      result = `${await predict(query+".", nbTotalPanels) || ""}`.trim()
-      if (!result.length) {
-        throw new Error("empty result!")
-      }
-    } catch (err) {
-      console.error(`prediction of the story failed again 💩`)
-      throw new Error(`failed to generate the story ${err}`)
-    }
-  }
+    const panelCandidates: GeneratedPanel[] = await predictNextPanels({
+      preset,
+      prompt,
+      nbPanelsToGenerate,
+      existingPanels,
+    })
 
-  // console.log("Raw response from LLM:", result)
-  const tmp = cleanJson(result)
-
-  let llmResponse: LLMResponse = []
+    console.log("LLM responded with panels:", panelCandidates)
 
-  try {
-    llmResponse = dirtyLLMJsonParser(tmp)
+    // we clean the output from the LLM
+    // most importantly, we need to adjust the panel index,
+    // to start from where we last finished
+    for (let p = startAt; p < endAt; p++) {
+      panels.push({
+        panel: p,
+        instructions: `${panelCandidates[p]?.instructions || ""}`,
+        caption: `${panelCandidates[p]?.caption || ""}`,
+      })
+    }
+
   } catch (err) {
-    // console.log(`failed to read LLM response: ${err}`)
-    // console.log(`original response was:`, result)
-
-    // in case of failure here, it might be because the LLM hallucinated a completely different response,
-    // such as markdown. There is no real solution.. but we can try a fallback:
-
-    llmResponse = (
-      tmp.split("*")
-        .map(item => item.trim())
-        .map((cap, i) => ({
-          panel: i,
-          caption: cap,
-          instructions: cap,
-        }))
-    )
+    console.log("LLM step failed due to:", err)
+    console.log("we are now switching to a degraded mode, using 4 similar panels")
+    panels = []
+    for (let p = startAt; p < endAt; p++) {
+      panels.push({
+        panel: p,
+        instructions: joinWords([
+          stylePrompt,
+          userStoryPrompt,
+          `${".".repeat(p)}`,
+        ]),
+        caption: "(Sorry, LLM generation failed: using degraded mode)"
+      })
+    }
+    console.error(err)
+  } finally {
+    return panels
   }
-
-  return llmResponse.map(res => dirtyCaptionCleaner(res))
-}
+}
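The two interesting details in the rewritten continuation are the start index and the fallback: new panels are numbered from the last existing panel, and if the LLM call throws, the function still returns something usable built from the style and story prompts. A generic sketch of that same shape; it indexes the candidate batch from zero, which differs slightly from the diff above, so treat it as illustrative rather than a drop-in:

// Sketch: continue a numbered panel sequence from where it last stopped,
// with a degraded fallback when generation fails.
type Panel = { panel: number; instructions: string; caption: string }

async function continuePanels(
  existing: Panel[],
  count: number,
  generate: () => Promise<Panel[]>,   // assumed generator, may throw
  fallbackInstructions: string
): Promise<Panel[]> {
  const startAt = existing.at(-1)?.panel ?? -1
  const out: Panel[] = []
  try {
    const candidates = await generate()
    for (let i = 0; i < count; i++) {
      out.push({
        panel: startAt + 1 + i,                        // keep numbering continuous
        instructions: candidates[i]?.instructions || "",
        caption: candidates[i]?.caption || "",
      })
    }
  } catch (err) {
    console.error("generation failed, using degraded panels:", err)
    for (let i = 0; i < count; i++) {
      out.push({
        panel: startAt + 1 + i,
        instructions: fallbackInstructions,
        caption: "(generation failed: degraded mode)",
      })
    }
  }
  return out
}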
 
 
src/app/queries/mockLLMResponse.ts CHANGED
@@ -1,6 +1,6 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 
-export const mockLLMResponse: LLMResponse = [{
+export const mockGeneratedPanels: GeneratedPanels = [{
   "panel": 1,
   "instructions": "wide shot of detective walking towards a UFO crash site",
   "caption": "Detective Jameson investigates a UFO crash in the desert"
src/app/queries/predictNextPanels.ts ADDED
@@ -0,0 +1,95 @@
+
+import { predict } from "./predict"
+import { Preset } from "../engine/presets"
+import { GeneratedPanel } from "@/types"
+import { cleanJson } from "@/lib/cleanJson"
+import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
+import { dirtyGeneratedPanelCleaner } from "@/lib/dirtyGeneratedPanelCleaner"
+import { dirtyGeneratedPanelsParser } from "@/lib/dirtyGeneratedPanelsParser"
+
+export const predictNextPanels = async ({
+  preset,
+  prompt = "",
+  nbPanelsToGenerate = 2,
+  existingPanels = [],
+}: {
+  preset: Preset;
+  prompt: string;
+  nbPanelsToGenerate: number;
+  existingPanels: GeneratedPanel[];
+}): Promise<GeneratedPanel[]> => {
+  // throw new Error("Planned maintenance")
+
+  // In case you need to quickly debug the RENDERING engine you can uncomment this:
+  // return mockGeneratedPanels
+
+  const existingPanelsTemplate = existingPanels.length
+    ? ` To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
+    : ''
+
+  const query = createZephyrPrompt([
+    {
+      role: "system",
+      content: [
+        `You are a writer specialized in ${preset.llmPrompt}`,
+        `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the next ${nbPanelsToGenerate} panels of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
+        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
+        // `Give your response as Markdown bullet points.`,
+        `Be brief in your ${nbPanelsToGenerate} instructions and narrative captions, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
+      ].filter(item => item).join("\n")
+    },
+    {
+      role: "user",
+      content: `The story is about: ${prompt}.${existingPanelsTemplate}`,
+    }
+  ]) + "\n```[{"
+
+
+  let result = ""
+
+  try {
+    // console.log(`calling predict(${query}, ${nbTotalPanels})`)
+    result = `${await predict(query, nbPanelsToGenerate) || ""}`.trim()
+    if (!result.length) {
+      throw new Error("empty result!")
+    }
+  } catch (err) {
+    // console.log(`prediction of the story failed, trying again..`)
+    try {
+      result = `${await predict(query+".", nbPanelsToGenerate) || ""}`.trim()
+      if (!result.length) {
+        throw new Error("empty result!")
+      }
+    } catch (err) {
+      console.error(`prediction of the story failed again 💩`)
+      throw new Error(`failed to generate the story ${err}`)
+    }
+  }
+
+  // console.log("Raw response from LLM:", result)
+  const tmp = cleanJson(result)
+
+  let generatedPanels: GeneratedPanel[] = []
+
+  try {
+    generatedPanels = dirtyGeneratedPanelsParser(tmp)
+  } catch (err) {
+    // console.log(`failed to read LLM response: ${err}`)
+    // console.log(`original response was:`, result)
+
+    // in case of failure here, it might be because the LLM hallucinated a completely different response,
+    // such as markdown. There is no real solution.. but we can try a fallback:
+
+    generatedPanels = (
+      tmp.split("*")
+        .map(item => item.trim())
+        .map((cap, i) => ({
+          panel: i,
+          caption: cap,
+          instructions: cap,
+        }))
+    )
+  }
+
+  return generatedPanels.map(res => dirtyGeneratedPanelCleaner(res))
+}
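predictNextPanels keeps getStory's double-call behavior: if the first predict() call throws or comes back empty, it retries once with the query nudged by a trailing ".", presumably so the second attempt is not byte-identical to the failed one. A generic version of that retry-once-with-nudge pattern; the predictor parameter is an assumption, not the project's predict():

// Sketch: call a text-generation function, retrying once with a slightly
// modified prompt if the first attempt fails or returns an empty string.
async function predictWithRetry(
  predict: (prompt: string) => Promise<string | undefined>, // assumed predictor
  prompt: string
): Promise<string> {
  const attempt = async (p: string) => {
    const result = `${(await predict(p)) || ""}`.trim()
    if (!result.length) {
      throw new Error("empty result!")
    }
    return result
  }
  try {
    return await attempt(prompt)
  } catch {
    // nudge the prompt so the retry differs slightly from the first attempt
    return await attempt(prompt + ".")
  }
}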
src/lib/cleanJson.ts CHANGED
@@ -1,6 +1,6 @@
 import { dirtyLLMResponseCleaner } from "./dirtyLLMResponseCleaner"
 
-export function cleanJson(input: string) {
+export function cleanJson(input: string): string {
 
   if (input.includes('```')) {
     input = input.split('```')[0]
src/lib/{dirtyCaptionCleaner.ts → dirtyGeneratedPanelCleaner.ts} RENAMED
@@ -1,16 +1,14 @@
-export function dirtyCaptionCleaner({
+import { GeneratedPanel } from "@/types"
+
+export function dirtyGeneratedPanelCleaner({
   panel,
   instructions,
   caption
-}: {
-  panel: number;
-  instructions: string;
-  caption: string
-}) {
-  let newCaption = caption.split(":").pop()?.trim() || ""
+}: GeneratedPanel): GeneratedPanel {
+  let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
   let newInstructions = (
     // need to remove from LLM garbage here, too
-    (instructions.split(":").pop() || "")
+    (`${instructions || ""}`.split(":").pop() || "")
     .replaceAll("Draw a", "")
     .replaceAll("Draw the", "")
     .replaceAll("Draw", "")
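The cleaner now also defends against missing fields (the `${caption || ""}` wrapping) while still stripping leading labels and "Draw ..." verbs the LLM likes to prepend. A small standalone illustration of that cleanup idea, not the project's full implementation:

// Sketch: drop a leading "Label:" prefix and the "Draw ..." verbs from a field.
function cleanLabelledField(raw: string | undefined): string {
  const afterLabel = `${raw || ""}`.split(":").pop() || ""
  return afterLabel
    .replaceAll("Draw a", "")
    .replaceAll("Draw the", "")
    .replaceAll("Draw", "")
    .trim()
}

// cleanLabelledField("Instructions: Draw a detective near a crashed UFO")
//   => "detective near a crashed UFO"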
src/lib/{dirtyLLMJsonParser.ts → dirtyGeneratedPanelsParser.ts} RENAMED
@@ -1,8 +1,8 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanel } from "@/types"
 import { cleanJson } from "./cleanJson"
 import { parseBadJSON } from "./parseBadJSON"
 
-export function dirtyLLMJsonParser(input: string): LLMResponse {
+export function dirtyGeneratedPanelsParser(input: string): GeneratedPanel[] {
 
   if (input.includes("```")) {
     input = input.split("```")[0]
@@ -10,7 +10,7 @@ export function dirtyLLMJsonParser(input: string): LLMResponse {
   // we only keep what's after the first [
   let jsonOrNot = cleanJson(input)
 
-  const jsonData = parseBadJSON(jsonOrNot) as LLMResponse
+  const jsonData = parseBadJSON(jsonOrNot) as GeneratedPanel[]
 
   const results = jsonData.map((item, i) => {
     let panel = i
src/lib/dirtyLLMResponseCleaner.ts CHANGED
@@ -1,4 +1,4 @@
-export function dirtyLLMResponseCleaner(input: string) {
+export function dirtyLLMResponseCleaner(input: string): string {
   let str = (
     `${input || ""}`
     // a summary of all the weird hallucinations I saw it make..
src/lib/parseBadJSON.ts CHANGED
@@ -1,9 +1,9 @@
-import { LLMResponse } from "@/types"
+import { GeneratedPanels } from "@/types"
 
-export function parseBadJSON(jsonLikeString: string): LLMResponse {
+export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
 
   try {
-    return JSON.parse(jsonLikeString) as LLMResponse
+    return JSON.parse(jsonLikeString) as GeneratedPanels
   } catch (err) {
     var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
 
@@ -19,6 +19,6 @@ export function parseBadJSON(jsonLikeString: string): LLMResponse {
     results.push(json);
   }
 
-  return results as LLMResponse
+  return results as GeneratedPanels
   }
 }
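parseBadJSON's fallback is a regex that fishes { panel, instructions, caption } objects out of almost-JSON whenever JSON.parse refuses the input. A small standalone demonstration using the same regex; the broken input string below is invented for the example:

// Sketch: salvage panel objects from malformed JSON-ish text.
const panelRegex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;

const broken = `Sure! Here are the panels:
{ "panel": 1, "instructions": "wide shot of a detective", "caption": "A quiet desert road" },
{ "panel": 2, "instructions": "close-up of a glowing crater", "caption": "Something landed here" },
`;

const salvaged: { panel: number; instructions: string; caption: string }[] = [];
let match: RegExpExecArray | null;
while ((match = panelRegex.exec(broken)) !== null) {
  salvaged.push({
    panel: Number(match[1]),
    instructions: match[2],
    caption: match[3],
  });
}
console.log(salvaged); // two recovered panel objects despite the text not being valid JSON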
src/types.ts CHANGED
@@ -87,7 +87,13 @@ export interface ImageAnalysisResponse {
   error?: string
 }
 
-export type LLMResponse = Array<{panel: number; instructions: string; caption: string }>
+export type GeneratedPanel = {
+  panel: number
+  instructions: string
+  caption: string
+}
+
+export type GeneratedPanels = GeneratedPanel[]
 
 export type LLMEngine =
   | "INFERENCE_API"
@@ -154,6 +160,7 @@ export type Settings = {
   huggingfaceApiKey: string
   huggingfaceInferenceApiModel: string
   huggingfaceInferenceApiModelTrigger: string
+  huggingfaceInferenceApiFileType: string
   replicateApiKey: string
   replicateApiModel: string
   replicateApiModelVersion: string