Commit d64b893
Parent(s): a2c0551
working on improvements
src/app/main.tsx
CHANGED
@@ -68,7 +68,7 @@ export default function Main() {
     const newPanelsPrompts: string[] = []
     const newCaptions: string[] = []
 
-    const nbPanelsToGenerate =
+    const nbPanelsToGenerate = 1
 
     for (
       let currentPanel = 0;
src/app/queries/getStoryContinuation.ts
CHANGED
@@ -7,8 +7,8 @@ export const getStoryContinuation = async ({
   preset,
   stylePrompt = "",
   userStoryPrompt = "",
-  nbPanelsToGenerate =
-  nbTotalPanels =
+  nbPanelsToGenerate = 1,
+  nbTotalPanels = 4,
   existingPanels = [],
 }: {
   preset: Preset;
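With the new defaults, a caller that omits both options now asks for one panel at a time out of a 4-panel story. A minimal calling sketch under those defaults; the option names come from the diff above, while the return shape, the GeneratedPanel type and the "@/types" import path are assumptions for illustration:

// Hypothetical caller (not part of this commit): build a 4-panel story one panel
// per call, relying on the new defaults nbPanelsToGenerate = 1 and nbTotalPanels = 4.
import { getStoryContinuation } from "@/app/queries/getStoryContinuation"
import { Preset, GeneratedPanel } from "@/types" // import path assumed

async function generateStory(preset: Preset, userStoryPrompt: string): Promise<GeneratedPanel[]> {
  const existingPanels: GeneratedPanel[] = []
  for (let i = 0; i < 4; i++) {
    const newPanels = await getStoryContinuation({
      preset,
      userStoryPrompt,
      existingPanels, // pass what exists so far so the LLM continues the story
      // nbPanelsToGenerate and nbTotalPanels are omitted: the new defaults apply
    })
    existingPanels.push(...newPanels) // assumes the promise resolves to an array of panels
  }
  return existingPanels
}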
src/app/queries/predictNextPanels.ts
CHANGED
@@ -6,12 +6,13 @@ import { cleanJson } from "@/lib/cleanJson"
 import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
 import { dirtyGeneratedPanelCleaner } from "@/lib/dirtyGeneratedPanelCleaner"
 import { dirtyGeneratedPanelsParser } from "@/lib/dirtyGeneratedPanelsParser"
+import { sleep } from "@/lib/sleep"
 
 export const predictNextPanels = async ({
   preset,
   prompt = "",
-  nbPanelsToGenerate =
-  nbTotalPanels =
+  nbPanelsToGenerate = 1,
+  nbTotalPanels = 4,
   existingPanels = [],
 }: {
   preset: Preset;
@@ -58,17 +59,26 @@ export const predictNextPanels = async ({
 
   let result = ""
 
+  // we don't require a lot of token for our task
+  // but to be safe, let's count ~130 tokens per panel
+  const nbTokensPerPanel = 130
+
+  const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
+
   try {
     // console.log(`calling predict(${query}, ${nbTotalPanels})`)
-    result = `${await predict(query, nbTotalPanels)}`.trim()
+    result = `${await predict(query, nbMaxNewTokens)}`.trim()
     console.log("LLM result (1st trial):", result)
     if (!result.length) {
       throw new Error("empty result on 1st trial!")
     }
   } catch (err) {
     // console.log(`prediction of the story failed, trying again..`)
+    // this should help throttle things on a bit on the LLM API side
+    await sleep(2000)
+
     try {
-      result = `${await predict(query + " \n ", nbTotalPanels)}`.trim()
+      result = `${await predict(query + " \n ", nbMaxNewTokens)}`.trim()
       console.log("LLM result (2nd trial):", result)
       if (!result.length) {
         throw new Error("empty result on 2nd trial!")
src/app/queries/predictWithGroq.ts
CHANGED
@@ -2,7 +2,7 @@
 
 import Groq from "groq-sdk"
 
-export async function predict(inputs: string, nbPanels: number): Promise<string> {
+export async function predict(inputs: string, nbMaxNewTokens: number): Promise<string> {
   const groqApiKey = `${process.env.AUTH_GROQ_API_KEY || ""}`
   const groqApiModel = `${process.env.LLM_GROQ_API_MODEL || "mixtral-8x7b-32768"}`
 
@@ -18,6 +18,9 @@ export async function predict(inputs: string, nbPanels: number): Promise<string>
   const res = await groq.chat.completions.create({
     messages: messages,
     model: groqApiModel,
+    stream: false,
+    temperature: 0.5,
+    max_tokens: nbMaxNewTokens,
   })
 
   return res.choices[0].message.content || ""
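Since the output budget is now supplied by the caller, here is a quick usage sketch of the updated adapter; the 130-tokens-per-panel figure mirrors the constant used in predictNextPanels above, everything else (the prompt text, the demo function) is illustrative:

// Hypothetical usage (not part of the commit): cap the completion to
// roughly one panel's worth of tokens.
import { predict } from "@/app/queries/predictWithGroq"

async function demo(): Promise<void> {
  const nbPanelsToGenerate = 1
  const nbTokensPerPanel = 130
  const text = await predict(
    "Describe the next comic panel.",       // example prompt
    nbPanelsToGenerate * nbTokensPerPanel   // becomes max_tokens in the Groq request
  )
  console.log(text)
}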
src/app/queries/predictWithHuggingFace.ts
CHANGED
@@ -3,7 +3,7 @@
 import { HfInference, HfInferenceEndpoint } from "@huggingface/inference"
 import { LLMEngine } from "@/types"
 
-export async function predict(inputs: string, nbPanels: number): Promise<string> {
+export async function predict(inputs: string, nbMaxNewTokens: number): Promise<string> {
   const hf = new HfInference(process.env.AUTH_HF_API_TOKEN)
 
   const llmEngine = `${process.env.LLM_ENGINE || ""}` as LLMEngine
@@ -12,10 +12,6 @@ export async function predict(inputs: string, nbPanels: number): Promise<string>
 
   let hfie: HfInferenceEndpoint = hf
 
-  // we don't require a lot of token for our task
-  // but to be safe, let's count ~110 tokens per panel
-  const nbMaxNewTokens = nbPanels * 130 // 110 isn't enough anymore for long dialogues
-
   switch (llmEngine) {
     case "INFERENCE_ENDPOINT":
      if (inferenceEndpoint) {
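The removed block means the per-panel token math now lives in predictNextPanels, and this adapter simply receives the final nbMaxNewTokens. The call site that consumes it is outside this diff; it would typically be forwarded to the text-generation request roughly like this (the model id, env var name and exact options are assumptions, not taken from the repo):

// Sketch only: how a token budget is usually applied with @huggingface/inference.
import { HfInference } from "@huggingface/inference"

async function generate(inputs: string, nbMaxNewTokens: number): Promise<string> {
  const hf = new HfInference(process.env.AUTH_HF_API_TOKEN)
  const output = await hf.textGeneration({
    model: `${process.env.LLM_HF_INFERENCE_API_MODEL || ""}`, // env var name assumed
    inputs,
    parameters: {
      max_new_tokens: nbMaxNewTokens, // the budget handed down by predictNextPanels
      return_full_text: false,
    },
  })
  return output.generated_text
}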
src/app/queries/predictWithOpenAI.ts
CHANGED
@@ -3,7 +3,7 @@
 import type { ChatCompletionMessage } from "openai/resources/chat"
 import OpenAI from "openai"
 
-export async function predict(inputs: string, nbPanels: number): Promise<string> {
+export async function predict(inputs: string, nbMaxNewTokens: number): Promise<string> {
   const openaiApiKey = `${process.env.AUTH_OPENAI_API_KEY || ""}`
   const openaiApiBaseUrl = `${process.env.LLM_OPENAI_API_BASE_URL || "https://api.openai.com/v1"}`
   const openaiApiModel = `${process.env.LLM_OPENAI_API_MODEL || "gpt-3.5-turbo"}`
@@ -23,6 +23,8 @@ export async function predict(inputs: string, nbPanels: number): Promise<string>
     stream: false,
     model: openaiApiModel,
     temperature: 0.8,
+    max_tokens: nbMaxNewTokens,
+
     // TODO: use the nbPanels to define a max token limit
   })
 
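After this commit all three adapters (Groq, Hugging Face, OpenAI) expose the same predict(inputs, nbMaxNewTokens) signature, so callers can stay engine-agnostic. A dispatch sketch along those lines; only the "INFERENCE_ENDPOINT" engine value is visible in the Hugging Face diff above, the other names and the overall structure are assumptions for illustration:

// Hypothetical engine-agnostic wrapper (not part of the commit):
// every adapter now takes the max-new-tokens budget as its second argument.
import { predict as predictWithGroq } from "@/app/queries/predictWithGroq"
import { predict as predictWithHuggingFace } from "@/app/queries/predictWithHuggingFace"
import { predict as predictWithOpenAI } from "@/app/queries/predictWithOpenAI"

export async function predictWithAnyEngine(inputs: string, nbMaxNewTokens: number): Promise<string> {
  switch (process.env.LLM_ENGINE) {
    case "GROQ":   // engine name assumed
      return predictWithGroq(inputs, nbMaxNewTokens)
    case "OPENAI": // engine name assumed
      return predictWithOpenAI(inputs, nbMaxNewTokens)
    default:       // covers "INFERENCE_ENDPOINT" and the Inference API path
      return predictWithHuggingFace(inputs, nbMaxNewTokens)
  }
}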