jbilcke-hf HF staff committed on
Commit
241036e
•
1 Parent(s): 8c5d17c
src/app/engine/caption.ts ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use server"
2
+
3
+ import { ImageAnalysisRequest, ImageAnalysisResponse } from "@/types"
4
+
5
+ const apiUrl = `${process.env.RENDERING_ENGINE_API || ""}`
6
+
7
/**
 * Asks the rendering engine's `/analyze` endpoint to analyze an image.
 *
 * The prompt and image are POSTed as JSON to `${apiUrl}/analyze` with
 * caching disabled.
 *
 * @param prompt - instruction sent alongside the image
 * @param imageBase64 - image payload, sent as the `image` field of the request
 *   (declared as base64 by `ImageAnalysisRequest`)
 * @returns the `result` string of the response, or `""` when the call fails
 *   (network error, non-200 status, or a response without a string `result`)
 * @throws Error when `imageBase64` or `prompt` is empty; these checks run
 *   before the `try` block, so they are not swallowed
 */
export async function see({
  prompt,
  imageBase64
}: {
  prompt: string
  imageBase64: string
}): Promise<string> {
  // The original guard tested the prompt while reporting a missing image;
  // validate each input separately so the message matches the actual cause.
  if (!imageBase64) {
    console.error(`cannot call the API without an image, aborting..`)
    throw new Error(`cannot call the API without an image, aborting..`)
  }

  if (!prompt) {
    console.error(`cannot call the API without a prompt, aborting..`)
    throw new Error(`cannot call the API without a prompt, aborting..`)
  }

  try {
    // Declared (not asserted) so the compiler checks the payload shape.
    const request: ImageAnalysisRequest = {
      prompt,
      image: imageBase64
    }

    // Only log the first characters of the image to keep the logs readable.
    console.log(`calling ${apiUrl}/analyze called with: `, {
      prompt: request.prompt,
      image: request.image.slice(0, 20)
    })

    const res = await fetch(`${apiUrl}/analyze`, {
      method: "POST",
      headers: {
        Accept: "application/json",
        "Content-Type": "application/json",
        // Authorization: `Bearer ${process.env.VC_SECRET_ACCESS_TOKEN}`,
      },
      body: JSON.stringify(request),
      cache: 'no-store',
      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
      // next: { revalidate: 1 }
    })

    if (res.status !== 200) {
      // Include the status so the logged error is actionable.
      throw new Error(`Failed to fetch data (HTTP ${res.status})`)
    }

    const response = (await res.json()) as ImageAnalysisResponse

    if (response.error) {
      console.error(`the analysis API reported an error:`, response.error)
    }

    // The JSON body is unvalidated: never let a missing result leak out as
    // `undefined` through the Promise<string> contract.
    return typeof response.result === "string" ? response.result : ""
  } catch (err) {
    // Best-effort call: failures are logged and reported as an empty caption.
    console.error(err)
    return ""
  }
}
src/app/engine/render.ts CHANGED
@@ -29,6 +29,7 @@ export async function newRender({
29
  renderId: "",
30
  status: "error",
31
  assetUrl: "",
 
32
  maskUrl: "",
33
  error: "failed to fetch the data",
34
  segments: []
@@ -58,7 +59,7 @@ export async function newRender({
58
  // no need to upscale right now as we generate tiny panels
59
  // maybe later we can provide an "export" button to PDF
60
  upscalingFactor: 2,
61
-
62
  cache: "ignore"
63
  } as Partial<RenderRequest>),
64
  cache: 'no-store',
@@ -95,6 +96,7 @@ export async function getRender(renderId: string) {
95
  renderId: "",
96
  status: "error",
97
  assetUrl: "",
 
98
  maskUrl: "",
99
  error: "failed to fetch the data",
100
  segments: []
 
29
  renderId: "",
30
  status: "error",
31
  assetUrl: "",
32
+ alt: prompt || "",
33
  maskUrl: "",
34
  error: "failed to fetch the data",
35
  segments: []
 
59
  // no need to upscale right now as we generate tiny panels
60
  // maybe later we can provide an "export" button to PDF
61
  upscalingFactor: 2,
62
+ analyze: true,
63
  cache: "ignore"
64
  } as Partial<RenderRequest>),
65
  cache: 'no-store',
 
96
  renderId: "",
97
  status: "error",
98
  assetUrl: "",
99
+ alt: "",
100
  maskUrl: "",
101
  error: "failed to fetch the data",
102
  segments: []
src/app/interface/panel/index.tsx CHANGED
@@ -11,6 +11,7 @@ import { useStore } from "@/app/store"
11
  import { cn } from "@/lib/utils"
12
  import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
13
  import { Progress } from "@/app/interface/progress"
 
14
  // import { Bubble } from "./bubble"
15
 
16
  export function Panel({
@@ -34,6 +35,10 @@ export function Panel({
34
  const panels = useStore(state => state.panels)
35
  const prompt = panels[panel] || ""
36
 
 
 
 
 
37
  const [_isPending, startTransition] = useTransition()
38
  const [rendered, setRendered] = useState<RenderedScene>(getInitialRenderedScene())
39
  const renderedRef = useRef<RenderedScene>()
@@ -72,6 +77,7 @@ export function Panel({
72
  renderId: "",
73
  status: "error",
74
  assetUrl: "",
 
75
  maskUrl: "",
76
  error: "failed to fetch the data",
77
  segments: []
@@ -128,6 +134,32 @@ export function Panel({
128
  }
129
  }, [])
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  if (isLoading) {
132
  return (
133
  <div className={cn(
@@ -152,6 +184,7 @@ export function Panel({
152
  {rendered.assetUrl && <img
153
  src={rendered.assetUrl}
154
  className="w-full h-full object-cover"
 
155
  />}
156
 
157
  {/*<Bubble className="absolute top-4 left-4">
 
11
  import { cn } from "@/lib/utils"
12
  import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
13
  import { Progress } from "@/app/interface/progress"
14
+ import { see } from "@/app/engine/caption"
15
  // import { Bubble } from "./bubble"
16
 
17
  export function Panel({
 
35
  const panels = useStore(state => state.panels)
36
  const prompt = panels[panel] || ""
37
 
38
+ // const setCaption = useStore(state => state.setCaption)
39
+ // const captions = useStore(state => state.captions)
40
+ // const caption = captions[panel] || ""
41
+
42
  const [_isPending, startTransition] = useTransition()
43
  const [rendered, setRendered] = useState<RenderedScene>(getInitialRenderedScene())
44
  const renderedRef = useRef<RenderedScene>()
 
77
  renderId: "",
78
  status: "error",
79
  assetUrl: "",
80
+ alt: "",
81
  maskUrl: "",
82
  error: "failed to fetch the data",
83
  segments: []
 
134
  }
135
  }, [])
136
 
137
+ /*
138
+ doing the captioning from the browser is expensive
139
+ a simpler solution is to caption directly during SDXL generation
140
+
141
+ useEffect(() => {
142
+ if (!rendered.assetUrl) { return }
143
+ // the asset url can evolve with time (link to a better resolution image)
144
+ // however it would be costly to ask for the caption, the low resolution is enough for the semantic resolution
145
+ // so we just do nothing if we already have the caption
146
+ if (caption) { return }
147
+ startTransition(async () => {
148
+ try {
149
+ const newCaption = await see({
150
+ prompt: "please caption the following image",
151
+ imageBase64: rendered.assetUrl
152
+ })
153
+ if (newCaption) {
154
+ setCaption(newCaption)
155
+ }
156
+ } catch (err) {
157
+ console.error(`failed to generate the caption:`, err)
158
+ }
159
+ })
160
+ }, [rendered.assetUrl, caption])
161
+ */
162
+
163
  if (isLoading) {
164
  return (
165
  <div className={cn(
 
184
  {rendered.assetUrl && <img
185
  src={rendered.assetUrl}
186
  className="w-full h-full object-cover"
187
+ alt={rendered.alt}
188
  />}
189
 
190
  {/*<Bubble className="absolute top-4 left-4">
src/app/layouts/index.tsx CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import { Panel } from "@/app/interface/panel"
2
  import { pick } from "@/lib/pick"
3
 
 
1
+ "use client"
2
+
3
  import { Panel } from "@/app/interface/panel"
4
  import { pick } from "@/lib/pick"
5
 
src/app/store/index.ts CHANGED
@@ -1,4 +1,5 @@
1
  "use client"
 
2
  import { create } from "zustand"
3
 
4
  import { FontName } from "@/lib/fonts"
@@ -10,6 +11,7 @@ export const useStore = create<{
10
  font: FontName
11
  preset: Preset
12
  panels: string[]
 
13
  layout: LayoutName
14
  zoomLevel: number
15
  isGeneratingLogic: boolean
@@ -21,6 +23,7 @@ export const useStore = create<{
21
  setPreset: (preset: Preset) => void
22
  setPanels: (panels: string[]) => void
23
  setLayout: (layout: LayoutName) => void
 
24
  setZoomLevel: (zoomLevel: number) => void
25
  setGeneratingLogic: (isGeneratingLogic: boolean) => void
26
  setGeneratingImages: (panelId: number, value: boolean) => void
@@ -30,16 +33,49 @@ export const useStore = create<{
30
  font: "cartoonist",
31
  preset: getPreset("japanese_manga"),
32
  panels: [],
 
33
  layout: getRandomLayoutName(),
34
  zoomLevel: 50,
35
  isGeneratingLogic: false,
36
  panelGenerationStatus: {},
37
  isGeneratingText: false,
38
  atLeastOnePanelIsBusy: false,
39
- setPrompt: (prompt: string) => set({ prompt }),
40
- setFont: (font: FontName) => set({ font }),
41
- setPreset: (preset: Preset) => set({ preset }),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  setPanels: (panels: string[]) => set({ panels }),
 
 
 
 
 
 
 
 
43
  setLayout: (layout: LayoutName) => set({ layout }),
44
  setZoomLevel: (zoomLevel: number) => set({ zoomLevel }),
45
  setGeneratingLogic: (isGeneratingLogic: boolean) => set({ isGeneratingLogic }),
 
1
  "use client"
2
+
3
  import { create } from "zustand"
4
 
5
  import { FontName } from "@/lib/fonts"
 
11
  font: FontName
12
  preset: Preset
13
  panels: string[]
14
+ captions: Record<string, string>
15
  layout: LayoutName
16
  zoomLevel: number
17
  isGeneratingLogic: boolean
 
23
  setPreset: (preset: Preset) => void
24
  setPanels: (panels: string[]) => void
25
  setLayout: (layout: LayoutName) => void
26
+ setCaption: (panelId: number, caption: string) => void
27
  setZoomLevel: (zoomLevel: number) => void
28
  setGeneratingLogic: (isGeneratingLogic: boolean) => void
29
  setGeneratingImages: (panelId: number, value: boolean) => void
 
33
  font: "cartoonist",
34
  preset: getPreset("japanese_manga"),
35
  panels: [],
36
+ captions: {},
37
  layout: getRandomLayoutName(),
38
  zoomLevel: 50,
39
  isGeneratingLogic: false,
40
  panelGenerationStatus: {},
41
  isGeneratingText: false,
42
  atLeastOnePanelIsBusy: false,
43
+ setPrompt: (prompt: string) => {
44
+ const existingPrompt = get().prompt
45
+ if (prompt === existingPrompt) { return }
46
+ set({
47
+ prompt,
48
+ panels: [],
49
+ captions: {},
50
+ })
51
+ },
52
+ setFont: (font: FontName) => {
53
+ const existingFont = get().font
54
+ if (font === existingFont) { return }
55
+ set({
56
+ font,
57
+ panels: [],
58
+ captions: {}
59
+ })
60
+ },
61
+ setPreset: (preset: Preset) => {
62
+ const existingPreset = get().preset
63
+ if (preset.label === existingPreset.label) { return }
64
+ set({
65
+ preset,
66
+ panels: [],
67
+ captions: {}
68
+ })
69
+ },
70
  setPanels: (panels: string[]) => set({ panels }),
71
+ setCaption: (panelId: number, caption: string) => {
72
+ set({
73
+ captions: {
74
+ ...get().captions,
75
+ [panelId]: caption
76
+ }
77
+ })
78
+ },
79
  setLayout: (layout: LayoutName) => set({ layout }),
80
  setZoomLevel: (zoomLevel: number) => set({ zoomLevel }),
81
  setGeneratingLogic: (isGeneratingLogic: boolean) => set({ isGeneratingLogic }),
src/lib/getInitialRenderedScene.ts CHANGED
@@ -4,6 +4,7 @@ export const getInitialRenderedScene = (): RenderedScene => ({
4
  renderId: "",
5
  status: "pending",
6
  assetUrl: "",
 
7
  error: "",
8
  maskUrl: "",
9
  segments: []
 
4
  renderId: "",
5
  status: "pending",
6
  assetUrl: "",
7
+ alt: "",
8
  error: "",
9
  maskUrl: "",
10
  segments: []
src/types.ts CHANGED
@@ -42,6 +42,8 @@ export interface RenderRequest {
42
  cache: CacheMode
43
 
44
  wait: boolean // wait until the job is completed
 
 
45
  }
46
 
47
  export interface ImageSegment {
@@ -61,8 +63,18 @@ export interface RenderedScene {
61
  renderId: string
62
  status: RenderedSceneStatus
63
  assetUrl: string
 
64
  error: string
65
  maskUrl: string
66
  segments: ImageSegment[]
67
  }
68
 
 
 
 
 
 
 
 
 
 
 
42
  cache: CacheMode
43
 
44
  wait: boolean // wait until the job is completed
45
+
46
+ analyze: boolean // analyze the image to generate a caption (optional)
47
  }
48
 
49
  export interface ImageSegment {
 
63
  renderId: string
64
  status: RenderedSceneStatus
65
  assetUrl: string
66
+ alt: string
67
  error: string
68
  maskUrl: string
69
  segments: ImageSegment[]
70
  }
71
 
72
/** JSON payload that see() in src/app/engine/caption.ts POSTs to the `/analyze` endpoint. */
+ export interface ImageAnalysisRequest {
73
+ image: string // in base64
74
+ prompt: string
75
+ }
76
+
77
/**
 * Shape that see() in src/app/engine/caption.ts casts the `/analyze` JSON response to.
 * see() returns `result`; `error` is optional and is not read by see().
 */
+ export interface ImageAnalysisResponse {
78
+ result: string
79
+ error?: string
80
+ }