jbilcke-hf HF staff committed on
Commit
6896326
1 Parent(s): 9802882

things are taking shape

Browse files
package-lock.json CHANGED
@@ -8,6 +8,7 @@
8
  "name": "video-quest",
9
  "version": "0.0.0",
10
  "dependencies": {
 
11
  "@huggingface/inference": "^2.6.1",
12
  "@radix-ui/react-accordion": "^1.1.2",
13
  "@radix-ui/react-avatar": "^1.0.3",
@@ -184,6 +185,17 @@
184
  "react-dom": ">=16.8.0"
185
  }
186
  },
 
 
 
 
 
 
 
 
 
 
 
187
  "node_modules/@huggingface/inference": {
188
  "version": "2.6.1",
189
  "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.6.1.tgz",
 
8
  "name": "video-quest",
9
  "version": "0.0.0",
10
  "dependencies": {
11
+ "@huggingface/agents": "^0.0.4",
12
  "@huggingface/inference": "^2.6.1",
13
  "@radix-ui/react-accordion": "^1.1.2",
14
  "@radix-ui/react-avatar": "^1.0.3",
 
185
  "react-dom": ">=16.8.0"
186
  }
187
  },
188
+ "node_modules/@huggingface/agents": {
189
+ "version": "0.0.4",
190
+ "resolved": "https://registry.npmjs.org/@huggingface/agents/-/agents-0.0.4.tgz",
191
+ "integrity": "sha512-jjsiMEba2dLLYUkMnX0g+bddM/9De0tk73RAiXL0uE117qdDOlUK/D+tI1krDfxpn9J+Qu9w7BqmufIMZFhV4Q==",
192
+ "dependencies": {
193
+ "@huggingface/inference": "^2.6.1"
194
+ },
195
+ "engines": {
196
+ "node": ">=18"
197
+ }
198
+ },
199
  "node_modules/@huggingface/inference": {
200
  "version": "2.6.1",
201
  "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.6.1.tgz",
package.json CHANGED
@@ -9,6 +9,7 @@
9
  "lint": "next lint"
10
  },
11
  "dependencies": {
 
12
  "@huggingface/inference": "^2.6.1",
13
  "@radix-ui/react-accordion": "^1.1.2",
14
  "@radix-ui/react-avatar": "^1.0.3",
 
9
  "lint": "next lint"
10
  },
11
  "dependencies": {
12
+ "@huggingface/agents": "^0.0.4",
13
  "@huggingface/inference": "^2.6.1",
14
  "@radix-ui/react-accordion": "^1.1.2",
15
  "@radix-ui/react-avatar": "^1.0.3",
src/app/agents/index.ts DELETED
@@ -1,11 +0,0 @@
1
- import { Agent, AgentType } from "./types"
2
-
3
- import { agent as pirates } from "./pirates"
4
- import { agent as city } from "./city"
5
- import { agent as dungeon } from "./dungeon"
6
-
7
- export const agents = { pirates, city, dungeon }
8
-
9
- export const defaultAgent: AgentType = "pirates"
10
-
11
- export const getAgent = (type?: AgentType) => agents[type || defaultAgent] || agents[defaultAgent]
 
 
 
 
 
 
 
 
 
 
 
 
src/app/agents/server.ts DELETED
@@ -1,10 +0,0 @@
1
- "use server"
2
-
3
- import { HfInference } from "@huggingface/inference"
4
-
5
- const hfi = new HfInference(process.env.HF_API_TOKEN)
6
- const hf = hfi.endpoint(`${process.env.HF_INFERENCE_ENDPOINT_URL || ""}`)
7
-
8
- export async function decideNextSteps(userAction: string) {
9
- return ""
10
- }
 
 
 
 
 
 
 
 
 
 
 
src/app/agents/types.ts DELETED
@@ -1,15 +0,0 @@
1
- export type AgentType = 'pirates' | 'city' | 'dungeon'
2
-
3
- export interface Scene {
4
- action: string
5
- position: string
6
- light: string
7
- actionnables: string[]
8
- prompt: string
9
- }
10
-
11
- export interface Agent {
12
- title: string
13
- type: AgentType
14
- simulate: () => Scene
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/app/{agents → games}/city.ts RENAMED
@@ -1,5 +1,4 @@
1
- import { pick } from "./pick"
2
- import { Agent, Scene } from "./types"
3
 
4
  const actions = [
5
  "busy pedestrians",
@@ -25,40 +24,33 @@ const lights = [
25
  // "during the night",
26
  ]
27
 
28
- const actionnables = [
 
 
 
 
 
29
  "building",
30
  "road",
31
  "car",
32
  "tower",
33
  "tree",
34
  "river",
35
- "sea"
 
36
  ]
37
 
38
- export const agent: Agent = {
39
  title: "City",
40
  type: "city",
41
- simulate: (): Scene => {
42
- const action = pick(actions)
43
- const position = pick(positions)
44
- const light = pick(lights)
45
-
46
- const prompt = [
47
- `static isometrical view of 3D rendered city`,
48
- action,
49
- position,
50
- light,
51
- `isometric`,
52
- `game`,
53
- `high res`,
54
- ].join(", ")
55
-
56
- return {
57
- action,
58
- position,
59
- light,
60
- actionnables,
61
- prompt
62
- }
63
- }
64
- }
 
1
+ import { Game } from "./types"
 
2
 
3
  const actions = [
4
  "busy pedestrians",
 
24
  // "during the night",
25
  ]
26
 
27
+ const initialSituation = [
28
+ `over the city town center`,
29
+ `at noon`,
30
+ ].join(", ")
31
+
32
+ const initialActionnables = [
33
  "building",
34
  "road",
35
  "car",
36
  "tower",
37
  "tree",
38
  "river",
39
+ "sea",
40
+ "house"
41
  ]
42
 
43
+ export const game: Game = {
44
  title: "City",
45
  type: "city",
46
+ initialSituation,
47
+ initialActionnables,
48
+ getScenePrompt: (situation?: string) => [
49
+ `isometrical bird view of 3D rendered city`,
50
+ situation || initialSituation,
51
+ `game screenshot`,
52
+ `isometric`,
53
+ `unreal engine`,
54
+ `high res`,
55
+ ]
56
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
src/app/{agents → games}/dungeon.ts RENAMED
@@ -1,5 +1,4 @@
1
- import { pick } from "./pick"
2
- import { Agent, Scene } from "./types"
3
 
4
  const actions = [
5
  "not moving",
@@ -24,45 +23,37 @@ const positions = [
24
 
25
  const lights = [
26
  "lit through windows",
27
- "lit through wall-mounted torchs"
28
  // "poorly lit"
29
  ]
30
 
31
- const actionnables = [
32
- "floor",
33
- "fireplace",
 
 
 
 
 
34
  "door",
35
- "window",
36
  "chair",
 
 
37
  "table",
38
- "torch"
39
  ]
40
 
41
- export const agent: Agent = {
42
  title: "Dungeon",
43
  type: "dungeon",
44
- simulate: (): Scene => {
45
- const action = pick(actions)
46
- const position = pick(positions)
47
- const light = pick(lights)
48
-
49
- const prompt = [
50
- `first-person footage`,
51
- action,
52
- position,
53
- light,
54
- `medieval`,
55
- `photography`,
56
- `documentary`,
57
- `high res`,
58
- ].join(", ")
59
-
60
- return {
61
- action,
62
- position,
63
- light,
64
- actionnables,
65
- prompt
66
- }
67
- }
68
- }
 
1
+ import { Game, Scene } from "./types"
 
2
 
3
  const actions = [
4
  "not moving",
 
23
 
24
  const lights = [
25
  "lit through windows",
26
+ "lit through wall-mounted torches"
27
  // "poorly lit"
28
  ]
29
 
30
+ const initialSituation = [
31
+ `inside a beautiful room with stone walls and wooden floor`,
32
+ `a fireplace on the wall and a metal chest in the center with a large lock`,
33
+ ].join(", ")
34
+
35
+ const initialActionnables = [
36
+ // "floor",
37
+ // "fireplace",
38
  "door",
39
+ // "window",
40
  "chair",
41
+ "chest",
42
+ "key",
43
  "table",
44
+ // "torch"
45
  ]
46
 
47
+ export const game: Game = {
48
  title: "Dungeon",
49
  type: "dungeon",
50
+ initialSituation,
51
+ initialActionnables,
52
+ getScenePrompt: (situation?: string) => [
53
+ `screenshot from an adventure videogame`,
54
+ // `first-person footage`,
55
+ situation || initialSituation,
56
+ `medieval`,
57
+ `unreal engine`,
58
+ ].join(", ")
59
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/app/games/index.ts ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { GameType } from "./types"
2
+
3
+ import { game as pirates } from "./pirates"
4
+ import { game as city } from "./city"
5
+ import { game as dungeon } from "./dungeon"
6
+
7
+ export const games = { pirates, city, dungeon }
8
+
9
+ export const defaultGame: GameType = "pirates"
10
+
11
+ export const getGame = (type?: GameType) => games[type || defaultGame] || games[defaultGame]
src/app/{agents → games}/pick.ts RENAMED
File without changes
src/app/{agents → games}/pirates.ts RENAMED
@@ -1,5 +1,4 @@
1
- import { pick } from "./pick"
2
- import { Agent, Scene } from "./types"
3
 
4
  const actions = [
5
  "idling",
@@ -25,8 +24,9 @@ const lights = [
25
  "during the day",
26
  ]
27
 
28
- const actionnables = [
29
  "chest",
 
30
  // "door",
31
  // "window",
32
  // "sail",
@@ -34,41 +34,33 @@ const actionnables = [
34
  // "ship's wheel",
35
  // "hat",
36
  // "barrel",
37
- "cannon",
38
  // "rope",
39
  // "bucket",
40
- "skull",
41
  "parrot",
42
  "lock",
43
- "ship",
44
  // "wooden leg"
45
  ]
46
 
47
- export const agent: Agent = {
 
 
 
 
 
 
 
48
  title: "Pirates",
49
  type: "pirates",
50
- simulate: (): Scene => {
51
- const action = pick(actions)
52
- const position = pick(positions)
53
- const light = pick(lights)
54
-
55
  // this prompt is beautiful:
56
  // screenshot from an adventure videogame, inside the hold of a pirate ship, with a pirate chest in the center, at sunset, beautiful, award winning, unreal engine, intricate details
57
- const prompt = [
58
  `screenshot from an adventure videogame`,
59
- `inside the hold of a pirate ship`,
60
- `a pirate chest in the center with a large lock`,
61
- `a parrot on top of it`,
62
- `at sunset`,
63
  `unreal engine`,
64
- ].join(", ")
65
-
66
- return {
67
- action,
68
- position,
69
- light,
70
- actionnables,
71
- prompt
72
- }
73
- }
74
  }
 
1
+ import { Game } from "./types"
 
2
 
3
  const actions = [
4
  "idling",
 
24
  "during the day",
25
  ]
26
 
27
+ const initialActionnables = [
28
  "chest",
29
+ "box",
30
  // "door",
31
  // "window",
32
  // "sail",
 
34
  // "ship's wheel",
35
  // "hat",
36
  // "barrel",
37
+ // "cannon",
38
  // "rope",
39
  // "bucket",
40
+ // "skull",
41
  "parrot",
42
  "lock",
43
+ // "ship",
44
  // "wooden leg"
45
  ]
46
 
47
+ const initialSituation = [
48
+ `inside the hold of a pirate ship`,
49
+ `a pirate chest in the center with a large lock`,
50
+ `a parrot on top of it`,
51
+ `at sunset`,
52
+ ].join(", ")
53
+
54
+ export const game: Game = {
55
  title: "Pirates",
56
  type: "pirates",
57
+ initialSituation,
58
+ initialActionnables,
59
+ getScenePrompt: (situation?: string) => [
 
 
60
  // this prompt is beautiful:
61
  // screenshot from an adventure videogame, inside the hold of a pirate ship, with a pirate chest in the center, at sunset, beautiful, award winning, unreal engine, intricate details
 
62
  `screenshot from an adventure videogame`,
63
+ situation || initialSituation,
 
 
 
64
  `unreal engine`,
65
+ ],
 
 
 
 
 
 
 
 
 
66
  }
src/app/games/types.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export type GameType = 'pirates' | 'city' | 'dungeon'
2
+
3
+ export interface Scene {
4
+ actionnables: string[]
5
+ prompt: string
6
+ }
7
+
8
+ export interface Game {
9
+ title: string
10
+ type: GameType
11
+ initialSituation: string
12
+ initialActionnables: string[]
13
+ getScenePrompt: (situation?: string) => string | string[]
14
+ }
src/app/main.tsx CHANGED
@@ -13,40 +13,51 @@ import {
13
  } from "@/components/ui/select"
14
 
15
  import { render } from "./render"
16
- import { AgentType, Scene } from "./agents/types"
17
- import { agents, defaultAgent, getAgent } from "./agents"
18
  import { RenderedScene } from "./types"
 
 
 
19
 
20
  export default function Main() {
21
  const [isPending, startTransition] = useTransition()
22
- const [scene, setScene] = useState<Scene>()
23
  const [rendered, setRendered] = useState<RenderedScene>({
24
  assetUrl: "",
25
  error: "",
26
  maskBase64: "",
27
  segments:[]
28
  })
29
- const ref = useRef<AgentType>(defaultAgent)
30
-
31
-
32
- const loadNextScene = async () => {
 
 
 
 
33
  // console.log(`update view..`)
 
34
 
35
  await startTransition(async () => {
36
 
37
  // console.log(`getting agent..`)
 
38
  const type = ref?.current
39
- const agent = getAgent(type)
40
-
41
- // console.log(`asking agent to determine things..`)
42
- const scene = agent.simulate()
43
 
44
  // console.log(`rendering scene..`)
45
  const newRendered = await render(
46
- scene.prompt,
47
- scene.actionnables.slice(0, 5) // too many can slow us down it seems
 
 
 
 
 
 
48
  )
49
 
 
50
  if (type !== ref?.current) {
51
  console.log("agent type changed! reloading scene")
52
  setTimeout(() => { loadNextScene() }, 0)
@@ -54,9 +65,11 @@ export default function Main() {
54
  }
55
 
56
  if (newRendered.assetUrl) {
57
- setRendered(newRendered)
58
- // console.log(`got a new url: ${newUrl}`)
59
  setScene(scene)
 
 
 
60
  }
61
  })
62
  }
@@ -65,15 +78,108 @@ export default function Main() {
65
  loadNextScene()
66
  }, [])
67
 
68
- const handleUserAction = (action: string) => {
69
- console.log("user action:", action)
70
-
71
  // TODO: ask Llama2 what to do about it
72
  // we need a frame and some actionnables,
73
  // perhaps even some music or sound effects
74
 
75
- console.log("we don't know what to do, so we just load the next frame!")
76
- loadNextScene()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
78
 
79
  return (
@@ -82,9 +188,9 @@ export default function Main() {
82
  <div className="flex flex-row items-center space-x-3">
83
  <label className="flex">Select a story:</label>
84
  <Select
85
- defaultValue={defaultAgent}
86
  onValueChange={(value) => {
87
- ref.current = value as AgentType
88
  setRendered({
89
  assetUrl: "",
90
  error: "",
@@ -96,27 +202,30 @@ export default function Main() {
96
  <SelectValue placeholder="Type" />
97
  </SelectTrigger>
98
  <SelectContent>
99
- {Object.entries(agents).map(([key, agent]) =>
100
- <SelectItem key={key} value={key}>{agent.title}</SelectItem>
101
  )}
102
  </SelectContent>
103
  </Select>
104
  </div>
105
- <p>Note: it takes about 1 minute to generate a new game panel</p>
106
-
107
- {(scene) ? <div>
108
- <p>Action: {scene.action}</p>
109
- <p>Position: {scene.position}</p>
110
- <p>Light: {scene.light}</p>
111
- </div> : null}
112
- <div className="flex flex-col">
113
- {rendered.segments.map((segment, i) =>
114
- <div key={i}>
115
- {segment.label} ({segment.score})
116
  </div>)}
117
  </div>
 
118
  </div>
119
- <ImageRenderer rendered={rendered} onUserAction={handleUserAction} />
 
 
 
 
 
120
  </div>
121
  )
122
  }
 
13
  } from "@/components/ui/select"
14
 
15
  import { render } from "./render"
16
+
 
17
  import { RenderedScene } from "./types"
18
+ import { predict } from "./predict"
19
+ import { GameType } from "./games/types"
20
+ import { defaultGame, games, getGame } from "./games"
21
 
22
  export default function Main() {
23
  const [isPending, startTransition] = useTransition()
 
24
  const [rendered, setRendered] = useState<RenderedScene>({
25
  assetUrl: "",
26
  error: "",
27
  maskBase64: "",
28
  segments:[]
29
  })
30
+ const ref = useRef<GameType>(defaultGame)
31
+ const [situation, setSituation] = useState("")
32
+ const [scene, setScene] = useState("")
33
+ const [dialogue, setDialogue] = useState("")
34
+ const [hoveredActionnable, setHoveredActionnable] = useState("")
35
+ const [isLoading, setLoading] = useState(true)
36
+
37
+ const loadNextScene = async (nextSituation?: string, nextActionnables?: string[]) => {
38
  // console.log(`update view..`)
39
+ setLoading(true)
40
 
41
  await startTransition(async () => {
42
 
43
  // console.log(`getting agent..`)
44
+ // note: we use a ref so that it can be changed in the background
45
  const type = ref?.current
46
+ const game = getGame(type)
 
 
 
47
 
48
  // console.log(`rendering scene..`)
49
  const newRendered = await render(
50
+ // SCENE PROMPT
51
+ [...game.getScenePrompt(nextSituation)].join(", "),
52
+
53
+ // ACTIONNABLES
54
+ (Array.isArray(nextActionnables) && nextActionnables.length
55
+ ? nextActionnables
56
+ : game.initialActionnables
57
+ ).slice(0, 6) // too many can slow us down it seems
58
  )
59
 
60
+ // detect if something changed in the background
61
  if (type !== ref?.current) {
62
  console.log("agent type changed! reloading scene")
63
  setTimeout(() => { loadNextScene() }, 0)
 
65
  }
66
 
67
  if (newRendered.assetUrl) {
68
+ // console.log(`got a new url: ${newRendered.assetUrl}`)
 
69
  setScene(scene)
70
+
71
+ setRendered(newRendered)
72
+ setLoading(false)
73
  }
74
  })
75
  }
 
78
  loadNextScene()
79
  }, [])
80
 
81
+ const handleUserAction = async (actionnable: string) => {
82
+ console.log("user actionnable:", actionnable)
83
+
84
  // TODO: ask Llama2 what to do about it
85
  // we need a frame and some actionnables,
86
  // perhaps even some music or sound effects
87
 
88
+ await startTransition(async () => {
89
+
90
+ setLoading(true)
91
+
92
+ const game = getGame(ref.current)
93
+ const initialPrompt = [...game.getScenePrompt()].join(", ")
94
+
95
+ const currentPrompt = situation
96
+ ? [...game.getScenePrompt(situation)].join(", ")
97
+ : initialPrompt
98
+
99
+ try {
100
+ const basePrompt = [
101
+ `QUESTION: You are the AI game master of a role video game.`,
102
+ initialPrompt !== currentPrompt ? `The initial scene of the game was this: "${initialPrompt}".` : '',
103
+ `The player is currently in this scene: "${currentPrompt}".`,
104
+ `The player has just clicked on "${actionnable}".`
105
+ ]
106
+
107
+ console.log("ask the LLM to invent next steps..")
108
+
109
+ const rawSituation = await predict([
110
+ ...basePrompt,
111
+ `Please describe the new scene to display in intricate details: the environment, lights, era, characters, objects, textures, light etc. You must include important objects, that the user can click on (eg. characters, doors, vehicles, useful objects).\nANSWER:`
112
+ ].join(" "))
113
+
114
+ console.log(`rawSituation: `, rawSituation)
115
+
116
+ if (!rawSituation) {
117
+ throw new Error("failed to generate the situation")
118
+ }
119
+ const newSituation = `${rawSituation.split("QUESTION:")[0] || ""}`
120
+ if (!newSituation) {
121
+ throw new Error("failed to parse the situation")
122
+ }
123
+
124
+ console.log(`newSituation: `, newSituation)
125
+
126
+ const rawActionnables = await predict([
127
+ ...basePrompt,
128
+ `Here are the 4 most important objects visible in this scene, that the user can click on. The list is in JSON (list of strings). You must list basic name of things (eg. "parrot", "chest", "spaceship", "glass", "door", "person", "window", "light", "knob", "button" etc..) \nJSON = [`
129
+ ].join(" "))
130
+ console.log(`rawActionnables: `, rawActionnables)
131
+
132
+
133
+ if (!rawActionnables) {
134
+ throw new Error("failed to generate the actionnables")
135
+ }
136
+
137
+ let newActionnables = []
138
+ try {
139
+ newActionnables = (JSON.parse(
140
+ `[${rawActionnables.split("]")[0] || ""}]`
141
+ ) as string[]).map(item =>
142
+ // clean the words to remove any punctuation
143
+ item.replace(/\W/g, '').trim()
144
+ )
145
+
146
+ if (!newActionnables.length) {
147
+ throw new Error("no actionnables")
148
+ }
149
+ } catch (err) {
150
+ throw new Error("failed to parse the actionnables")
151
+ }
152
+
153
+ console.log(`newActionnables: `, newActionnables)
154
+
155
+
156
+ const rawDialogue = await predict([
157
+ ...basePrompt,
158
+ `As a game master, what should you say next? (Only reply with 2 sentences, please).\nANSWER:`
159
+ ].join(" "))
160
+ console.log(`rawDialogue: `, rawDialogue)
161
+
162
+ if (!rawDialogue) {
163
+ throw new Error("failed to generate the dialogue")
164
+ }
165
+ const newDialogue = `${rawDialogue.split("QUESTION:")[0] || ""}`
166
+ if (!newDialogue) {
167
+ throw new Error("failed to parse the dialogue")
168
+ }
169
+ console.log(`newDialogue: `, newDialogue)
170
+
171
+
172
+ setDialogue(newDialogue)
173
+ setSituation(newSituation)
174
+
175
+ console.log("loading next scene..")
176
+ await loadNextScene(newSituation, newActionnables)
177
+
178
+ // todo we could also use useEffect
179
+ } catch (err) {
180
+ console.error(err)
181
+ }
182
+ })
183
  }
184
 
185
  return (
 
188
  <div className="flex flex-row items-center space-x-3">
189
  <label className="flex">Select a story:</label>
190
  <Select
191
+ defaultValue={defaultGame}
192
  onValueChange={(value) => {
193
+ ref.current = value as GameType
194
  setRendered({
195
  assetUrl: "",
196
  error: "",
 
202
  <SelectValue placeholder="Type" />
203
  </SelectTrigger>
204
  <SelectContent>
205
+ {Object.entries(games).map(([key, game]) =>
206
+ <SelectItem key={key} value={key}>{game.title}</SelectItem>
207
  )}
208
  </SelectContent>
209
  </Select>
210
  </div>
211
+ <p className="text-xl">The server is blowing up! Loading a panel may take a few minutes.</p>
212
+ <p className="text-xl">{dialogue}</p>
213
+ <div className="flex flex-row">
214
+ <div className="text-xl mr-2">🔎 Possible items:</div>
215
+ {rendered.segments.map((segment, i) =>
216
+ <div key={i} className="flex flex-row text-xl mr-2">
217
+ <div className="">{segment.label}</div>
218
+ {i < (rendered.segments.length - 1) ? <div>,</div> : null}
 
 
 
219
  </div>)}
220
  </div>
221
+ <p className="text-xl font-normal">You may be looking at.. <span className="font-bold">{hoveredActionnable || "nothing"}</span></p>
222
  </div>
223
+ <ImageRenderer
224
+ rendered={rendered}
225
+ onUserAction={handleUserAction}
226
+ onUserHover={setHoveredActionnable}
227
+ isLoading={isLoading}
228
+ />
229
  </div>
230
  )
231
  }
src/app/predict.ts ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use server"
2
+
3
+ import { HfInference } from "@huggingface/inference"
4
+
5
+ const hfi = new HfInference(process.env.HF_API_TOKEN)
6
+ const hf = hfi.endpoint(`${process.env.HF_INFERENCE_ENDPOINT_URL || ""}`)
7
+
8
+ export async function predict(inputs: string) {
9
+
10
+ let instructions = ""
11
+ try {
12
+ for await (const output of hf.textGenerationStream({
13
+ inputs,
14
+ parameters: {
15
+ do_sample: true,
16
+
17
+ // hard limit for max_new_tokens is 1512
18
+ max_new_tokens: 200, // 1150,
19
+ return_full_text: false,
20
+ }
21
+ })) {
22
+ instructions += output.token.text
23
+ process.stdout.write(output.token.text)
24
+ if (
25
+ instructions.includes("</s>") ||
26
+ instructions.includes("<s>") ||
27
+ instructions.includes("<|end|>") ||
28
+ instructions.includes("<|assistant|>")
29
+ ) {
30
+ break
31
+ }
32
+ }
33
+ } catch (err) {
34
+ console.error(`error during generation: ${err}`)
35
+ }
36
+
37
+ // need to do some cleanup of the garbage the LLM might have given us
38
+ return (
39
+ instructions
40
+ .replaceAll("<|end|>", "")
41
+ .replaceAll("<s>", "")
42
+ .replaceAll("</s>", "")
43
+ .replaceAll("<|assistant|>", "")
44
+ .replaceAll('""', '"')
45
+ )
46
+ }
src/components/business/image-renderer.tsx CHANGED
@@ -9,9 +9,13 @@ export const ImageRenderer = ({
9
  segments = []
10
  },
11
  onUserAction,
 
 
12
  }: {
13
  rendered: RenderedScene
14
- onUserAction: (action: string) => void
 
 
15
  }) => {
16
  const imgRef = useRef<HTMLImageElement | null>(null)
17
  const canvasRef = useRef<HTMLCanvasElement | null>(null)
@@ -20,7 +24,7 @@ export const ImageRenderer = ({
20
 
21
  useEffect(() => {
22
  if (maskBase64) {
23
- console.log("maskBase64:", maskBase64)
24
  const img = new Image();
25
  img.onload = function () {
26
  canvasRef.current = document.createElement('canvas');
@@ -31,7 +35,7 @@ export const ImageRenderer = ({
31
  }
32
  img.src = "data:image/png;base64," + maskBase64;
33
  } else {
34
- console.log("error, no maskBase64 detected!")
35
  }
36
  }, [maskBase64]);
37
 
@@ -82,15 +86,21 @@ export const ImageRenderer = ({
82
  if(distance < minDistance) {
83
  minDistance = distance;
84
  closestSegment = segment;
 
85
  }
86
  });
87
 
88
  return closestSegment;
89
  }
90
 
91
- const handleMouseEvent = (event: React.MouseEvent, isClickEvent: boolean = false) => {
92
  if (!contextRef.current) return; // Return early if mask image has not been loaded yet
93
 
 
 
 
 
 
94
  const boundingRect = imgRef.current!.getBoundingClientRect();
95
  const x = event.clientX - boundingRect.left;
96
  const y = event.clientY - boundingRect.top;
@@ -98,22 +108,28 @@ export const ImageRenderer = ({
98
  const newSegment = getSegmentAt(x, y)
99
 
100
  if (actionnable !== newSegment.label) {
 
 
 
 
 
101
  setActionnable(newSegment.label)
102
  }
103
 
104
- if (!newSegment.label) { return }
105
-
106
- console.log("actionnable: ", actionnable)
107
-
108
  if (isClickEvent) {
109
- console.log("User clicked on " + actionnable);
110
- // onUserAction(actionnable);
 
 
 
 
 
111
  }
112
  };
113
 
114
  if (!assetUrl) {
115
  return <div className="flex w-full h-screen items-center justify-center text-center">
116
- <div>Rendering first frame.. (might take around 30s)</div>
117
  </div>
118
  }
119
 
@@ -150,17 +166,22 @@ export const ImageRenderer = ({
150
  */
151
 
152
  return (
153
- <div className="w-full py-8 px-2">
 
 
 
 
154
  <div className="relative w-full">
155
  <img
156
- src={"data:image/png;base64," + maskBase64}
 
157
  ref={imgRef}
158
  width="1024px"
159
  height="512px"
160
  className={
161
  [
162
- "absolute top-0 left-0 opacity-30",
163
- actionnable ? "cursor-pointer" : ""
164
  ].join(" ")
165
  }
166
  onMouseDown={(event) => handleMouseEvent(event, true)}
 
9
  segments = []
10
  },
11
  onUserAction,
12
+ onUserHover,
13
+ isLoading = false,
14
  }: {
15
  rendered: RenderedScene
16
+ onUserAction: (actionnable: string) => void
17
+ onUserHover: (actionnable: string) => void
18
+ isLoading?: boolean
19
  }) => {
20
  const imgRef = useRef<HTMLImageElement | null>(null)
21
  const canvasRef = useRef<HTMLCanvasElement | null>(null)
 
24
 
25
  useEffect(() => {
26
  if (maskBase64) {
27
+ // console.log("maskBase64:", maskBase64)
28
  const img = new Image();
29
  img.onload = function () {
30
  canvasRef.current = document.createElement('canvas');
 
35
  }
36
  img.src = "data:image/png;base64," + maskBase64;
37
  } else {
38
+ // console.log("error, no maskBase64 detected!")
39
  }
40
  }, [maskBase64]);
41
 
 
86
  if(distance < minDistance) {
87
  minDistance = distance;
88
  closestSegment = segment;
89
+ console.log(`${distance} -> ${segment.label}: score = ${segment.score}`)
90
  }
91
  });
92
 
93
  return closestSegment;
94
  }
95
 
96
+ const handleMouseEvent = async (event: React.MouseEvent, isClickEvent: boolean = false) => {
97
  if (!contextRef.current) return; // Return early if mask image has not been loaded yet
98
 
99
+ if (isLoading) {
100
+ // we ignore all user interactions
101
+ return false
102
+ }
103
+
104
  const boundingRect = imgRef.current!.getBoundingClientRect();
105
  const x = event.clientX - boundingRect.left;
106
  const y = event.clientY - boundingRect.top;
 
108
  const newSegment = getSegmentAt(x, y)
109
 
110
  if (actionnable !== newSegment.label) {
111
+ if (newSegment.label) {
112
+ console.log(`User is hovering "${newSegment.label}"`);
113
+ } else {
114
+ console.log(`Nothing in the area`);
115
+ }
116
  setActionnable(newSegment.label)
117
  }
118
 
 
 
 
 
119
  if (isClickEvent) {
120
+ if (!newSegment.label) {
121
+ return
122
+ }
123
+ console.log("User clicked on " + newSegment.label)
124
+ onUserAction(actionnable)
125
+ } else {
126
+ onUserHover(actionnable)
127
  }
128
  };
129
 
130
  if (!assetUrl) {
131
  return <div className="flex w-full h-screen items-center justify-center text-center">
132
+ <div>Generating a new panel..</div>
133
  </div>
134
  }
135
 
 
166
  */
167
 
168
  return (
169
+ <div className={[
170
+ "w-full py-8 px-2",
171
+ isLoading ? "animate-pulse" : ""
172
+ ].join(" ")
173
+ }>
174
  <div className="relative w-full">
175
  <img
176
+ src={assetUrl}
177
+ // src={"data:image/png;base64," + maskBase64}
178
  ref={imgRef}
179
  width="1024px"
180
  height="512px"
181
  className={
182
  [
183
+ "absolute top-0 left-0",
184
+ actionnable && !isLoading ? "cursor-pointer" : ""
185
  ].join(" ")
186
  }
187
  onMouseDown={(event) => handleMouseEvent(event, true)}