diff --git a/.env b/.env index 08b69857c12eee20fe4944310f1edc417f0a93d7..cd1f42ac674c51e915ebaabad7b159b63d3b1a66 100644 --- a/.env +++ b/.env @@ -1,2 +1,9 @@ HF_API_TOKEN="" MICROSERVICE_API_SECRET_TOKEN="" + +AI_TUBE_URL="https://aitube.at" +# AI_TUBE_URL="http://localhost:3000" + +AI_TUBE_API_SECRET_JWT_KEY="" +AI_TUBE_API_SECRET_JWT_ISSUER="" +AI_TUBE_API_SECRET_JWT_AUDIENCE="" \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index e0d40e7d68d0b7a76cefb8751120af363deb098c..7898c92447507b7cab9d542ab5f64ad4feb3c909 100644 --- a/package-lock.json +++ b/package-lock.json @@ -54,6 +54,7 @@ "typescript": "5.4.5", "usehooks-ts": "^2.14.0", "uuid": "^9.0.1", + "yaml": "^2.4.1", "zustand": "^4.5.1" } }, diff --git a/package.json b/package.json index c74e2d3619d3535d4b313fb376c78fba31eaba17..548a9d14446162f1cc529e96593ebe47ea1764f6 100644 --- a/package.json +++ b/package.json @@ -55,6 +55,7 @@ "typescript": "5.4.5", "usehooks-ts": "^2.14.0", "uuid": "^9.0.1", + "yaml": "^2.4.1", "zustand": "^4.5.1" } } diff --git a/src/app/layout.tsx b/src/app/layout.tsx index 7f337889dc47a658188d1969bc2bbced825aed60..82e1be7710429d89c7030432fa492f3316824ca3 100644 --- a/src/app/layout.tsx +++ b/src/app/layout.tsx @@ -1,4 +1,4 @@ -import { cn } from '@/lib/utils' +import { cn } from '@/lib/utils/cn' import './globals.css' import type { Metadata } from 'next' import { inter, salsa } from './fonts' diff --git a/src/app/main.tsx b/src/app/main.tsx index e3904665c3f2edd225248185d4135fb8e4c4ffa6..680e9ecfc094b5de990f70c7c8944887c74a2336 100644 --- a/src/app/main.tsx +++ b/src/app/main.tsx @@ -6,12 +6,13 @@ import { Card, CardContent, CardHeader } from '@/components/ui/card' import { Button } from '@/components/ui/button' import { InputField } from '@/components/form/input-field' import { Toaster } from '@/components/ui/sonner' -import { cn } from '@/lib/utils' +import { cn } from '@/lib/utils/cn' import { useStore } from './store' import { TextareaField } from 
'@/components/form/textarea-field' import { DeviceFrameset } from 'react-device-frameset' import 'react-device-frameset/styles/marvel-devices.min.css' +import { generateClap } from './server/aitube/generateClap' export function Main() { const [_isPending, startTransition] = useTransition() @@ -39,7 +40,23 @@ export function Main() { const isBusy = status === "generating" || hasPendingTasks const handleSubmit = async () => { + const prompt = storyPromptDraft + setStatus("generating") + setStoryPrompt(prompt) + + startTransition(async () => { + console.log(`handleSubmit(): generating a clap using prompt = "${prompt}" `) + + try { + const clap = await generateClap({ prompt }) + + console.log(`handleSubmit(): received a clap = `, clap) + setStatus("finished") + } catch (err) { + setStatus("error") + } + }) } return ( diff --git a/src/app/page.tsx b/src/app/page.tsx index 4a36cf46d35d7d28f417c46de2310b8560a0e83d..17ec16b9ecb31cf38c01f265562b0f5f3653cbc6 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -4,7 +4,7 @@ import { useEffect, useState } from "react" import Head from "next/head" import Script from "next/script" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" import { Main } from "./main" diff --git a/src/app/server/aitube/generateClap.ts b/src/app/server/aitube/generateClap.ts new file mode 100644 index 0000000000000000000000000000000000000000..2bf3e5c30ca36972f66e07aa1eea5aa5c654ea59 --- /dev/null +++ b/src/app/server/aitube/generateClap.ts @@ -0,0 +1,44 @@ +"use server" + +import { parseClap } from "@/lib/clap/parseClap" +import { ClapProject } from "@/lib/clap/types" + +import { aitubeApiUrl } from "../config" + +export async function generateClap({ + prompt = "", +}: { + prompt: string +}): Promise { + if (!prompt) { throw new Error(`please provide a prompt`) } + + // AiTube Stories is nice, but we also need to leave some compute for AiTube Live and AiTube Gaming + const height = 1024 + const width = 512 + + // 
console.log(`calling `+ gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict") + + // remember: a space needs to be public for the classic fetch() to work + const res = await fetch(aitubeApiUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + // TODO pass the JWT so that only the AI Stories Factory can call the API + // Authorization: `Bearer ${hfApiToken}`, + }, + body: JSON.stringify({ + prompt, + width, + height + }), + cache: "no-store", + // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) + // next: { revalidate: 1 } + }) + + const blob = await res.blob() + + const clap = await parseClap(blob) + + return clap +} \ No newline at end of file diff --git a/src/app/server/config.ts b/src/app/server/config.ts index f59578c607a945f0de11e250e335434becf7e646..df575ae467a079452d69817420d735e7e486f65a 100644 --- a/src/app/server/config.ts +++ b/src/app/server/config.ts @@ -1,2 +1,5 @@ export const serverHuggingfaceApiKey = `${process.env.HF_API_TOKEN || ""}` + +export const aitubeUrl = `${process.env.AI_TUBE_URL || "" }` +export const aitubeApiUrl = + aitubeUrl + (aitubeUrl.endsWith("/") ? 
"" : "/") + "api/" \ No newline at end of file diff --git a/src/app/server/services/background.ts b/src/app/server/services/background.ts deleted file mode 100644 index 174f78f91603dfc9dd3ca2c614bb95358e2f4b47..0000000000000000000000000000000000000000 --- a/src/app/server/services/background.ts +++ /dev/null @@ -1,50 +0,0 @@ -"use server" - -import { BackgroundRemovalParams } from "@/types" - -import { addBase64HeaderToPng } from "../utils/addBase64HeaderToPng" - -const gradioApi = `https://jbilcke-hf-background-removal-api.hf.space` -const microserviceApiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` - -export async function removeBackground({ - imageAsBase64, -}: BackgroundRemovalParams): Promise { - - // remember: a space needs to be public for the classic fetch() to work - const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", { - method: "POST", - headers: { - "Content-Type": "application/json", - // Authorization: `Bearer ${hfApiToken}`, - }, - body: JSON.stringify({ - fn_index: 0, // <- is it 0 or 1? 
- data: [ - microserviceApiKey, - imageAsBase64, - ], - }), - cache: "no-store", - // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) - // next: { revalidate: 1 } - }) - - const { data } = await res.json() - - // console.log("data:", data) - // Recommendation: handle errors - if (res.status !== 200 || !Array.isArray(data)) { - // This will activate the closest `error.js` Error Boundary - throw new Error(`Failed to fetch data (status: ${res.status})`) - } - // console.log("data:", data.slice(0, 50)) - - const base64Content = (data?.[0] || "") as string - - if (!base64Content) { - throw new Error(`invalid response (no content)`) - } - - return addBase64HeaderToPng(base64Content) -} \ No newline at end of file diff --git a/src/app/server/services/inpaint.ts b/src/app/server/services/inpaint.ts deleted file mode 100644 index e255d0bbc60356e805f37c6305622c8d76b3afde..0000000000000000000000000000000000000000 --- a/src/app/server/services/inpaint.ts +++ /dev/null @@ -1,107 +0,0 @@ -"use server" - -import { InpaintingParams } from "@/types" - -import { addBase64HeaderToPng } from "../utils/addBase64HeaderToPng" -import { segmentToInpaintingMask } from "../utils/segmentToInpaintingMask" - -const gradioApi = `https://jbilcke-hf-inpainting-api.hf.space` -const microserviceApiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` - -export async function inpaint({ - imageAsBase64, - maskAsBase64, - positivePrompt = "", - negativePrompt = "", - guidanceScale = 7.5, - numInferenceSteps = 20, - strength = 0.99, - scheduler = "EulerDiscreteScheduler" -}: InpaintingParams): Promise { - - const posPrompt = [ - positivePrompt, - "clean", - "high-resolution", - "8k", - "best quality", - "masterpiece", - "crisp", - "sharp", - "intricate details" - ].join(", ") - - const negPrompt = [ - negativePrompt, - "pixelated", - "pixels", - "noise", - "blur", - "motion blur", - "lowres", - "oversmooth", - "longbody", - "bad anatomy", - "bad hands", - "missing 
fingers", - "extra digit", - "fewer digits", - "cropped", - "worst quality", - "low quality", - "artificial", - "unrealistic", - "watermark", - "trademark", - "error", - "mistake" - ].join(", ") - - // the segmentation mask is a RGB color one (that's how we can split layers) - // so we first convert it to either black or white - const inpaintingMaskAsBase64 = await segmentToInpaintingMask(maskAsBase64) - - // remember: a space needs to be public for the classic fetch() to work - const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", { - method: "POST", - headers: { - "Content-Type": "application/json", - // Authorization: `Bearer ${hfApiToken}`, - }, - body: JSON.stringify({ - fn_index: 0, // <- is it 0 or 1? - data: [ - microserviceApiKey, - imageAsBase64, // blob in 'parameter_5' Image component - inpaintingMaskAsBase64, - posPrompt, - negPrompt, - guidanceScale, - numInferenceSteps, - strength, - scheduler, - ], - }), - cache: "no-store", - // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) - // next: { revalidate: 1 } - }) - - const { data } = await res.json() - - // console.log("data:", data) - // Recommendation: handle errors - if (res.status !== 200 || !Array.isArray(data)) { - // This will activate the closest `error.js` Error Boundary - throw new Error(`Failed to fetch data (status: ${res.status})`) - } - // console.log("data:", data.slice(0, 50)) - - const base64Content = (data?.[0] || "") as string - - if (!base64Content) { - throw new Error(`invalid response (no content)`) - } - - return addBase64HeaderToPng(base64Content) -} \ No newline at end of file diff --git a/src/app/server/services/segment.ts b/src/app/server/services/segment.ts deleted file mode 100644 index 72f90beaae627ab1a034a07811a69d6a3ff4bb3b..0000000000000000000000000000000000000000 --- a/src/app/server/services/segment.ts +++ /dev/null @@ -1,138 +0,0 @@ -"use server" - -import { SemanticLayer, SemanticLayers } from 
"@/lib/config" - -import { addBase64HeaderToPng } from "../utils/addBase64HeaderToPng" -import { segmentsToInpaintingMasks } from "../utils/segmentsToInpaintingMasks" -import { alphaToWhite } from "../utils/alphaToWhite" - -const gradioApi = `https://jbilcke-hf-segmentation-api.hf.space` -const microserviceApiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` - -export type SegmentationResult = { - id: number - box: number[] // [299.63092041015625, 111.72967529296875, 661.6744384765625, 692.8449096679688], - label: string - score: number - color: number[] // [0.8506358185563304, 0.9904733533937202, 0.32005103765589715, 1.0] -} - -type SegmentationApiResponse = { - data: SegmentationResult[] - bitmap: string // base64 png -} - -export type SegmentationResults = { - data: Partial> - bitmap: string // base64 png -} - -export async function segment({ - imageAsBase64, - layers, -}: { - imageAsBase64: string - layers: SemanticLayers -}): Promise { - - const emptyResponse: SemanticLayers = {} - - Object.entries(layers).forEach(([key, value]) => { - emptyResponse[key as SemanticLayer] = "" - }) - - // console.log(`calling `+ gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict") - - const detectionPrompt = Object.keys(layers).map(x => x.trim().toLowerCase()).join(" . 
") - - // min 0, max 1, value 0.3, step 0.001 - const boxThreshold = 0.3 - - // min 0, max 1, value 0.25, step 0.001 - const textThreshold = 0.25 - - // min 0, max 1, value 0.8, step 0.001 - const iouThreshold = 0.8 - - // SAM is finicky, it doesn't work on images with an alpha channel - // so we first need to remove that - let imageToSegmentInBase64 = "" - imageToSegmentInBase64 = imageAsBase64 - /* - try { - imageToSegmentInBase64 = await alphaToWhite(imageAsBase64) - } catch (err) { - console.error(`failed to get a valid imageToSegmentInBase64:`, err) - return emptyResponse - } - */ - - // remember: a space needs to be public for the classic fetch() to work - const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", { - method: "POST", - headers: { - "Content-Type": "application/json", - // Authorization: `Bearer ${hfApiToken}`, - }, - body: JSON.stringify({ - fn_index: 0, // <- is it 0 or 1? - data: [ - microserviceApiKey, - imageToSegmentInBase64, - detectionPrompt, - boxThreshold, - textThreshold, - iouThreshold, - ], - }), - cache: "no-store", - // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) - // next: { revalidate: 1 } - }) - - const layeredResults = {} as Partial> - - const { data } = await res.json() - - // console.log("data:", data) - // Recommendation: handle errors - if (res.status !== 200 || !Array.isArray(data)) { - // This will activate the closest `error.js` Error Boundary - console.error(`Failed to fetch data (${res.status} error: ${res.statusText})`, res) - return emptyResponse - } - // console.log("data:", data.slice(0, 50)) - - let apiResponse: SegmentationApiResponse = { - data: [], - bitmap: "" - } - - try { - apiResponse = JSON.parse((data?.[0] || "{}")) as SegmentationApiResponse - } catch (err) { - console.error(`Failed to parse api response`, err) - return emptyResponse - } - - // console.log("segmentation", segmentation) - // console.log("segmentation.data:", 
segmentation.data) - const items = [...(apiResponse.data || [])] - // console.log("items:", items) - - const bitmap = apiResponse.bitmap ? addBase64HeaderToPng(apiResponse.bitmap) : "" - - Object.entries(layers).forEach(([key, value]) => { - const match = items.find(x => `${key || ""}`.trim().toLowerCase() === `${x.label || ""}`.trim().toLowerCase()) - if (match) { - layeredResults[key as SemanticLayer] = match - } - }) - - const maskLayers = await segmentsToInpaintingMasks({ - data: layeredResults, - bitmap, - }) - - return maskLayers -} \ No newline at end of file diff --git a/src/app/server/services/stableCascade.ts b/src/app/server/services/stableCascade.ts deleted file mode 100644 index 495bdd79afa22155696a357ee7ec8d27f845b692..0000000000000000000000000000000000000000 --- a/src/app/server/services/stableCascade.ts +++ /dev/null @@ -1,67 +0,0 @@ -"use server" - -import { generateSeed } from "@/lib/generateSeed" -import { StableCascadeParams } from "@/types" - -import { addBase64HeaderToPng } from "../utils/addBase64HeaderToPng" - -const gradioApi = `https://jbilcke-hf-stable-cascade-api.hf.space` -const microserviceApiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` - -export async function stableCascade({ - prompt, - negativePrompt, - guidanceScale, - nbPriorInferenceSteps, - nbDecoderInferenceSteps, - seed, - width, - height, -}: StableCascadeParams): Promise { - - // console.log(`calling `+ gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict") - - // remember: a space needs to be public for the classic fetch() to work - const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", { - method: "POST", - headers: { - "Content-Type": "application/json", - // Authorization: `Bearer ${hfApiToken}`, - }, - body: JSON.stringify({ - fn_index: 0, // <- is it 0 or 1? 
- data: [ - microserviceApiKey, - prompt, - negativePrompt, - height, - width, - guidanceScale, - seed || generateSeed(), - nbPriorInferenceSteps, - nbDecoderInferenceSteps - ], - }), - cache: "no-store", - // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) - // next: { revalidate: 1 } - }) - - const { data } = await res.json() - - // console.log("data:", data) - // Recommendation: handle errors - if (res.status !== 200 || !Array.isArray(data)) { - // This will activate the closest `error.js` Error Boundary - throw new Error(`Failed to fetch data (status: ${res.status})`) - } - // console.log("data:", data.slice(0, 50)) - - const base64Content = (data?.[0] || "") as string - - if (!base64Content) { - throw new Error(`invalid response (no content)`) - } - - return addBase64HeaderToPng(base64Content) -} \ No newline at end of file diff --git a/src/app/server/services/upscale.ts b/src/app/server/services/upscale.ts deleted file mode 100644 index 20f0bcffe85a04766012903763f5c5ba16d26c8b..0000000000000000000000000000000000000000 --- a/src/app/server/services/upscale.ts +++ /dev/null @@ -1,105 +0,0 @@ -"use server" - -import { generateSeed } from "@/lib/generateSeed" -import { UpscalingParams } from "@/types" - -import { addBase64HeaderToPng } from "../utils/addBase64HeaderToPng" - -const gradioApi = `https://jbilcke-hf-image-upscaling-api.hf.space` -const microserviceApiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}` - -export async function upscale({ - imageAsBase64, - prompt, - negativePrompt, - scaleFactor, - nbSteps, - seed, -}: UpscalingParams): Promise { - - const addedPrompt = [ - "clean", - "high-resolution", - "8k", - "best quality", - "masterpiece", - "crisp", - "sharp", - "intricate details" - ].join(", ") - - const negPrompt = [ - negativePrompt, - "pixelated", - "pixels", - "noise", - "blur", - "motion blur", - "lowres", - "oversmooth", - "longbody", - "bad anatomy", - "bad hands", - "missing fingers", - "extra 
digit", - "fewer digits", - "cropped", - "worst quality", - "low quality", - "artificial", - "unrealistic", - "watermark", - "trademark", - "error", - "mistake" - ].join(", ") - - const conditioningScale = 1.4 - const classifierFreeGuidance = 9.5 - - // remember: a space needs to be public for the classic fetch() to work - const res = await fetch(gradioApi + (gradioApi.endsWith("/") ? "" : "/") + "api/predict", { - method: "POST", - headers: { - "Content-Type": "application/json", - // Authorization: `Bearer ${hfApiToken}`, - }, - body: JSON.stringify({ - fn_index: 0, // <- is it 0 or 1? - data: [ - microserviceApiKey, - imageAsBase64, // blob in 'parameter_5' Image component - prompt, // string in 'Prompt' Textbox component - addedPrompt, // string in 'Added Prompt' Textbox component - negPrompt, // string in 'Negative Prompt' Textbox component - nbSteps, // number (numeric value between 10 and 50) in 'Denoise Steps' Slider component - scaleFactor, // number (numeric value between 1 and 4) in 'Upsample Scale' Slider component - conditioningScale, // number (numeric value between 0.5 and 1.5) in 'Conditioning Scale' Slider component - classifierFreeGuidance, // number (numeric value between 0.1 and 10.0) in 'Classier-free Guidance' Slider component - seed || generateSeed(), // number (numeric value between -1 and 2147483647) in 'Seed' Slider component - ], - }), - cache: "no-store", - // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache) - // next: { revalidate: 1 } - }) - - const { data } = await res.json() - - // Recommendation: handle errors - if (res.status !== 200 || !Array.isArray(data)) { - // This will activate the closest `error.js` Error Boundary - throw new Error(`Failed to fetch data (status: ${res.status})`) - } - // console.log("data:", data.slice(0, 50)) - - const base64Content = (data?.[0] || "") as string - - if (!base64Content) { - throw new Error(`invalid response (no content)`) - } - - // console.log("upscaling 
base64Content:", addBase64HeaderToPng(base64Content).slice(0, 50)) - - return addBase64HeaderToPng(base64Content) -} \ No newline at end of file diff --git a/src/app/server/utils/alphaToWhite.ts b/src/app/server/utils/alphaToWhite.ts deleted file mode 100644 index 82dbd2a9e79dccfb33c85a4d75686aa558c9f43e..0000000000000000000000000000000000000000 --- a/src/app/server/utils/alphaToWhite.ts +++ /dev/null @@ -1,34 +0,0 @@ -import Jimp from 'jimp'; - -/** - * Convert a PNG with an alpha channel to a PNG with a white background - * - * this also makes sure the image is 1024x1024, as the segmentation algorithm is finicky - * and will fail if this is not respected - * @param dataUri - * @returns - */ -export async function alphaToWhite(dataUri: string): Promise { - - - // strip off the "data:image/png;base64," part - const base64Data = dataUri.replace(/^data:image\/\w+;base64,/, ""); - if (!base64Data) { - throw new Error(`invalid image, cannot convert from alpha to white background`) - } - - // convert base64 to buffer - const imageData = Buffer.from(base64Data, 'base64'); - - // read the image using Jimp - let img = await Jimp.read(imageData); - - img = img.background(0xFFFFFFFF).resize(1024, 1024); - - return new Promise((resolve, reject) => { - img.getBuffer(Jimp.MIME_PNG, (err, buffer) => { - if (err) reject(err); - else resolve(`data:${Jimp.MIME_PNG};base64,${buffer.toString('base64')}`); - }); - }); -} \ No newline at end of file diff --git a/src/app/server/utils/segmentToInpaintingMask.ts b/src/app/server/utils/segmentToInpaintingMask.ts deleted file mode 100644 index 048789f61f5e4c250d4b3d4770457c818ad70099..0000000000000000000000000000000000000000 --- a/src/app/server/utils/segmentToInpaintingMask.ts +++ /dev/null @@ -1,35 +0,0 @@ -"use server" - -import Jimp from 'jimp'; - -/** - * Converts a segment mask (colored pixels over white) - * into an inpainting mask (black pixels over white) - * - * @param pngBase64 - * @returns - */ -export async function 
segmentToInpaintingMask(pngBase64: string) { - const black = 0x00000000; - const white = 0xFFFFFFFF; - - // strip off the "data:image/png;base64," part - const base64Data = pngBase64.replace(/^data:image\/\w+;base64,/, ""); - - // convert base64 to buffer - const imageData = Buffer.from(base64Data, 'base64'); - - // read the image using Jimp - const image = await Jimp.read(imageData); - - image.scan(0, 0, image.bitmap.width, image.bitmap.height, (x, y, idx) => { - const currentColor = image.getPixelColor(x, y); - if (currentColor !== white) { - image.bitmap.data[idx] = black; - } - }); - - // get base64 data - const base64Image = await image.getBase64Async(Jimp.MIME_PNG); - return "data:image/png;base64," + base64Image.split(",")[1]; -} \ No newline at end of file diff --git a/src/app/server/utils/segmentsToInpaintingMasks.ts b/src/app/server/utils/segmentsToInpaintingMasks.ts deleted file mode 100644 index d706e4333c157f892c4056cbdffa359d998afc0b..0000000000000000000000000000000000000000 --- a/src/app/server/utils/segmentsToInpaintingMasks.ts +++ /dev/null @@ -1,78 +0,0 @@ -import Jimp from "jimp" - -import { SemanticLayer, SemanticLayers } from "@/lib/config" - -import { SegmentationResults } from "../segment" - -function getEuclideanDistance(color1: number[], color2: number[]): number { - return Math.sqrt( - Math.pow(color1[0] - color2[0], 2) + - Math.pow(color1[1] - color2[1], 2) + - Math.pow(color1[2] - color2[2], 2) - ); -} - -export async function segmentsToInpaintingMasks(segmentationResults: SegmentationResults): Promise { - const image = await Jimp.read(Buffer.from(segmentationResults.bitmap.replace(/^data:image\/\w+;base64,/, ""), 'base64')); - - const resultImages: Partial> = {} - // Convert all result images to base64 strings - const base64Images: SemanticLayers = {} - - for (let layer in segmentationResults.data) { - resultImages[layer as SemanticLayer] = new Jimp(image) - base64Images[layer as SemanticLayer] = "" - } - - // Iterate through each pixel 
in the image - image.scan(0, 0, image.bitmap.width, image.bitmap.height, (x, y, idx) => { - // Get the color of the current pixel - const color = Jimp.intToRGBA(image.getPixelColor(x, y)); - const currentColor = [color.r / 255, color.g / 255, color.b / 255]; - - // Determine which category the color belongs to - let minDistance = Infinity; - let closestLayer: SemanticLayer | null = null; - - for (let layer in segmentationResults.data) { - const layerColor = segmentationResults.data[layer as SemanticLayer]!.color; - const distance = getEuclideanDistance(currentColor, layerColor); - - if(distance < minDistance) { - minDistance = distance; - closestLayer = layer as SemanticLayer; - } - }; - - if (!closestLayer) { - return; - } - - // Set the color of the pixel in the corresponding result image to black, and white in others - for (let layer in resultImages) { - // used to guarantee the !.bitmap - if (!resultImages[layer as SemanticLayer]?.bitmap) { - continue - } - - for (let i = 0; i < 4; i++) { - if (layer === closestLayer) { - if(i < 3) - resultImages[layer as SemanticLayer]!.bitmap.data[idx + i] = 0x00; // set rgb channels to black - else - resultImages[layer as SemanticLayer]!.bitmap.data[idx + i] = 0xFF; // set alpha channel to maximum - } else { - resultImages[layer as SemanticLayer]!.bitmap.data[idx + i] = 0xFF; // set rgba channels to white - } - } - } - }); - - // Convert all result images to base64 strings - for (let layer in resultImages) { - const base64Image = await resultImages[layer as SemanticLayer]!.getBase64Async(Jimp.MIME_PNG); - base64Images[layer as SemanticLayer] = "data:image/png;base64," + base64Image.split(",")[1]; - } - - return base64Images; -} \ No newline at end of file diff --git a/src/components/form/input-field.tsx b/src/components/form/input-field.tsx index 8989acbd857bca8a44f001b4211a8d9255b39ede..b3c8e6191494adf75a04deb7f97fdb9260b07cd4 100644 --- a/src/components/form/input-field.tsx +++ b/src/components/form/input-field.tsx @@ -2,7 
+2,7 @@ import { ComponentProps } from "react"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; -import { cn } from "@/lib/utils"; +import { cn } from "@/lib/utils/cn"; export function InputField({ label, diff --git a/src/components/form/select-field.tsx b/src/components/form/select-field.tsx index 4144ce0ec6364aeea07e9904ad5abb9379313acd..6b71d681019254535f57a9ba3eb2f44589386be4 100644 --- a/src/components/form/select-field.tsx +++ b/src/components/form/select-field.tsx @@ -2,7 +2,7 @@ import { ComponentProps } from "react"; import { Select } from "@/components/ui/select"; import { Label } from "@/components/ui/label"; -import { cn } from "@/lib/utils"; +import { cn } from "@/lib/utils/cn"; export function SelectField({ label, diff --git a/src/components/form/slider-field.tsx b/src/components/form/slider-field.tsx index 977d1c238fa3aa1d0cdc454c7500a9463962428a..7bf1be818ea427819d8a97aa1cc23ffb21d91d31 100644 --- a/src/components/form/slider-field.tsx +++ b/src/components/form/slider-field.tsx @@ -2,7 +2,7 @@ import { ComponentProps } from "react"; import { Slider } from "@/components/ui/slider"; import { Label } from "@/components/ui/label"; -import { cn } from "@/lib/utils"; +import { cn } from "@/lib/utils/cn"; export function SliderField({ label, diff --git a/src/components/form/textarea-field.tsx b/src/components/form/textarea-field.tsx index 32eea16763d58d26eab23a24093c24d41df6d010..919137d4cc8f0571d4f021927ef4226c043fc6a0 100644 --- a/src/components/form/textarea-field.tsx +++ b/src/components/form/textarea-field.tsx @@ -2,7 +2,7 @@ import { ComponentProps } from "react"; import { Textarea } from "../ui/textarea"; import { Label } from "@/components/ui/label"; -import { cn } from "@/lib/utils"; +import { cn } from "@/lib/utils/cn"; export function TextareaField({ label, diff --git a/src/components/ui/accordion.tsx b/src/components/ui/accordion.tsx index 
937620af27e5d8ef577f0baca229a9b753ebd017..b9ed85cb6f537d7f4bb2ece1d654d9d915de4f07 100644 --- a/src/components/ui/accordion.tsx +++ b/src/components/ui/accordion.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as AccordionPrimitive from "@radix-ui/react-accordion" import { ChevronDown } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Accordion = AccordionPrimitive.Root diff --git a/src/components/ui/alert.tsx b/src/components/ui/alert.tsx index f589783193a6cfe14032a77b89055cb3e920fe8c..e38ab609f96cdd7c53bb12d9ee99c033f4d99c9a 100644 --- a/src/components/ui/alert.tsx +++ b/src/components/ui/alert.tsx @@ -1,7 +1,7 @@ import * as React from "react" import { cva, type VariantProps } from "class-variance-authority" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const alertVariants = cva( "relative w-full rounded-lg border border-stone-200 p-4 [&:has(svg)]:pl-11 [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-stone-950 dark:border-stone-800 dark:[&>svg]:text-stone-50", diff --git a/src/components/ui/avatar.tsx b/src/components/ui/avatar.tsx index 88aeea9d9368f2bd7385f0a0885829bf6d789492..dcf6788bf0b2281bd965c4391ebeb46573fb55f2 100644 --- a/src/components/ui/avatar.tsx +++ b/src/components/ui/avatar.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as AvatarPrimitive from "@radix-ui/react-avatar" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Avatar = React.forwardRef< React.ElementRef, diff --git a/src/components/ui/badge.tsx b/src/components/ui/badge.tsx index 8a05c5e844f6551efb3b35a0a23c748a9a6639b4..f45954283495c2bdb3076e720f97005957b624f7 100644 --- a/src/components/ui/badge.tsx +++ b/src/components/ui/badge.tsx @@ -1,7 +1,7 @@ import * as React from "react" import { cva, type VariantProps } from "class-variance-authority" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const 
badgeVariants = cva( "inline-flex items-center rounded-full border border-stone-200 px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-stone-400 focus:ring-offset-2 dark:border-stone-800 dark:focus:ring-stone-800", diff --git a/src/components/ui/button.tsx b/src/components/ui/button.tsx index d0042a291a9dfc9d3ca1bc323f08a3f276df79b5..96f7ff1cde7d1735ae2314b87d9a2edb175c29f0 100644 --- a/src/components/ui/button.tsx +++ b/src/components/ui/button.tsx @@ -2,7 +2,7 @@ import * as React from "react" import { Slot } from "@radix-ui/react-slot" import { cva, type VariantProps } from "class-variance-authority" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const buttonVariants = cva( "inline-flex items-center justify-center rounded-md text-sm font-medium ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-stone-400 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 dark:ring-offset-stone-950 dark:focus-visible:ring-stone-800", diff --git a/src/components/ui/card.tsx b/src/components/ui/card.tsx index 6583ebc1bb942bfb94e00fb4e7c7d685073c7b2a..072e3a28994215a8b06423a38e87ba4f91fc7bed 100644 --- a/src/components/ui/card.tsx +++ b/src/components/ui/card.tsx @@ -1,6 +1,6 @@ import * as React from "react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Card = React.forwardRef< HTMLDivElement, diff --git a/src/components/ui/checkbox.tsx b/src/components/ui/checkbox.tsx index 5850485b9fecba303bdba1849e5a7b6329300af4..cc57969632512eaf528d7026b2234ab150bd1ee7 100644 --- a/src/components/ui/checkbox.tsx +++ b/src/components/ui/checkbox.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as CheckboxPrimitive from "@radix-ui/react-checkbox" import { Check } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Checkbox = React.forwardRef< React.ElementRef, diff 
--git a/src/components/ui/command.tsx b/src/components/ui/command.tsx index a4e602ef2508a071948aef7779023540c9f25381..1412de8ccf3946d754c213ee42b0ec24a5c17c21 100644 --- a/src/components/ui/command.tsx +++ b/src/components/ui/command.tsx @@ -5,7 +5,7 @@ import { DialogProps } from "@radix-ui/react-dialog" import { Command as CommandPrimitive } from "cmdk" import { Search } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" import { Dialog, DialogContent } from "@/components/ui/dialog" const Command = React.forwardRef< diff --git a/src/components/ui/dialog.tsx b/src/components/ui/dialog.tsx index cf53b714fe959bf6cfb26db5f4ba6020f6e63b5b..125cf39d949b98007872e0c3c6cdac53c9e77503 100644 --- a/src/components/ui/dialog.tsx +++ b/src/components/ui/dialog.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as DialogPrimitive from "@radix-ui/react-dialog" import { X } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Dialog = DialogPrimitive.Root diff --git a/src/components/ui/dropdown-menu.tsx b/src/components/ui/dropdown-menu.tsx index 5803489a1d197a9db5018e413e63abe84b2efb8e..3eb85741ef0629381f741f05ea0b815828648bf6 100644 --- a/src/components/ui/dropdown-menu.tsx +++ b/src/components/ui/dropdown-menu.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu" import { Check, ChevronRight, Circle } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const DropdownMenu = DropdownMenuPrimitive.Root diff --git a/src/components/ui/input.tsx b/src/components/ui/input.tsx index 0757ddebdca3800bbd4a46fe1c2c17dff86c5e2f..1c55d2164e846105000383645b51420c19a6cd64 100644 --- a/src/components/ui/input.tsx +++ b/src/components/ui/input.tsx @@ -1,6 +1,6 @@ import * as React from "react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" export interface InputProps extends 
React.InputHTMLAttributes {} diff --git a/src/components/ui/label.tsx b/src/components/ui/label.tsx index 534182176bf87f9308355514adc884d2b69750a5..cbeab78ab932bc8e8e61dc231abf88d2223ab7a3 100644 --- a/src/components/ui/label.tsx +++ b/src/components/ui/label.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as LabelPrimitive from "@radix-ui/react-label" import { cva, type VariantProps } from "class-variance-authority" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const labelVariants = cva( "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70" diff --git a/src/components/ui/menubar.tsx b/src/components/ui/menubar.tsx index d57454816cea9b7572ad1ae6ab139d6946c4d5d5..602fd73fcdb49d2a1c7ee891b22a2e666b9617ea 100644 --- a/src/components/ui/menubar.tsx +++ b/src/components/ui/menubar.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as MenubarPrimitive from "@radix-ui/react-menubar" import { Check, ChevronRight, Circle } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const MenubarMenu = MenubarPrimitive.Menu diff --git a/src/components/ui/popover.tsx b/src/components/ui/popover.tsx index 8b35ce6d7b0dd78003308b09354e9f7197eb161a..d5d06df346424d067fc364d1f1b60c33e67efb81 100644 --- a/src/components/ui/popover.tsx +++ b/src/components/ui/popover.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as PopoverPrimitive from "@radix-ui/react-popover" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Popover = PopoverPrimitive.Root diff --git a/src/components/ui/select.tsx b/src/components/ui/select.tsx index 704239634b359b9e680dab25275e205e72579f82..90ca9d78552389b8dbe56a3494578c2fa4f5140f 100644 --- a/src/components/ui/select.tsx +++ b/src/components/ui/select.tsx @@ -4,7 +4,7 @@ import * as React from "react" import * as SelectPrimitive from "@radix-ui/react-select" import { Check, ChevronDown } from "lucide-react" 
-import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Select = SelectPrimitive.Root diff --git a/src/components/ui/separator.tsx b/src/components/ui/separator.tsx index a6ed83ef827829cf42a7b27d1d5714b4473bd1c5..098cfbdddccd861ca4093c37fd8cab96f2faa437 100644 --- a/src/components/ui/separator.tsx +++ b/src/components/ui/separator.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as SeparatorPrimitive from "@radix-ui/react-separator" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Separator = React.forwardRef< React.ElementRef, diff --git a/src/components/ui/slider.tsx b/src/components/ui/slider.tsx index 0e35bc7fb000cffa5e29956283ecf7d75453236c..94963ea52bda9fd0b3a676f869d7ec07229c88b9 100644 --- a/src/components/ui/slider.tsx +++ b/src/components/ui/slider.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as SliderPrimitive from "@radix-ui/react-slider" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Slider = React.forwardRef< React.ElementRef, diff --git a/src/components/ui/switch.tsx b/src/components/ui/switch.tsx index 9d1e79dffe05b79b4208570f487e506513430355..d4dfa9c683edf2b99903394835eb784c1127ee85 100644 --- a/src/components/ui/switch.tsx +++ b/src/components/ui/switch.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as SwitchPrimitives from "@radix-ui/react-switch" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Switch = React.forwardRef< React.ElementRef, diff --git a/src/components/ui/table.tsx b/src/components/ui/table.tsx index 953fb3c003bc0cd9d93059c373bc23e6aecbded8..d76ba22b1b5ae57c8041e9bc9c52c48e1dcaff14 100644 --- a/src/components/ui/table.tsx +++ b/src/components/ui/table.tsx @@ -1,6 +1,6 @@ import * as React from "react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Table = React.forwardRef< HTMLTableElement, diff --git a/src/components/ui/tabs.tsx b/src/components/ui/tabs.tsx 
index d56f6b79bf6dde6d7e3bd755e808eff542c34b9a..6f98f57974d5395d19c979ece26bc4cdbb02ae8d 100644 --- a/src/components/ui/tabs.tsx +++ b/src/components/ui/tabs.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as TabsPrimitive from "@radix-ui/react-tabs" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const Tabs = TabsPrimitive.Root diff --git a/src/components/ui/textarea.tsx b/src/components/ui/textarea.tsx index af10d34eeae448c2614c67141f83a8748754332c..16fd7efd5b0b64f2a1680d9543c2b297e5bf6fcb 100644 --- a/src/components/ui/textarea.tsx +++ b/src/components/ui/textarea.tsx @@ -1,6 +1,6 @@ import * as React from "react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" export interface TextareaProps extends React.TextareaHTMLAttributes {} diff --git a/src/components/ui/toast.tsx b/src/components/ui/toast.tsx index 94b1e9a1d3a82fe1beea6e931c4887e2260371cd..7e0e5ef0de5585224c9b65bac3f38ac599066143 100644 --- a/src/components/ui/toast.tsx +++ b/src/components/ui/toast.tsx @@ -3,7 +3,7 @@ import * as ToastPrimitives from "@radix-ui/react-toast" import { cva, type VariantProps } from "class-variance-authority" import { X } from "lucide-react" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const ToastProvider = ToastPrimitives.Provider diff --git a/src/components/ui/tooltip.tsx b/src/components/ui/tooltip.tsx index 15f831b13198545d236d3d7b2cb62970eb20854c..657d134eb4c4ac3d8cfb89188867c18b3bb41e90 100644 --- a/src/components/ui/tooltip.tsx +++ b/src/components/ui/tooltip.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as TooltipPrimitive from "@radix-ui/react-tooltip" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const TooltipProvider = TooltipPrimitive.Provider diff --git a/src/components/ui/vertical-slider.tsx b/src/components/ui/vertical-slider.tsx index b28a1200cb06d1f26e3c640c85e655c99e88954e..8350b69d5c6e35fd5bf2119b23b0557d1a9e2acf 100644 --- 
a/src/components/ui/vertical-slider.tsx +++ b/src/components/ui/vertical-slider.tsx @@ -3,7 +3,7 @@ import * as React from "react" import * as SliderPrimitive from "@radix-ui/react-slider" -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils/cn" const VerticalSlider = React.forwardRef< React.ElementRef, diff --git a/src/app/server/utils/addBase64HeaderToJpeg.ts b/src/lib/base64/addBase64HeaderToJpeg.ts similarity index 100% rename from src/app/server/utils/addBase64HeaderToJpeg.ts rename to src/lib/base64/addBase64HeaderToJpeg.ts diff --git a/src/app/server/utils/addBase64HeaderToPng.ts b/src/lib/base64/addBase64HeaderToPng.ts similarity index 100% rename from src/app/server/utils/addBase64HeaderToPng.ts rename to src/lib/base64/addBase64HeaderToPng.ts diff --git a/src/lib/base64/blobToDataUri.ts b/src/lib/base64/blobToDataUri.ts new file mode 100644 index 0000000000000000000000000000000000000000..42d219bb06c2c638f2e8e9553e0d45d5a33036db --- /dev/null +++ b/src/lib/base64/blobToDataUri.ts @@ -0,0 +1,21 @@ +export async function blobToDataUri(blob: Blob, defaultContentType = ""): Promise { + if (typeof window === "undefined") { + const arrayBuffer = await blob.arrayBuffer() + let buffer = Buffer.from(arrayBuffer) + return "data:" + (defaultContentType || blob.type) + ';base64,' + buffer.toString('base64'); + } else { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.onload = _e => { + let dataUri = `${reader.result as string || ""}` + if (defaultContentType) { + dataUri = dataUri.replace("application/octet-stream", defaultContentType) + } + resolve(dataUri) + } + reader.onerror = _e => reject(reader.error) + reader.onabort = _e => reject(new Error("Read aborted")) + reader.readAsDataURL(blob) + }); + } +} \ No newline at end of file diff --git a/src/lib/base64/dataUriToBlob.ts b/src/lib/base64/dataUriToBlob.ts new file mode 100644 index 
0000000000000000000000000000000000000000..f60f4250d3ed35f9968a2f1fb18fa0d50940915f --- /dev/null +++ b/src/lib/base64/dataUriToBlob.ts @@ -0,0 +1,15 @@ + +export function dataUriToBlob(dataURI = "", defaultContentType = ""): Blob { + dataURI = dataURI.replace(/^data:/, ''); + + const type = dataURI.match(/(?:image|application|video|audio|text)\/[^;]+/)?.[0] || defaultContentType; + const base64 = dataURI.replace(/^[^,]+,/, ''); + const arrayBuffer = new ArrayBuffer(base64.length); + const typedArray = new Uint8Array(arrayBuffer); + + for (let i = 0; i < base64.length; i++) { + typedArray[i] = base64.charCodeAt(i); + } + + return new Blob([arrayBuffer], { type }); +} \ No newline at end of file diff --git a/src/lib/clap/clap-specification-draft.md b/src/lib/clap/clap-specification-draft.md new file mode 100644 index 0000000000000000000000000000000000000000..88d3106d5099f393453d3bffc6295d1cd7dd9915 --- /dev/null +++ b/src/lib/clap/clap-specification-draft.md @@ -0,0 +1,162 @@ +# CLAP Format Specification + +- Status: DRAFT +- Document revision: 0.0.1 +- Last updated: Feb 6th, 2024 +- Author(s): Julian BILCKE (@flngr) + +## BEFORE YOU READ + +The CLAP format spec is experimental and not finished yet! +There might be inconsistencies, unnecessary redundancies or blatant omissions. + +## What are CLAP files? + +The CLAP format (.clap) is a file format designed for AI video projects. + +It preserves prompts and assets into the same container, making it easier to share an AI video project between different people or applications. + +## Structure + +A CLAP is an array of objects serialized into a YAML text string, then finally compressed using gzip to a binary file. 
+ +The file extension is `.clap` +The mime type is `application/x-yaml` + +There can be 5 different types of objects: + +- one HEADER +- one METADATA +- zero, one or more MODEL(s) +- zero, one or more SCENE(s) +- zero, one or more SEGMENT(s) + +This can be represented in javascript like this: + +```javascript +[ + clapHeader, // one metadata object + clapMeta, // one metadata object + ...clapModels, // optional array of models + ...clapScenes, // optional array of scenes + ...clapSegments // optional array of segments +] +``` + +## Header + +The HEADER provides information about how to decode a CLAP. + +Knowing in advance the number of models, scenes and segments helps the decoder parsing the information, +and in some implementation, help with debugging, logging, and provisioning memory usage. + +However in the future, it is possible that a different scheme is used, in order to support streaming. + +Either by recognizing the shape of each object (fields), or by using a specific field eg. a `_type`. + +```typescript +{ + // used to know which format version is used. 
+ // CLAP is still in development and the format is not fully specified yet, + // during the period most .clap file will have the "clap-0" format + format: "clap-0" + + numberOfModels: number // integer + numberOfScenes: number // integer + numberOfSegments: number // integer +} +``` + +## Metadata + +```typescript +{ + id: string // "" + title: string // "project title" + description: string // "project description" + licence: string // "information about licensing" + + // this provides information about the image ratio + // this might be removed in the final spec, as this + // can be re-computed from width and height + orientation: "landscape" | "vertical" | "square" + + // the expected duration of the project + durationInMs: number + + // the suggested width and height of the video + // note that this is just an indicator, + // and might be superseeded by the application reading the .clap file + width: number // integer between 256 and 8192 (value in pixels) + height: number // integer between 256 and 8192 (value in pixels) + + // name of the suggested video model to use + // note that this is just an indicator, + // and might be superseeded by the application reading the .clap file + defaultVideoModel: string + + // additional prompt to use in the video generation + // this helps adding some magic touch and flair to the videos, + // but perhaps the field should be renamed + extraPositivePrompt: string + + // the screenplay (script) of the video + screenplay: string + + // whether to loop the content by default or not + isLoop: boolean + + // helper to indicate whether the .clap might contain interactive elements + isInteractive: boolean +} +``` + +## Models + +Before talking about models, first we should describe the concept of entity: + +in a story, an entity is something (person, place, vehicle, animal, robot, alien, object) with a name, a description of the appearance, an age, mileage or quality, an origin, and so on. 
+ +An example could be "a giant magical school bus, with appearance of a cat with wheels, and which talks" + +The CLAP model would be an instance (an interpretation) of this entity, where we would assign it an identity: +- a name and age +- a visual style (a photo of the magic school bus cat) +- a voice style +- and maybe other things eg. an origin or background story + +As you can see, it can be difficult to create clearly separated categories, like "vehicule", "character", or "location" +(the magical cat bus could turn into a location in some scene, a speaking character in another etc) + +This is why there is a common schema for all models: + +```typescript +{ + id: string + category: ClapSegmentCategory + triggerName: string + label: string + description: string + author: string + thumbnailUrl: string + seed: number + + assetSourceType: ClapAssetSource + assetUrl: string + + age: number + gender: ClapModelGender + region: ClapModelRegion + appearance: ClapModelAppearance + voiceVendor: ClapVoiceVendor + voiceId: string +} +``` + +## Atomic types + +... 
+ +## TO BE CONTINUED + +(you can read "./types.ts" for more information) \ No newline at end of file diff --git a/src/lib/clap/clapToDataUri.ts b/src/lib/clap/clapToDataUri.ts new file mode 100644 index 0000000000000000000000000000000000000000..c9fc3256b7820bb88c97d7fa07535fd003a74772 --- /dev/null +++ b/src/lib/clap/clapToDataUri.ts @@ -0,0 +1,11 @@ + +import { blobToDataUri } from "@/lib/base64/blobToDataUri" + +import { serializeClap } from "./serializeClap" +import { ClapProject } from "./types" + +export async function clapToDataUri(clap: ClapProject): Promise { + const archive = await serializeClap(clap) + const dataUri = await blobToDataUri(archive, "application/x-gzip") + return dataUri +} \ No newline at end of file diff --git a/src/lib/clap/emptyClap.ts b/src/lib/clap/emptyClap.ts new file mode 100644 index 0000000000000000000000000000000000000000..d3f4178275d5d23495520aa4db978b6abd1cd79f --- /dev/null +++ b/src/lib/clap/emptyClap.ts @@ -0,0 +1,18 @@ +import { newClap } from "./newClap" +import { serializeClap } from "./serializeClap" + +let globalState: { + blob?: Blob +} = { + blob: undefined +} + +export async function getEmptyClap(): Promise { + if (globalState.blob) { return globalState.blob } + + const clap = newClap() + + globalState.blob = await serializeClap(clap) + + return globalState.blob +} \ No newline at end of file diff --git a/src/lib/clap/generateClapFromSimpleStory.ts b/src/lib/clap/generateClapFromSimpleStory.ts new file mode 100644 index 0000000000000000000000000000000000000000..4f9851ca344d9af2c74fc5f2333314be3806c53d --- /dev/null +++ b/src/lib/clap/generateClapFromSimpleStory.ts @@ -0,0 +1,149 @@ +import { newClap } from "./newClap" +import { newSegment } from "./newSegment" +import { ClapProject } from "./types" + +let defaultSegmentDurationInMs = 2500 // 2584 + +const fishDemoStory = [ + "Siamese fighting fish, bokeh, underwater, coral, lively, bubbles, translucency, perfect", + + // this one is magnificient! 
+ "princess parrot fish, bokeh, underwater, coral, lively, bubbles, translucency, perfect", + + "pacific ocean perch, bokeh, underwater, coral, lively, bubbles, translucency, perfect", + + "Queen angelfish, bokeh, underwater, coral, lively, bubbles, translucency, perfect", + + "sea turtle, bokeh, underwater, coral, lively, bubbles, translucency, perfect", + + "hippocampus, bokeh, underwater, coral, lively, bubbles, translucency, perfect", +] + +let demoStory = [ + ...fishDemoStory, + + // "portrait of one man news anchor, 60yo, thin, fit, american, mustache, beard, wearing a suit, medium-shot, central park, outside, serious, bokeh, perfect", + + // "screenshot from Call of Duty, FPS game, nextgen, videogame screenshot, unreal engine, raytracing, perfect", + + // "screenshot from a flight simulator, nextgen, videogame screenshot, unreal engine, raytracing, perfect", + // "screenshot from fallout3, fallout4, western, wasteland, 3rd person RPG, nextgen, videogame screenshot, unreal engine, raytracing, perfect", + // "portrait of single influencer woman, 30yo, thin, fit, american, wearing a red tshirt, medium-shot, central park, outside, serious, bokeh, perfect", +] + + +export function generateClapFromSimpleStory({ + story = demoStory, + showIntroPoweredByEngine = false, + showIntroDisclaimerAboutAI = false, +}: { + story?: string[] + showIntroPoweredByEngine?: boolean + showIntroDisclaimerAboutAI?: boolean +} = { + story: demoStory, + showIntroPoweredByEngine: false, + showIntroDisclaimerAboutAI: false, +}): ClapProject { + + const clap = newClap({ + meta: { + title: "Interactive Demo", + isInteractive: true, + isLoop: true, + } + }) + + let currentElapsedTimeInMs = 0 + let currentSegmentDurationInMs = defaultSegmentDurationInMs + + if (showIntroPoweredByEngine) { + clap.segments.push(newSegment({ + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "interface", + prompt: "", + label: "disclaimer", + outputType: "interface", 
+ })) + currentElapsedTimeInMs += currentSegmentDurationInMs + } + + if (showIntroDisclaimerAboutAI) { + clap.segments.push(newSegment({ + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "interface", + prompt: "", + label: "disclaimer", + outputType: "interface", + })) + currentElapsedTimeInMs += currentSegmentDurationInMs + } + + /* + clap.segments.push( + newSegment({ + // id: string + // track: number + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "interface", + // modelId: string + // sceneId: string + prompt: "a hello world", + label: "hello world", + outputType: "interface" + // renderId: string + // status: ClapSegmentStatus + // assetUrl: string + // assetDurationInMs: number + // createdBy: ClapAuthor + // editedBy: ClapAuthor + // outputGain: number + // seed: number + }) + ) + + currentElapsedTimeInMs += currentSegmentDurationInMs + */ + + + + for (let prompt of story) { + + clap.segments.push(newSegment({ + track: 0, + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "video", + prompt: "", + label: "video", + outputType: "video", + })) + clap.segments.push(newSegment({ + track: 1, + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "generic", + prompt, + label: prompt, + outputType: "text", + })) + clap.segments.push(newSegment({ + track: 2, + startTimeInMs: currentElapsedTimeInMs, + endTimeInMs: currentSegmentDurationInMs, + category: "camera", + prompt: "medium-shot", + label: "medium-shot", + outputType: "text", + })) + + currentElapsedTimeInMs += currentSegmentDurationInMs + } + + clap.meta.durationInMs = currentElapsedTimeInMs + + return clap +} \ No newline at end of file diff --git a/src/lib/clap/getClapAssetSource.ts b/src/lib/clap/getClapAssetSource.ts new file mode 100644 index 0000000000000000000000000000000000000000..285de2d8263058b6073709998aadd4eef8164473 --- 
/dev/null +++ b/src/lib/clap/getClapAssetSource.ts @@ -0,0 +1,25 @@ +import { ClapAssetSource } from "./types" + +export function getClapAssetSourceType(input: string = ""): ClapAssetSource { + + const str = `${input || ""}`.trim() + + if (!str || !str.length) { + return "EMPTY" + } + + if (str.startsWith("https://") || str.startsWith("http://")) { + return "REMOTE" + } + + // note that "path" assets are potentially a security risk, they need to be treated with care + if (str.startsWith("/") || str.startsWith("../") || str.startsWith("./")) { + return "PATH" + } + + if (str.startsWith("data:")) { + return "DATA" + } + + return "PROMPT" +} \ No newline at end of file diff --git a/src/lib/clap/newClap.ts b/src/lib/clap/newClap.ts new file mode 100644 index 0000000000000000000000000000000000000000..5aca45e037f4476e21a814b818049b71ba19f7a4 --- /dev/null +++ b/src/lib/clap/newClap.ts @@ -0,0 +1,37 @@ + +import { v4 as uuidv4 } from "uuid" + +import { ClapMeta, ClapModel, ClapProject, ClapScene, ClapSegment } from "./types" +import { getValidNumber } from "@/lib/utils/getValidNumber" + +// generate an empty clap file, or copy one from a source +export function newClap(clap: { + meta?: Partial + models?: ClapModel[] + scenes?: ClapScene[] + segments?: ClapSegment[] + } = {}): ClapProject { + + const meta: ClapMeta = { + id: clap?.meta?.id === "string" ? clap.meta.id : uuidv4(), + title: clap?.meta?.title === "string" ? clap.meta.title : "", + description: typeof clap?.meta?.description === "string" ? clap.meta.description : "", + synopsis: typeof clap?.meta?.synopsis === "string" ? clap.meta.synopsis : "", + licence: typeof clap?.meta?.licence === "string" ? clap.meta.licence : "", + orientation: clap?.meta?.orientation === "portrait" ? "portrait" : clap?.meta?.orientation === "square" ? 
"square" : "landscape", + durationInMs: getValidNumber(clap?.meta?.durationInMs, 1000, Number.MAX_SAFE_INTEGER, 4000), + width: getValidNumber(clap?.meta?.width, 256, 8192, 1024), + height: getValidNumber(clap?.meta?.height, 256, 8192, 576), + defaultVideoModel: typeof clap?.meta?.defaultVideoModel === "string" ? clap.meta.defaultVideoModel : "SVD", + extraPositivePrompt: Array.isArray(clap?.meta?.extraPositivePrompt) ? clap.meta.extraPositivePrompt : [], + screenplay: typeof clap?.meta?.screenplay === "string" ? clap.meta.screenplay : "", + isLoop: typeof clap?.meta?.isLoop === "boolean" ? clap.meta.isLoop : false, + isInteractive: typeof clap?.meta?.isInteractive === "boolean" ? clap.meta.isInteractive : false, + } + + const models: ClapModel[] = clap?.models && Array.isArray(clap.models) ? clap.models : [] + const scenes: ClapScene[] = clap?.scenes && Array.isArray(clap.scenes) ? clap.scenes : [] + const segments: ClapSegment[] = clap?.segments && Array.isArray(clap.segments) ? clap.segments : [] + + return { meta, models, scenes, segments } +} diff --git a/src/lib/clap/newSegment.ts b/src/lib/clap/newSegment.ts new file mode 100644 index 0000000000000000000000000000000000000000..404617eb24924a386e52d974a14c291e7c9d76fe --- /dev/null +++ b/src/lib/clap/newSegment.ts @@ -0,0 +1,46 @@ +import { v4 as uuidv4 } from "uuid" + +import { generateSeed } from "../utils/generateSeed" +import { ClapSegment } from "./types" +import { isValidNumber } from "../utils/isValidNumber" + +export function newSegment(maybeSegment?: Partial) { + + const startTimeInMs = + isValidNumber(maybeSegment?.startTimeInMs) + ? (maybeSegment?.startTimeInMs || 0) + : 0 + + const assetDurationInMs = + isValidNumber(maybeSegment?.assetDurationInMs) + ? (maybeSegment?.assetDurationInMs || 0) + : 1000 + + const endTimeInMs = + isValidNumber(maybeSegment?.endTimeInMs) + ? 
(maybeSegment?.endTimeInMs || 0) + : (startTimeInMs + assetDurationInMs) + + const segment: ClapSegment = { + id: typeof maybeSegment?.id === "string" ? maybeSegment.id : uuidv4(), + track: isValidNumber(maybeSegment?.track) ? (maybeSegment?.track || 0) : 0, + startTimeInMs, + endTimeInMs, + category: typeof maybeSegment?.category === "string" ? maybeSegment.category : "generic", + modelId: typeof maybeSegment?.modelId === "string" ? maybeSegment.modelId : "", + sceneId: typeof maybeSegment?.sceneId === "string" ? maybeSegment.sceneId : "", + prompt: typeof maybeSegment?.prompt === "string" ? maybeSegment.prompt : "", + label: typeof maybeSegment?.label === "string" ? maybeSegment.label : "", + outputType: typeof maybeSegment?.outputType === "string" ? maybeSegment.outputType : "text", + renderId: typeof maybeSegment?.renderId === "string" ? maybeSegment.renderId : "", + status: typeof maybeSegment?.status === "string" ? maybeSegment.status : "to_generate", + assetUrl: typeof maybeSegment?.assetUrl === "string" ? maybeSegment.assetUrl : "", + assetDurationInMs, + createdBy: typeof maybeSegment?.createdBy === "string" ? maybeSegment.createdBy : "ai", + editedBy: typeof maybeSegment?.editedBy === "string" ? maybeSegment.editedBy : "ai", + outputGain: isValidNumber(maybeSegment?.outputGain) ? (maybeSegment?.outputGain || 0) : 0, + seed: isValidNumber(maybeSegment?.seed) ? 
(maybeSegment?.seed || 0) : generateSeed() + } + + return segment +} \ No newline at end of file diff --git a/src/lib/clap/parseClap.ts b/src/lib/clap/parseClap.ts new file mode 100644 index 0000000000000000000000000000000000000000..9b619b05781058f0f361cab87f6b572a66993acd --- /dev/null +++ b/src/lib/clap/parseClap.ts @@ -0,0 +1,319 @@ +import YAML from "yaml" +import { v4 as uuidv4 } from "uuid" + +import { ClapHeader, ClapMeta, ClapModel, ClapProject, ClapScene, ClapSegment } from "./types" +import { dataUriToBlob } from "@/lib/base64/dataUriToBlob" +import { getValidNumber } from "@/lib/utils/getValidNumber" + +type StringOrBlob = string | Blob + +/** + * Import a clap file from various data sources into an ClapProject + * + * Inputs can be: + * - a Clap project (which is an object) + * - an URL to a remote .clap file + * - a string containing a YAML array + * - a data uri containing a gzipped YAML array + * - a Blob containing a gzipped YAML array + * + * note: it is not really async, because for some reason YAML.parse is a blocking call like for JSON, + * there is no async version although we are now in the 20s not 90s + */ +export async function parseClap(src?: ClapProject | string | Blob, debug = false): Promise { + + try { + if ( + typeof src === "object" && + Array.isArray( (src as any)?.scenes) && + Array.isArray((src as any)?.models) + ) { + if (debug) { + console.log("parseClap: input is already a Clap file, nothing to do:", src) + } + // we can skip verification + return src as ClapProject + } + } catch (err) { + // well, this is not a clap project + } + + let stringOrBlob = (src || "") as StringOrBlob + + // both should work + const dataUriHeader1 = "data:application/x-gzip;base64," + const dataUriHeader2 = "data:application/octet-stream;base64," + + const inputIsString = typeof stringOrBlob === "string" + const inputIsDataUri = typeof stringOrBlob === "string" ? 
stringOrBlob.startsWith(dataUriHeader1) || stringOrBlob.startsWith(dataUriHeader2) : false + const inputIsRemoteFile = typeof stringOrBlob === "string" ? (stringOrBlob.startsWith("http://") || stringOrBlob.startsWith("https://")) : false + + let inputIsBlob = typeof stringOrBlob !== "string" + + let inputYamlArrayString = "" + + if (debug) { + console.log(`parseClap: pre-analysis: ${JSON.stringify({ + inputIsString, + inputIsBlob, + inputIsDataUri, + inputIsRemoteFile + }, null, 2)}`) + } + + if (typeof stringOrBlob === "string") { + if (debug) { + console.log("parseClap: input is a string ", stringOrBlob.slice(0, 120)) + } + if (inputIsDataUri) { + if (debug) { + console.log(`parseClap: input is a data uri archive`) + } + stringOrBlob = dataUriToBlob(stringOrBlob, "application/x-gzip") + if (debug) { + console.log(`parseClap: inputBlob = `, stringOrBlob) + } + inputIsBlob = true + } else if (inputIsRemoteFile) { + try { + if (debug) { + console.log(`parseClap: input is a remote .clap file`) + } + const res = await fetch(stringOrBlob) + stringOrBlob = await res.blob() + if (!stringOrBlob) { throw new Error("blob is empty") } + inputIsBlob = true + } catch (err) { + // url seems invalid + throw new Error(`failed to download the .clap file (${err})`) + } + } else { + if (debug) { + console.log("parseClap: input is a text string containing a YAML array") + } + inputYamlArrayString = stringOrBlob + inputIsBlob = false + } + } + + if (typeof stringOrBlob !== "string" && stringOrBlob) { + if (debug) { + console.log("parseClap: decompressing the blob..") + } + // Decompress the input blob using gzip + const decompressedStream = stringOrBlob.stream().pipeThrough(new DecompressionStream('gzip')) + + try { + // Convert the stream to text using a Response object + const decompressedOutput = new Response(decompressedStream) + // decompressedOutput.headers.set("Content-Type", "application/x-gzip") + if (debug) { + console.log("parseClap: decompressedOutput: ", 
decompressedOutput) + } + // const blobAgain = await decompressedOutput.blob() + inputYamlArrayString = await decompressedOutput.text() + + if (debug && inputYamlArrayString) { + console.log("parseClap: successfully decompressed the blob!") + } + } catch (err) { + const message = `parseClap: failed to decompress (${err})` + console.error(message) + throw new Error(message) + } + } + + // we don't need this anymore I think + // new Blob([inputStringOrBlob], { type: "application/x-yaml" }) + + let maybeArray: any = {} + try { + if (debug) { + console.log("parseClap: parsing the YAML array..") + } + // Parse YAML string to raw data + maybeArray = YAML.parse(inputYamlArrayString) + } catch (err) { + throw new Error("invalid clap file (input string is not YAML)") + } + + if (!Array.isArray(maybeArray) || maybeArray.length < 2) { + throw new Error("invalid clap file (need a clap format header block and project metadata block)") + } + + if (debug) { + console.log("parseClap: the YAML seems okay, continuing decoding..") + } + + const maybeClapHeader = maybeArray[0] as ClapHeader + + if (maybeClapHeader.format !== "clap-0") { + throw new Error("invalid clap file (sorry, but you can't make up version numbers like that)") + } + + + const maybeClapMeta = maybeArray[1] as ClapMeta + + const clapMeta: ClapMeta = { + id: typeof maybeClapMeta.title === "string" ? maybeClapMeta.id : uuidv4(), + title: typeof maybeClapMeta.title === "string" ? maybeClapMeta.title : "", + description: typeof maybeClapMeta.description === "string" ? maybeClapMeta.description : "", + synopsis: typeof maybeClapMeta.synopsis === "string" ? maybeClapMeta.synopsis : "", + licence: typeof maybeClapMeta.licence === "string" ? maybeClapMeta.licence : "", + orientation: maybeClapMeta.orientation === "portrait" ? "portrait" : maybeClapMeta.orientation === "square" ? 
"square" : "landscape", + durationInMs: getValidNumber(maybeClapMeta.durationInMs, 1000, Number.MAX_SAFE_INTEGER, 4000), + width: getValidNumber(maybeClapMeta.width, 128, 8192, 1024), + height: getValidNumber(maybeClapMeta.height, 128, 8192, 576), + defaultVideoModel: typeof maybeClapMeta.defaultVideoModel === "string" ? maybeClapMeta.defaultVideoModel : "SVD", + extraPositivePrompt: Array.isArray(maybeClapMeta.extraPositivePrompt) ? maybeClapMeta.extraPositivePrompt : [], + screenplay: typeof maybeClapMeta.screenplay === "string" ? maybeClapMeta.screenplay : "", + isLoop: typeof maybeClapMeta.isLoop === "boolean" ? maybeClapMeta.isLoop : false, + isInteractive: typeof maybeClapMeta.isInteractive === "boolean" ? maybeClapMeta.isInteractive : false, + } + + /* + in case we want to support streaming (mix of models and segments etc), we could do it this way: + + const maybeModelsOrSegments = rawData.slice(2) + maybeModelsOrSegments.forEach((unknownElement: any) => { + if (isValidNumber(unknownElement?.track)) { + maybeSegments.push(unknownElement as ClapSegment) + } else { + maybeModels.push(unknownElement as ClapModel) + } + }) + */ + + + const expectedNumberOfModels = maybeClapHeader.numberOfModels || 0 + const expectedNumberOfScenes = maybeClapHeader.numberOfScenes || 0 + const expectedNumberOfSegments = maybeClapHeader.numberOfSegments || 0 + + // note: we assume the order is strictly enforced! + // if you implement streaming (mix of models and segments) you will have to rewrite this! 
+ + const afterTheHeaders = 2 + const afterTheModels = afterTheHeaders + expectedNumberOfModels + + const afterTheScenes = afterTheModels + expectedNumberOfScenes + + // note: if there are no expected models, maybeModels will be empty + const maybeModels = maybeArray.slice(afterTheHeaders, afterTheModels) as ClapModel[] + + // note: if there are no expected scenes, maybeScenes will be empty + const maybeScenes = maybeArray.slice(afterTheModels, afterTheScenes) as ClapScene[] + + const maybeSegments = maybeArray.slice(afterTheScenes) as ClapSegment[] + + const clapModels: ClapModel[] = maybeModels.map(({ + id, + category, + triggerName, + label, + description, + author, + thumbnailUrl, + seed, + assetSourceType, + assetUrl, + age, + gender, + region, + appearance, + voiceVendor, + voiceId, + }) => ({ + // TODO: we should verify each of those, probably + id, + category, + triggerName, + label, + description, + author, + thumbnailUrl, + seed, + assetSourceType, + assetUrl, + age, + gender, + region, + appearance, + voiceVendor, + voiceId, + })) + + const clapScenes: ClapScene[] = maybeScenes.map(({ + id, + scene, + line, + rawLine, + sequenceFullText, + sequenceStartAtLine, + sequenceEndAtLine, + startAtLine, + endAtLine, + events, + }) => ({ + id, + scene, + line, + rawLine, + sequenceFullText, + sequenceStartAtLine, + sequenceEndAtLine, + startAtLine, + endAtLine, + events: events.map(e => e) + })) + + const clapSegments: ClapSegment[] = maybeSegments.map(({ + id, + track, + startTimeInMs, + endTimeInMs, + category, + modelId, + sceneId, + prompt, + label, + outputType, + renderId, + status, + assetUrl, + assetDurationInMs, + createdBy, + editedBy, + outputGain, + seed, + }) => ({ + // TODO: we should verify each of those, probably + id, + track, + startTimeInMs, + endTimeInMs, + category, + modelId, + sceneId, + prompt, + label, + outputType, + renderId, + status, + assetUrl, + assetDurationInMs, + createdBy, + editedBy, + outputGain, + seed, + })) + + if (debug) { 
+ console.log(`parseClap: successfully parsed ${clapModels.length} models, ${clapScenes.length} scenes and ${clapSegments.length} segments`) + } + return { + meta: clapMeta, + models: clapModels, + scenes: clapScenes, + segments: clapSegments + } +} diff --git a/src/lib/clap/serializeClap.ts b/src/lib/clap/serializeClap.ts new file mode 100644 index 0000000000000000000000000000000000000000..eab688c487f37661fe5b3d399b0413c464469f6b --- /dev/null +++ b/src/lib/clap/serializeClap.ts @@ -0,0 +1,169 @@ +import YAML from "yaml" +import { v4 as uuidv4 } from "uuid" + +import { getValidNumber } from "@/lib/utils/getValidNumber" + +import { ClapHeader, ClapMeta, ClapModel, ClapProject, ClapScene, ClapSegment } from "./types" + +export async function serializeClap({ + meta, // ClapMeta + models, // ClapModel[] + scenes, // ClapScene[] + segments, // ClapSegment[] +}: ClapProject): Promise { + + // we play it safe, and we verify the structure of the parameters, + // to make sure we generate a valid clap file + const clapModels: ClapModel[] = models.map(({ + id, + category, + triggerName, + label, + description, + author, + thumbnailUrl, + seed, + assetSourceType, + assetUrl, + age, + gender, + region, + appearance, + voiceVendor, + voiceId, + }) => ({ + id, + category, + triggerName, + label, + description, + author, + thumbnailUrl, + seed, + assetSourceType, + assetUrl, + age, + gender, + region, + appearance, + voiceVendor, + voiceId, + })) + + const clapScenes: ClapScene[] = scenes.map(({ + id, + scene, + line, + rawLine, + sequenceFullText, + sequenceStartAtLine, + sequenceEndAtLine, + startAtLine, + endAtLine, + events, + }) => ({ + id, + scene, + line, + rawLine, + sequenceFullText, + sequenceStartAtLine, + sequenceEndAtLine, + startAtLine, + endAtLine, + events: events.map(e => e) + })) + + const clapSegments: ClapSegment[] = segments.map(({ + id, + track, + startTimeInMs, + endTimeInMs, + category, + modelId, + sceneId, + prompt, + label, + outputType, + renderId, + 
status, + assetUrl, + assetDurationInMs, + createdBy, + editedBy, + outputGain, + seed, + }) => ({ + id, + track, + startTimeInMs, + endTimeInMs, + category, + modelId, + sceneId, + prompt, + label, + outputType, + renderId, + status, + assetUrl, + assetDurationInMs, + createdBy, + editedBy, + outputGain, + seed, + })) + + const clapHeader: ClapHeader = { + format: "clap-0", + numberOfModels: clapModels.length, + numberOfScenes: clapScenes.length, + numberOfSegments: clapSegments.length, + } + + const clapMeta: ClapMeta = { + id: meta.id || uuidv4(), + title: typeof meta.title === "string" ? meta.title : "Untitled", + description: typeof meta.description === "string" ? meta.description : "", + synopsis: typeof meta.synopsis === "string" ? meta.synopsis : "", + licence: typeof meta.licence === "string" ? meta.licence : "", + orientation: meta.orientation === "portrait" ? "portrait" : meta.orientation === "square" ? "square" : "landscape", + durationInMs: getValidNumber(meta.durationInMs, 1000, Number.MAX_SAFE_INTEGER, 4000), + width: getValidNumber(meta.width, 256, 8192, 1024), + height: getValidNumber(meta.height, 256, 8192, 576), + defaultVideoModel: typeof meta.defaultVideoModel === "string" ? meta.defaultVideoModel : "SVD", + extraPositivePrompt: Array.isArray(meta.extraPositivePrompt) ? meta.extraPositivePrompt : [], + screenplay: typeof meta.screenplay === "string" ? meta.screenplay : "", + isLoop: typeof meta.isLoop === "boolean" ? meta.isLoop : false, + isInteractive: typeof meta.isInteractive === "boolean" ? 
meta.isInteractive : false, + } + + const entries = [ + clapHeader, + clapMeta, + ...clapModels, + ...clapScenes, + ...clapSegments + ] + + const strigifiedResult = YAML.stringify(entries) + + // Convert the string to a Blob + const blobResult = new Blob([strigifiedResult], { type: "application/x-yaml" }) + + // Create a stream for the blob + const readableStream = blobResult.stream() + + // Compress the stream using gzip + const compressionStream = new CompressionStream('gzip') + const compressedStream = readableStream.pipeThrough(compressionStream) + + // Create a new blob from the compressed stream + const response = new Response(compressedStream) + + response.headers.set("Content-Type", "application/x-gzip") + + const compressedBlob = await response.blob() + + return compressedBlob +} \ No newline at end of file diff --git a/src/lib/clap/types.ts b/src/lib/clap/types.ts new file mode 100644 index 0000000000000000000000000000000000000000..15e4dcbab07659ce5e281cd051a7c91ff02d4d56 --- /dev/null +++ b/src/lib/clap/types.ts @@ -0,0 +1,203 @@ + +export type ClapSegmentCategory = + | "splat" + | "mesh" + | "depth" + | "event" + | "interface" + | "phenomenon" + | "video" + | "storyboard" + | "transition" + | "characters" + | "location" + | "time" + | "era" + | "lighting" + | "weather" + | "action" + | "music" + | "sound" + | "dialogue" + | "style" + | "camera" + | "generic" + +export type ClapOutputType = + | "text" + | "animation" + | "interface" + | "event" + | "phenomenon" + | "transition" + | "image" + | "video" + | "audio" + +export type ClapSegmentStatus = + | "to_generate" + | "to_interpolate" + | "to_upscale" + | "completed" + | "error" + +export type ClapAuthor = + | "auto" // the element was edited automatically using basic if/else logical rules + | "ai" // the element was edited using a large language model + | "human" // the element was edited by a human + +export type ClapAssetSource = + | "REMOTE" // http:// or https:// + + // note that "path" assets are 
potentially a security risk, they need to be treated with care + | "PATH" // a file path eg. /path or ./path/to/ or ../path/to/ + + | "DATA" // a data URI, starting with data: + + | "PROMPT" // by default, a plain text prompt + + | "EMPTY" + +export type ClapModelGender = + | "male" + | "female" + | "person" + | "object" + +export type ClapModelAppearance = "serious" | "neutral" | "friendly" | "chill" + +// this is used for accent, style.. +export type ClapModelRegion = + | "american" + | "british" + | "australian" + | "canadian" + | "indian" + | "french" + | "italian" + | "german" + | "chinese" + +// note: this is all very subjective, so please use good judgment +// +// "deep" might indicate a deeper voice tone, thicker, rich in harmonics +// in this context, it is used to indicate voices that could +// be associated with African American (AADOS) characters +// +// "high" could be used for some other countries, eg. asia +export type ClapModelTimbre = "high" | "neutral" | "deep" + +export type ClapVoiceVendor = "ElevenLabs" | "XTTS" + +export type ClapVoice = { + name: string + gender: ClapModelGender + age: number + region: ClapModelRegion + timbre: ClapModelTimbre + appearance: ClapModelAppearance + voiceVendor: ClapVoiceVendor + voiceId: string +} + +export type ClapHeader = { + format: "clap-0" + numberOfModels: number + numberOfScenes: number + numberOfSegments: number +} + +export type ClapMeta = { + id: string + title: string + description: string + synopsis: string + licence: string + orientation: string + + // the default duration of the experience + // the real one might last longer if made interactive + durationInMs: number + + width: number + height: number + defaultVideoModel: string + extraPositivePrompt: string[] + screenplay: string + isLoop: boolean + isInteractive: boolean +} + +export type ClapSceneEvent = { + id: string + type: "description" | "dialogue" | "action" + character?: string + description: string + behavior: string + startAtLine: 
number + endAtLine: number +} + +export type ClapScene = { + id: string + scene: string + line: string + rawLine: string + sequenceFullText: string + sequenceStartAtLine: number + sequenceEndAtLine: number + startAtLine: number + endAtLine: number + events: ClapSceneEvent[] +} + +export type ClapSegment = { + id: string + track: number // usually track 0 is the video, track 1 is the storyboard, track 2 is the camera + startTimeInMs: number + endTimeInMs: number + category: ClapSegmentCategory + modelId: string + sceneId: string + prompt: string + label: string + outputType: ClapOutputType + renderId: string + status: ClapSegmentStatus + assetUrl: string + assetDurationInMs: number + createdBy: ClapAuthor + editedBy: ClapAuthor + outputGain: number + seed: number +} + +export type ClapModel = { + id: string + category: ClapSegmentCategory + triggerName: string + label: string + description: string + author: string + thumbnailUrl: string + seed: number + + assetSourceType: ClapAssetSource + assetUrl: string + + // those are only used by certain types of models + age: number + gender: ClapModelGender + region: ClapModelRegion + appearance: ClapModelAppearance + voiceVendor: ClapVoiceVendor + voiceId: string +} + +export type ClapProject = { + meta: ClapMeta + models: ClapModel[] + scenes: ClapScene[] + segments: ClapSegment[] + // let's keep room for other stuff (screenplay etc) +} diff --git a/src/lib/invertImage.ts b/src/lib/invertImage.ts deleted file mode 100644 index c3464a9ff23f187121bcb87076888115a30d85e5..0000000000000000000000000000000000000000 --- a/src/lib/invertImage.ts +++ /dev/null @@ -1,33 +0,0 @@ -export async function invertImage(base64Image: string): Promise { - return new Promise((resolve, reject) => { - const img = new Image(); - img.src = base64Image; - img.onerror = reject; - img.onload = () => { - const canvas = document.createElement('canvas'); - const ctx = canvas.getContext('2d'); - - if (!ctx) { - reject('Unable to get canvas context'); - 
return; - } - - canvas.width = img.width; - canvas.height = img.height; - - ctx.drawImage(img, 0, 0, img.width, img.height); - - const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); - - for (let i = 0; i < imageData.data.length; i += 4) { - imageData.data[i] = 255 - imageData.data[i]; // red - imageData.data[i + 1] = 255 - imageData.data[i + 1]; // green - imageData.data[i + 2] = 255 - imageData.data[i + 2]; // blue - } - - ctx.putImageData(imageData, 0, 0); - - resolve(canvas.toDataURL('image/png')); - }; - }); -} \ No newline at end of file diff --git a/src/lib/mergeLayers.ts b/src/lib/mergeLayers.ts deleted file mode 100644 index 59b5064364f95bf9ed981ac56ab5b2b9409c7d5a..0000000000000000000000000000000000000000 --- a/src/lib/mergeLayers.ts +++ /dev/null @@ -1,38 +0,0 @@ -/** - * This merges multiple layers into one image - * - * @param layersInBase64 - * @returns - */ -export async function mergeLayers(layersInBase64: string[]): Promise { - let canvas = document.createElement('canvas'); - let ctx = canvas.getContext('2d'); - - if (!ctx) { throw new Error(`couldn't get the 2D context`) } - - const loadImage = (src: string): Promise => { - return new Promise((resolve, reject) => { - const img = new Image(); - img.onload = () => resolve(img); - img.onerror = reject; - img.src = src; - }); - }; - - for (const base64Image of layersInBase64) { - const image = await loadImage(base64Image); - - // the default is 'source-over' and it would make sense if the background is transparent, - // but I think right now we want as a hack to use maybe 'overlay' - // ctx.globalCompositeOperation = 'source-over'; - ctx.globalCompositeOperation = 'overlay'; - - //if (!canvas.width || !canvas.height) { - canvas.width = image.width; - canvas.height = image.height; - //} - ctx.drawImage(image, 0, 0); - } - - return canvas.toDataURL(); -} \ No newline at end of file diff --git a/src/lib/utils.ts b/src/lib/utils/cn.ts similarity index 100% rename from src/lib/utils.ts 
rename to src/lib/utils/cn.ts diff --git a/src/lib/computePercentage.ts b/src/lib/utils/computePercentage.ts similarity index 100% rename from src/lib/computePercentage.ts rename to src/lib/utils/computePercentage.ts diff --git a/src/lib/config.ts b/src/lib/utils/config.ts similarity index 100% rename from src/lib/config.ts rename to src/lib/utils/config.ts diff --git a/src/lib/generateSeed.ts b/src/lib/utils/generateSeed.ts similarity index 100% rename from src/lib/generateSeed.ts rename to src/lib/utils/generateSeed.ts diff --git a/src/lib/utils/getValidNumber.ts b/src/lib/utils/getValidNumber.ts new file mode 100644 index 0000000000000000000000000000000000000000..841fc4afab03b33fbc6e6349b41d9099a3073cb3 --- /dev/null +++ b/src/lib/utils/getValidNumber.ts @@ -0,0 +1,10 @@ +export const getValidNumber = (something: any, minValue: number, maxValue: number, defaultValue: number) => { + const strValue = `${something || defaultValue}` + const numValue = Number(strValue) + const isValid = !isNaN(numValue) && isFinite(numValue) + if (!isValid) { + return defaultValue + } + return Math.max(minValue, Math.min(maxValue, numValue)) + +} \ No newline at end of file diff --git a/src/lib/utils/isValidNumber.ts b/src/lib/utils/isValidNumber.ts new file mode 100644 index 0000000000000000000000000000000000000000..a4e24e3aae75cd7851509aa8533cad1858b39792 --- /dev/null +++ b/src/lib/utils/isValidNumber.ts @@ -0,0 +1,7 @@ +export function isValidNumber(input: any) { + return ( + typeof (input) === "number" && + isFinite(input) && + !isNaN(input) + ) +} \ No newline at end of file diff --git a/src/lib/pick.ts b/src/lib/utils/pick.ts similarity index 100% rename from src/lib/pick.ts rename to src/lib/utils/pick.ts diff --git a/src/lib/sleep.ts b/src/lib/utils/sleep.ts similarity index 100% rename from src/lib/sleep.ts rename to src/lib/utils/sleep.ts