import gradio as gr
import cv2
import torch
import numpy as np
from diffusers import AutoPipelineForImage2Image, AutoencoderTiny
from PIL import Image

# Load the Latent Consistency Model (LCM) img2img pipeline once at startup.
# The checkpoint and tiny VAE below are assumptions; substitute your own models.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
realtime_pipe = AutoPipelineForImage2Image.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7", torch_dtype=dtype
)
# TAESD is a tiny autoencoder that makes VAE decoding fast enough for real time.
realtime_pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
realtime_pipe.to(device)


# Fuller predict() variant (seeded generator, NSFW filter, width/height control),
# kept commented out for reference. It expects a `params` settings object and
# needs `import math`.
# def predict(prompt, frame):
#     generator = torch.manual_seed(params.seed)
#     steps = params.steps
#     strength = params.strength
#     # img2img runs roughly int(steps * strength) denoising steps; keep it >= 1.
#     if int(steps * strength) < 1:
#         steps = math.ceil(1 / max(0.10, strength))
#
#     prompt_embeds = None
#     results = realtime_pipe(
#         image=frame,
#         prompt_embeds=prompt_embeds,
#         prompt=prompt,
#         negative_prompt=params.negative_prompt,
#         generator=generator,
#         strength=strength,
#         num_inference_steps=steps,
#         guidance_scale=1.1,
#         width=params.width,
#         height=params.height,
#         output_type="pil",
#     )
#
#     nsfw_content_detected = (
#         results.nsfw_content_detected[0]
#         if "nsfw_content_detected" in results
#         else False
#     )
#     if nsfw_content_detected:
#         return None
#     return results.images[0]


def process_frame(frame, prompt="A futuristic landscape"):
    """Process a single OpenCV (BGR) frame with the real-time LCM pipeline."""
    # Convert the frame to a 512x512 RGB PIL image.
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize((512, 512))
    # Apply the Real-Time Latent Consistency Model; LCM needs only a few steps.
    result = realtime_pipe(
        prompt=prompt,
        image=image,
        num_inference_steps=4,
        strength=0.5,
        guidance_scale=7.5,
    ).images[0]
    return np.array(result)


def video_stream(prompt):
    """Captures the server-side webcam feed and yields AI-processed frames."""
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = process_frame(frame, prompt)
        yield frame, frame  # one copy for each output component
    cap.release()


# Create Gradio App
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")
    with gr.Row():
        webcam_feed = gr.Image(sources=["webcam"], streaming=True, label="Live Webcam")
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")
    with gr.Row():
        canvas_output = gr.Image(interactive=True, label="Canvas - Processed Image Output")
    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value="A futuristic landscape")
    start_button = gr.Button("Start Real-Time AI Enhancement")
    # video_stream is a generator, so Gradio streams each yielded frame to the outputs.
    start_button.click(fn=video_stream, inputs=[prompt_input], outputs=[processed_image, canvas_output])

demo.launch(share=True)
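
# Alternative wiring (a sketch, not part of the original app): Gradio's Image
# component can also stream frames from the *client's* browser webcam through
# its `stream` event, which avoids the server-side cv2.VideoCapture entirely.
# These lines would go inside the `with gr.Blocks()` block above; `enhance` is
# a hypothetical adapter that converts the RGB frames Gradio delivers into the
# BGR layout process_frame() expects.
#
# def enhance(frame, prompt):
#     return process_frame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR), prompt)
#
# webcam_feed.stream(enhance, inputs=[webcam_feed, prompt_input], outputs=[processed_image])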