"""Gradio demo: restyle webcam frames or uploaded images with an LCM-LoRA pipeline.

The app exposes two actions:

* "Start Real-Time AI Enhancement" reads frames from the camera attached to the
  machine running this script (OpenCV device 0) and streams the restyled frames
  to the "AI-Enhanced Webcam Feed" image.
* "Process Uploaded Image" restyles a single uploaded image and shows it on the
  canvas.
"""

from typing import Iterator, Optional

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import (
    LCMScheduler,
    StableDiffusionImg2ImgPipeline,
    StableDiffusionPipeline,
)
from PIL import Image

# "latent-consistency/lcm-lora-sdv1-5" only contains LoRA adapter weights, so it
# cannot be passed to ``from_pretrained`` as if it were a full pipeline. The
# adapter is applied on top of a Stable Diffusion v1.5 base checkpoint instead.
# NOTE(review): confirm this base checkpoint id is the one you want to ship.
BASE_MODEL_ID = "stable-diffusion-v1-5/stable-diffusion-v1-5"
LCM_LORA_ID = "latent-consistency/lcm-lora-sdv1-5"

# Side length (pixels) every frame is resized to before inference.
FRAME_SIZE = 512
# LCM is designed for very few denoising steps.
LCM_STEPS = 4
# LCM-LoRA is documented to work with guidance in roughly the 1.0-2.0 range;
# a value above 1.0 keeps classifier-free guidance on so the negative prompt
# is still taken into account.
LCM_GUIDANCE_SCALE = 1.5

# Load the Real-Time Latent Consistency Model (LCM)
device = "cuda" if torch.cuda.is_available() else "cpu"
# An image-to-image pipeline is required because frames are passed via
# ``image=``; the plain text-to-image pipeline does not accept that argument.
lcm_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(BASE_MODEL_ID)
lcm_pipe.scheduler = LCMScheduler.from_config(lcm_pipe.scheduler.config)
# NOTE(review): recent diffusers releases need the ``peft`` package installed
# for ``load_lora_weights`` - verify against the deployed environment.
lcm_pipe.load_lora_weights(LCM_LORA_ID)
lcm_pipe = lcm_pipe.to(device)


def process_frame(
    image: Optional[Image.Image],
    prompt: str = "A futuristic landscape",
    negative_prompt: str = "not blurry",
) -> Optional[np.ndarray]:
    """Restyle one image with the LCM image-to-image pipeline.

    Args:
        image: Input picture as a PIL image, or ``None`` when the UI has no
            image to send (e.g. the button was clicked before uploading).
        prompt: Text describing the desired result.
        negative_prompt: Text describing what the result should avoid.

    Returns:
        The generated image as a NumPy array, or ``None`` if ``image`` is
        ``None``.
    """
    if image is None:
        # Nothing to process; returning None leaves the output component empty
        # instead of raising AttributeError on ``None.resize``.
        return None

    # Uploaded files may be RGBA / palette / grayscale; normalise to 3-channel
    # RGB before handing the image to the pipeline.
    prepared = image.convert("RGB").resize((FRAME_SIZE, FRAME_SIZE))
    result = lcm_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=prepared,
        num_inference_steps=LCM_STEPS,
        guidance_scale=LCM_GUIDANCE_SCALE,
    ).images[0]
    return np.array(result)


def video_stream(prompt: str, negative_prompt: str) -> Iterator[np.ndarray]:
    """Yield restyled frames captured from the local camera (OpenCV device 0).

    The camera is the one attached to the machine running this script, not the
    browser of the person viewing the page.

    Args:
        prompt: Text describing the desired result for every frame.
        negative_prompt: Text describing what every frame should avoid.

    Yields:
        One processed frame (NumPy array) per captured camera frame. The
        generator ends when the camera cannot be opened or stops delivering
        frames.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV delivers BGR; PIL and the pipeline expect RGB.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            yield process_frame(image, prompt, negative_prompt)
    finally:
        # Runs even when the consumer stops iterating early (generator closed),
        # so the camera device is always released.
        cap.release()


# Create Gradio App
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")

    with gr.Row():
        # Browser-side webcam preview, declared as an Image component with a
        # webcam source.
        # NOTE(review): ``sources=[...]`` is the Gradio 4+ spelling; Gradio 3
        # used ``source="webcam"`` - confirm the installed version.
        webcam_feed = gr.Image(
            sources=["webcam"],
            streaming=True,
            label="Live Webcam",
        )
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image for Processing")
        canvas_output = gr.Image(
            interactive=True,
            label="Canvas - Processed Image Output",
        )

    prompt_input = gr.Textbox(
        label="Real-Time LCM Prompt",
        value="A futuristic landscape",
    )
    negative_prompt_input = gr.Textbox(label="Negative Prompt", value="")

    start_button = gr.Button("Start Real-Time AI Enhancement")
    process_button = gr.Button("Process Uploaded Image")

    # ``video_stream`` yields a single frame per step, so it must be wired to
    # exactly one output component.
    start_button.click(
        fn=video_stream,
        inputs=[prompt_input, negative_prompt_input],
        outputs=[processed_image],
    )
    process_button.click(
        fn=process_frame,
        inputs=[image_input, prompt_input, negative_prompt_input],
        outputs=[canvas_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)