"""Speak & Create: a Gradio demo that turns a spoken description into an image.

The browser records audio through the ``gr.Audio`` component, a Hugging Face
speech-recognition pipeline transcribes it, and a Stable Diffusion pipeline
with LoRA weights renders the transcription.
"""

import functools

import gradio as gr
# NOTE: kept from the original import list. Recording now happens in the
# browser through gr.Audio, so this module no longer calls sounddevice.
import sounddevice as sd  # noqa: F401
from diffusers import DiffusionPipeline
# Aliased on purpose: the module-level name ``pipeline`` below is the diffusion
# pipeline, which previously shadowed this factory and broke speech recognition.
from transformers import pipeline as hf_pipeline

# Load the diffuser pipeline with LoRA weights
pipeline = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipeline.load_lora_weights("MdEndan/tinysketch-fine-tuned")


@functools.lru_cache(maxsize=1)
def _get_speech_pipe():
    """Build the speech-recognition pipeline once and reuse it across requests."""
    # Default ASR checkpoint, as in the original (replace with your choice).
    return hf_pipeline("automatic-speech-recognition")


def _to_mono_float32(samples):
    """Return *samples* as a 1-D float32 array peak-normalised into [-1, 1]."""
    if samples.ndim > 1:
        # Multi-channel recordings arrive as (n_samples, n_channels).
        samples = samples.mean(axis=1)
    samples = samples.astype("float32")
    peak = abs(samples).max() if samples.size else 0.0
    if peak > 0:  # Skip the division for empty or all-silent input.
        samples = samples / peak
    return samples


def _transcribe(audio):
    """Transcribe a Gradio ``(sample_rate, samples)`` recording to text."""
    sample_rate, samples = audio
    # The dict form lets the pipeline resample to the rate its model expects.
    result = _get_speech_pipe()(
        {"sampling_rate": sample_rate, "raw": _to_mono_float32(samples)}
    )
    return result["text"].strip()


def generate_image(text):
    """Transcribe recorded speech and generate an image from the transcription.

    Args:
        text: The value delivered by the ``gr.Audio`` input, i.e. a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. The parameter name is kept for backward compatibility.

    Returns:
        The first image produced by the diffusion pipeline, or ``None`` when
        no audio was supplied, transcription failed or was empty, or image
        generation failed. Failures are printed rather than raised.
    """
    if text is None:
        print("Error during speech recognition: no audio was recorded.")
        return None

    # Broad catch is deliberate: this is the request boundary and the demo
    # should report a failure instead of crashing the worker.
    try:
        prompt = _transcribe(text)
    except Exception as e:
        print(f"Error during speech recognition: {e}")
        return None

    if not prompt:
        # Never feed an empty or error string to the image model.
        print("Error during speech recognition: no speech detected.")
        return None

    # Generate image using diffuser pipeline
    try:
        return pipeline(prompt).images[0]
    except Exception as e:
        print(f"Error during image generation: {e}")
        return None


# Gradio interface with microphone and image display
interface = gr.Interface(
    fn=generate_image,
    inputs=gr.Audio(sources=["microphone"]),
    outputs=gr.Image(),
    title="Speak & Create: Text-to-Image with Microphone Input (LORA)",
    description="Speak your description and see an image generated using a fine-tuned model!",
)

# Handle potential errors during Gradio launch
try:
    # The browser asks the user for microphone permission on first recording;
    # launch() needs no extra argument for that.
    interface.launch(share=True)
except Exception as e:
    print(f"Error launching Gradio interface: {e}")