import logging import os import re import warnings import gradio as gr import requests import torch from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline from templates import starting_app_code, update_iframe_js, copy_snippet_js, download_code_js, load_js # Filter the UserWarning raised by the audio component. warnings.filterwarnings("ignore", message='Trying to convert audio automatically from int32 to 16-bit int format') logging.basicConfig( level=logging.INFO, # Set the logging level to INFO or any other desired level format="%(asctime)s - %(message)s", # Define the log message format datefmt="%Y-%m-%d %H:%M:%S", # Define the timestamp format ) logger = logging.getLogger("my_logger") HF_TOKEN = os.getenv("HF_TOKEN") if not HF_TOKEN: raise Exception("HF_TOKEN environment variable is required to call remote API.") API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta" headers = {"Authorization": f"Bearer {HF_TOKEN}"} def init_speech_to_text_model(): device = "cuda:0" if torch.cuda.is_available() else "cpu" torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 model_id = "distil-whisper/distil-medium.en" model = AutoModelForSpeechSeq2Seq.from_pretrained( model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True ) model.to(device) processor = AutoProcessor.from_pretrained(model_id) return pipeline( "automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, torch_dtype=torch_dtype, device=device, ) whisper_pipe = init_speech_to_text_model() code_pattern = re.compile(r'```python\n(.*?)```', re.DOTALL) def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.json() def generate_text(code, prompt): logger.info(f"Calling API with prompt:\n{prompt}") prompt = f"```python\n{code}```\nGiven the code above return only updated code for the following request:\n{prompt}\n<|assistant|>" params = {"max_new_tokens": 512} output = query({ "inputs": prompt, "parameters": params, }) if 'error' in output: logger.warning(f'Language model call failed: {output["error"]}') raise gr.Warning(f'Language model call failed: {output["error"]}') logger.info(f'API RESPONSE\n{output[0]["generated_text"]}') assistant_reply = output[0]["generated_text"].split('<|assistant|>')[1] match = re.search(code_pattern, assistant_reply) if not match: return assistant_reply, code, None new_code = match.group(1) logger.info(f'NEW CODE:\nnew_code') return assistant_reply, new_code, None def transcribe(audio): result = whisper_pipe(audio) return result["text"], None def copy_notify(code): gr.Info("App code snippet copied!") with gr.Blocks() as demo: gr.Markdown("