"""Gradio demo: transcribe a YouTube video with Whisper and tag entities.

The app downloads the audio track of a YouTube video, transcribes it with the
Whisper "small" model and runs the ``es_pipeline`` spaCy pipeline over the
transcript. The transcript and its entities are shown in a
``gr.HighlightedText`` component.
"""

import os
import re
import subprocess
import sys
from urllib.parse import parse_qs, urlparse

import gradio as gr
import spacy
import torch
import whisper

# Wheel providing the spaCy pipeline loaded below as "es_pipeline".
ES_PIPELINE_WHEEL = (
    "https://huggingface.co/Armandoliv/es_pipeline/resolve/main/"
    "es_pipeline-any-py3-none-any.whl"
)

# Video used when the submitted text is not a recognisable YouTube link.
DEFAULT_VIDEO_ID = "XfyGv-xwjlI"

# Only these characters are accepted in a video id; anything else is rejected
# so the id is safe to embed in a file name and a URL.
_VIDEO_ID_RE = re.compile(r"[A-Za-z0-9_-]+")

_YOUTUBE_HOSTS = frozenset(
    {"www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"}
)

# Install the pipeline with the interpreter that is running this script, so
# the package lands in the environment spaCy is imported from. Best-effort:
# if it fails (e.g. already installed, offline), spacy.load() below decides.
subprocess.run(
    [sys.executable, "-m", "pip", "install", ES_PIPELINE_WHEEL],
    check=False,
)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_whisper = whisper.load_model("small", device=device)
nlp_ner = spacy.load("es_pipeline")


def _extract_video_id(link):
    """Return the video id found in *link*, or ``DEFAULT_VIDEO_ID``.

    Accepts ``https://www.youtube.com/watch?v=<id>`` (with or without extra
    query parameters) and ``https://youtu.be/<id>`` links. Empty input,
    unparseable input, or an id containing unexpected characters yields the
    default video.
    """
    try:
        parsed = urlparse((link or "").strip())
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken "[...]").
        return DEFAULT_VIDEO_ID

    candidate = ""
    if host in _YOUTUBE_HOSTS:
        candidate = parse_qs(parsed.query).get("v", [""])[0]
    elif host == "youtu.be":
        candidate = parsed.path.strip("/")

    if _VIDEO_ID_RE.fullmatch(candidate):
        return candidate
    return DEFAULT_VIDEO_ID


def _download_audio(video_id, output_file):
    """Download the m4a audio track of *video_id* into *output_file*.

    Raises:
        RuntimeError: if *output_file* does not exist after youtube-dl ran.
    """
    command = [
        "youtube-dl",
        "-o", output_file,
        "--extract-audio",
        "--restrict-filenames",
        "-f", "bestaudio[ext=m4a]",
        # "--" ends option parsing; the full URL (rather than the bare id)
        # keeps ids that start with "-" from being read as options.
        "--",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    # Argument list, no shell: user-supplied text never reaches a shell.
    completed = subprocess.run(command, check=False)
    if not os.path.isfile(output_file):
        raise RuntimeError(
            f"Could not download audio for video '{video_id}' "
            f"(youtube-dl exit code {completed.returncode})."
        )


def main_generator(youtube_id: str) -> dict:
    """Transcribe a YouTube video and annotate the transcript with entities.

    Args:
        youtube_id: Link to a YouTube video. When no video id can be
            extracted from it, the default video is used instead.

    Returns:
        A dict with the transcript under ``"text"`` and, under
        ``"entities"``, a list of dicts with the keys ``"entity"`` (label),
        ``"word"`` (entity text), ``"start"`` and ``"end"`` (character
        offsets into the transcript).

    Raises:
        RuntimeError: if the audio could not be downloaded.
    """
    video_id = _extract_video_id(youtube_id)
    output_file = f"test_audio_youtube_{video_id}.m4a"
    _download_audio(video_id, output_file)

    text = model_whisper.transcribe(output_file)["text"]
    entities = [
        {
            "entity": ent.label_,
            "word": ent.text,
            "start": ent.start_char,
            "end": ent.end_char,
        }
        for ent in nlp_ner(text).ents
    ]
    return {"text": text, "entities": entities}


inputs = [
    gr.Textbox(
        lines=1,
        placeholder="Link of youtube video here...",
        label="Input",
    )
]
outputs = gr.HighlightedText()
title = "ASR FOR SPANISH MEDICAL RECORDS"
description = "This demo uses AI Models to create an AUDIO ANNOTATION FOR MEDICAL RECORDS "
examples = ["https://www.youtube.com/watch?v=xOZM-1p-jAk"]

# Gradient styling for the primary button, split across adjacent literals
# purely for line length; the concatenated value is one CSS rule.
BUTTON_CSS = (
    ".gr-button-primary { "
    "background: -webkit-linear-gradient( 70deg, #355764 0%, #55a8a1 100% ) !important; "
    "background: #355764; "
    "background: linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important; "
    "background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important; "
    "background: -webkit-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important; "
    "color:white !important}"
)

io = gr.Interface(
    fn=main_generator,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    css=BUTTON_CSS,
)

io.launch()