Spaces:

Armandoliv
/

whisper-biomedical-ner

Runtime error

App Files Files Community

whisper-biomedical-ner / app.py

Armandoliv

Update app.py

8ec2336 verified 6 months ago

raw

history blame contribute delete

2.16 kB

	import os
	import re
	import subprocess
	import sys

	import gradio as gr
	import spacy
	import torch
	import whisper

	# Install the `es_pipeline` spaCy package at startup. pip is invoked through
	# the running interpreter so the wheel lands in the same environment that
	# `spacy.load` searches below; a bare `pip` found on PATH may belong to a
	# different Python installation.
	_pip_install = subprocess.run(
	    [
	        sys.executable,
	        "-m",
	        "pip",
	        "install",
	        "https://huggingface.co/Armandoliv/es_pipeline/resolve/main/es_pipeline-any-py3-none-any.whl",
	    ],
	    check=False,
	)
	if _pip_install.returncode != 0:
	    # Stay best-effort: the package may already be installed, in which case
	    # `spacy.load` still succeeds. Surface the failure instead of hiding it.
	    print(
	        f"warning: pip install of es_pipeline exited with status {_pip_install.returncode}",
	        file=sys.stderr,
	    )

	# Use the first CUDA GPU when one is available, otherwise fall back to CPU.
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	# Load the "small" Whisper checkpoint onto the device selected above.
	model_whisper = whisper.load_model("small", device=device)
	# Load the pipeline installed above; raises if the package is missing.
	nlp_ner = spacy.load("es_pipeline")

	# Only links containing this exact prefix are recognised; any other input
	# falls back to the default video id below.
	_YOUTUBE_WATCH_PREFIX = "https://www.youtube.com/watch?v="
	_DEFAULT_VIDEO_ID = "XfyGv-xwjlI"
	# Leading run of characters accepted as a video id. Restricting the id to
	# these characters drops trailing query parameters ("&t=30s") and keeps
	# path separators out of the generated file name.
	_VIDEO_ID_PATTERN = re.compile(r"[A-Za-z0-9_-]+")


	def _extract_video_id(url: str) -> str:
	    """Return the video id that follows the watch prefix in *url*.

	    Returns the default video id when *url* does not contain the watch
	    prefix. Raises ValueError when the prefix is present but is not followed
	    by a usable id.
	    """
	    _, prefix_found, remainder = url.partition(_YOUTUBE_WATCH_PREFIX)
	    if not prefix_found:
	        return _DEFAULT_VIDEO_ID
	    id_match = _VIDEO_ID_PATTERN.match(remainder)
	    if id_match is None:
	        raise ValueError(f"No video id found in link: {url!r}")
	    return id_match.group()


	def _download_audio(video_id: str, audio_path: str) -> None:
	    """Run youtube-dl to fetch the m4a audio of *video_id* into *audio_path*.

	    Raises RuntimeError when youtube-dl cannot be started or exits with a
	    non-zero status.
	    """
	    command = [
	        "youtube-dl",
	        "-o",
	        audio_path,
	        "--extract-audio",
	        "--restrict-filenames",
	        "-f",
	        "bestaudio[ext=m4a]",
	        # Pass a full URL rather than the bare id: ids may begin with "-",
	        # which youtube-dl would otherwise parse as a command-line option.
	        f"{_YOUTUBE_WATCH_PREFIX}{video_id}",
	    ]
	    try:
	        # An argument list (no shell) means user input is never interpreted
	        # by a shell.
	        subprocess.run(command, check=True)
	    except (OSError, subprocess.CalledProcessError) as err:
	        raise RuntimeError(
	            f"Could not download audio for video {video_id!r}"
	        ) from err


	def main_generator(youtube_id: str) -> dict:
	    """Transcribe a YouTube video and tag named entities in the transcript.

	    Args:
	        youtube_id: A link of the form
	            ``https://www.youtube.com/watch?v=<id>``. Input without that
	            prefix falls back to a built-in default video.

	    Returns:
	        A dict with the transcript under ``"text"`` and, under
	        ``"entities"``, one dict per entity found by ``nlp_ner`` holding its
	        label (``"entity"``), its text (``"word"``) and its character
	        offsets in the transcript (``"start"``, ``"end"``).

	    Raises:
	        ValueError: The link has the watch prefix but no usable video id.
	        RuntimeError: The audio download failed.
	    """
	    video_id = _extract_video_id(youtube_id)
	    audio_path = f"test_audio_youtube_{video_id}.m4a"
	    _download_audio(video_id, audio_path)

	    text = model_whisper.transcribe(audio_path)["text"]
	    doc = nlp_ner(text)

	    entities = [
	        {
	            "entity": ent.label_,
	            "word": ent.text,
	            "start": ent.start_char,
	            "end": ent.end_char,
	        }
	        for ent in doc.ents
	    ]
	    return {"text": text, "entities": entities}
	# --- Static texts shown on the demo page ---------------------------------
	title = "ASR FOR SPANISH MEDICAL RECORDS"
	description = "This demo uses AI Models to create an AUDIO ANNOTATION FOR MEDICAL RECORDS "
	examples = ['https://www.youtube.com/watch?v=xOZM-1p-jAk']

	# --- Widgets: a single-line text box in, highlighted text out -------------
	inputs = [
	    gr.Textbox(
	        lines=1,
	        placeholder="Link of youtube video here...",
	        label="Input",
	    )
	]
	outputs = gr.HighlightedText()

	# Custom gradient styling applied to the primary button.
	_button_css = """.gr-button-primary { background: -webkit-linear-gradient(
	70deg, #355764 0%, #55a8a1 100% ) !important; background: #355764;
	background: linear-gradient(
	90deg, #355764 0%, #55a8a1 100% ) !important;
	background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important;
	background: -webkit-linear-gradient(
	90deg, #355764 0%, #55a8a1 100% ) !important;
	color:white !important}"""

	# Wire `main_generator` to the widgets above and start serving the app.
	io = gr.Interface(
	    fn=main_generator,
	    inputs=inputs,
	    outputs=outputs,
	    title=title,
	    description=description,
	    examples=examples,
	    css=_button_css,
	)

	io.launch()