File size: 3,454 Bytes
1687c0f a86eedf f417b2c 1687c0f f417b2c 1687c0f f417b2c 1687c0f cd8db40 1687c0f cd8db40 f417b2c cd8db40 1687c0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
import subprocess

import gradio as gr
import soundfile as sf
import sox
import torch
from transformers import AutoTokenizer
from transformers import BertForQuestionAnswering
from transformers import RobertaForQuestionAnswering
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from transformers import pipeline
def read_file_and_process(wav_file):
    """Resample a recording to 16 kHz and feed it to the module-level processor.

    The resampled copy is written next to the input file, using the input
    path without its extension plus the suffix ``"16k.wav"``.

    Args:
        wav_file: Path of the recorded audio file.

    Returns:
        The object returned by ``processor(...)`` for the resampled audio,
        requested with ``return_tensors="pt"`` and ``padding=True``.
    """
    # splitext removes only the final extension.  Splitting on the first "."
    # would truncate paths such as "./rec.wav" or "/tmp/a.b/rec.wav" in the
    # middle of a directory name.
    stem, _extension = os.path.splitext(wav_file)
    filename_16k = stem + "16k.wav"

    resampler(wav_file, filename_16k)

    # sf.read returns (samples, sample_rate); the rate is fixed by resampler.
    speech, _ = sf.read(filename_16k)
    return processor(speech, sampling_rate=16_000, return_tensors="pt", padding=True)
def resampler(input_file_path, output_file_path):
    """Convert an audio file to 16 kHz mono by invoking ``ffmpeg``.

    Args:
        input_file_path: Path of the audio file to convert.
        output_file_path: Path the converted audio is written to.

    The exit status of ffmpeg is not checked here; a failed conversion
    surfaces when the caller tries to read the output file.
    """
    # An argument list (no shell) hands the paths to ffmpeg verbatim, so
    # spaces or shell metacharacters in a path cannot break or hijack the
    # command line.
    command = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "panic",
        # Overwrite an existing output instead of leaving a stale file from an
        # earlier run in place.
        "-y",
        "-i", input_file_path,
        "-ar", "16000",
        "-ac", "1",
        "-bits_per_raw_sample", "16",
        "-vn",
        output_file_path,
    ]
    subprocess.call(command)
def parse_transcription(logits):
    """Decode model logits into text with the module-level ``processor``.

    The highest-scoring index along the last axis is taken at every position,
    and only the first sequence of the result is decoded.

    Args:
        logits: Tensor of scores produced by the speech model.

    Returns:
        The decoded transcription, with special tokens skipped.
    """
    first_sequence_ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(first_sequence_ids, skip_special_tokens=True)
def parse(wav_file):
    """Transcribe an audio file into text.

    Args:
        wav_file: Path of the audio file to transcribe.

    Returns:
        The transcription produced by ``parse_transcription`` from the
        module-level speech model's logits.
    """
    features = read_file_and_process(wav_file)

    # Inference only: run the forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**features)

    return parse_transcription(outputs.logits)
# Speech recognition: Wav2Vec2 CTC model and its processor, both loaded from
# the same checkpoint id.
model_id = "jonatasgrosman/wav2vec2-large-xlsr-53-persian"
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# Question answering: model and tokenizer come from one checkpoint and are
# wrapped in a single question-answering pipeline.
qa_model_id = "pedramyazdipoor/persian_xlm_roberta_large"
model1 = RobertaForQuestionAnswering.from_pretrained(qa_model_id)
tokenizer1 = AutoTokenizer.from_pretrained(qa_model_id)
roberta_large = pipeline(
    task='question-answering',
    model=model1,
    tokenizer=tokenizer1,
)
def Q_A(text=None, audio=None, context=None):
    """Answer a question about ``context`` using the ``roberta_large`` pipeline.

    The question is taken from ``text`` when it contains non-whitespace
    characters; otherwise ``audio`` is transcribed with ``parse``.  When both
    are supplied the typed text wins, so no transcription is run.

    Args:
        text: Typed question, or ``None``/blank when the question is spoken.
        audio: Path of a recorded question, or ``None`` when it is typed.
        context: Source text the answer is extracted from.

    Returns:
        The ``'answer'`` field of the pipeline result.

    Raises:
        ValueError: If ``context`` is missing/blank, or if neither a typed
            nor a recorded question was supplied.
    """
    # ``context`` needs a default only because a parameter without one may not
    # follow defaulted parameters; it is still required, so validate it first
    # (before any transcription work is done).
    if not (isinstance(context, str) and context.strip()):
        raise ValueError("A non-empty context is required.")

    # An empty text field may arrive as "" rather than None, so test for real
    # content instead of comparing against None.
    typed_question = text.strip() if isinstance(text, str) else ""

    if typed_question:
        question = typed_question
    elif audio:
        question = parse(audio)
    else:
        raise ValueError("Provide a question as text or as an audio recording.")

    result = roberta_large({"question": question, "context": context})
    return result['answer']
# Title, description and article strings passed to the interface below.
title = "Question and answer based on Roberta model develop by nima asl toghiri"
description = "سیستم پردازش زبانی پرسش و پاسخ"
article = "آموزش داده شده با مدل زبانی روبرتا"

demo = gr.Interface(
    fn=Q_A,  # called with the three inputs below, in this order
    inputs=[
        # 1) typed question
        gr.Textbox(label='پرسش خود را وارد کنید:', show_label=True, text_align='right', lines=2),
        # 2) spoken question, handed to Q_A as a file path
        gr.Audio(
            source="microphone",
            type="filepath",
            label="لطفا دکمه ضبط صدا را بزنید و شروع به صحبت کنید و بعذ از اتمام صحبت دوباره دکمه ضبط را فشار دهید.",
            show_download_button=True,
            show_edit_button=True,
        ),
        # 3) context the answer is extracted from
        gr.Textbox(label='متن منبع خود را وارد کنید', show_label=True, text_align='right', lines=8),
    ],
    # Q_A returns a single value, so there is a single text output.
    outputs=gr.Text(show_copy_button=True),
    title=title,
    description=description,
    article=article,
)

# Launch the demo.
demo.launch(share=True)