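# app.py for a Hugging Face Space: transcribes Thai speech with a wav2vec2
# CTC model and flags profane / violent words, reporting the 5-second
# window in which each flagged word occurs.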
import gradio as gr
import numpy as np
import librosa
import requests
import torch
import torchaudio
import math
import os
import soundfile as sf
from glob import glob
from pytube import YouTube
from transformers import (
    Wav2Vec2CTCTokenizer,
    Wav2Vec2FeatureExtractor,
    Wav2Vec2Processor,
    Wav2Vec2ForCTC,
    TrainingArguments,
    Trainer,
    pipeline
)
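# Load the tokenizer/feature-extractor pair and the fine-tuned Thai wav2vec2
# CTC model. Note that the processor comes from the airesearch checkpoint
# while the acoustic model is the BALAKA fine-tune.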
processor = Wav2Vec2Processor.from_pretrained("airesearch/wav2vec2-large-xlsr-53-th")
model = Wav2Vec2ForCTC.from_pretrained("BALAKA/wav2vec2-large-xlsr-53-thai")

demo = gr.Blocks()
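# check() scans a decoded transcript for any word in the Thai block list
# below and returns the list of matches that were found.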
def check(sentence):
    found = []
    negative = ["กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่", "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน", "ควย", "ควาย", "คอขาด", "ฆ่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา", "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด", "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ", "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา", "ปี้กัน", "พ่อง", "พ่อมึง", "ฟักยู", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ", "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล",
                "สทุน", "สัด", "สันดาน", "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "ส้นตีน", "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี", "อีช้าง", "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี", "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ", "เยี่ยว", "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด", "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร", "โง่", "โป๊", "โรคจิต", "ใจหมา", "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน"]
    negative = list(dict.fromkeys(negative))
    for i in negative:
        if sentence.find(i) != -1:
            found.append(i)
    return found
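# Load an audio file with torchaudio and resample it to the 16 kHz rate
# expected by wav2vec2, returning the first channel as a NumPy array.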
def resample(file_path):
    speech_array, sampling_rate = torchaudio.load(file_path)
    resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
    return resampler(speech_array)[0].numpy()
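# tran_script() transcribes either a single audio file (when given a path)
# or, when given the chunk counter returned by split_file(), every
# split_<n>.mp3 chunk in turn, returning the decoded sentence(s).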
def tran_script(file_path):
    if isinstance(file_path, str):
        # Single file: resample, run the model and decode the prediction.
        speech = resample(file_path)
        inputs = processor(speech, sampling_rate=16_000,
                           return_tensors="pt", padding=True)
        logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        predicted_sentence = processor.batch_decode(predicted_ids)
        return predicted_sentence
    else:
        # file_path is the chunk counter; split_file() writes its chunks to
        # the working directory, so read them back from there.
        sentence = []
        for i in range(file_path - 1):
            now_path = f'split_{i + 1}.mp3'
            speech = resample(now_path)
            inputs = processor(speech, sampling_rate=16_000,
                               return_tensors="pt", padding=True)
            logits = model(inputs.input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
            predicted_sentence = processor.batch_decode(predicted_ids)
            sentence.append(predicted_sentence)
        return sentence
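# split_file() cuts the input audio into 5-second chunks named split_1.mp3,
# split_2.mp3, ... in the working directory and returns the final counter
# value (number of chunks + 1). Writing MP3 through soundfile assumes a
# libsndfile build with MP3 support.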
def split_file(file_path):
    speech, sample_rate = librosa.load(file_path)
    buffer = 5 * sample_rate
    samples_total = len(speech)
    samples_wrote = 0
    counter = 1
    while samples_wrote < samples_total:
        if buffer > (samples_total - samples_wrote):
            buffer = samples_total - samples_wrote
        block = speech[samples_wrote:(samples_wrote + buffer)]
        out_filename = "split_" + str(counter) + ".mp3"
        sf.write(out_filename, block, sample_rate)
        counter += 1
        samples_wrote += buffer
    return counter
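# process() is the main pipeline: clips of 5 seconds or less are transcribed
# directly; longer clips are split into 5-second chunks, each chunk is
# transcribed and checked, and every hit is reported with the hh/mm/ss
# window it falls in.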
def process(file_path):
    if librosa.get_duration(filename=file_path) <= 5:
        # Short clip: transcribe directly and report any flagged words.
        sentence = tran_script(file_path)
        sentence = sentence[0].replace(' ', '')
        return '[0.00-0.05] found : ' + str(check(sentence))
    counter = split_file(file_path)
    sentence = tran_script(counter)
    result = ''
    for index, item in enumerate(sentence):
        now_sentence = item[0].replace(' ', '')
        found = check(now_sentence)
        if found:
            time = index * 5
            minutes = math.floor(time / 60)
            hours = math.floor(minutes / 60)
            seconds = time % 60
            minutes = str(minutes).zfill(2)
            hours = str(hours).zfill(2)
            first_seconds = str(seconds).zfill(2)
            last_seconds = str(seconds + 5).zfill(2)
            text = f'found at {hours}h {minutes}m {first_seconds}-{last_seconds} seconds found {found}'
            result += text + '\n'
    return result
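# Download the audio-only stream of a YouTube video with pytube, move it to
# a fixed location inside the Space, and run the profanity check on it.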
def youtube_loader(link):
    yt = YouTube(str(link))
    video = yt.streams.filter(only_audio=True).first()
    out_file = video.download(output_path='mp3')
    os.rename(out_file, '/home/user/app/mp3/youtube.mp3')
    return process('/home/user/app/mp3/youtube.mp3')
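# Fetch the audio-only rendition of a Twitch VOD via the twitch-dl CLI,
# then run the profanity check on the downloaded file.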
def twitch_loader(link):
    os.system(f"twitch-dl download -q audio_only {link} --output twitch.wav")
    return process('/home/user/app/twitch.wav')
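# Gradio Blocks UI: one tab per input source (microphone, file upload,
# YouTube link, Twitch link/ID), each with a button that runs the
# corresponding loader and shows the result in a Textbox.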
with demo:
    gr.Markdown("Select your input type.")
    with gr.Tabs():
        with gr.TabItem("From your voice."):
            with gr.Row():
                voice = gr.Audio(source="microphone", type="filepath",
                                 optional=True, label="Start recording your voice here.")
                voice_output = gr.Textbox()
            text_button1 = gr.Button("Flip")
        with gr.TabItem("From your file."):
            with gr.Row():
                file_input = gr.Audio(type="filepath", optional=True,
                                      label="Drop your audio file here.")
                file_output = gr.Textbox()
            text_button4 = gr.Button("Flip")
        with gr.TabItem("From youtube"):
            with gr.Row():
                youtube_input = gr.Textbox(
                    label="Insert your youtube link here.",
                    placeholder='https://www.youtube.com/watch?v=dQw4w9WgXcQ')
                youtube_output = gr.Textbox()
            text_button2 = gr.Button("Flip")
        with gr.TabItem("From twitch"):
            with gr.Row():
                twitch_input = gr.Textbox(
                    label="Insert your twitch link or ID here.",
                    placeholder='https://www.twitch.tv/videos/1823056925 or 1823056925')
                twitch_output = gr.Textbox()
            text_button3 = gr.Button("Flip")

    text_button1.click(process, inputs=voice, outputs=voice_output)
    text_button2.click(youtube_loader, inputs=youtube_input,
                       outputs=youtube_output)
    text_button3.click(twitch_loader, inputs=twitch_input,
                       outputs=twitch_output)
    text_button4.click(process, inputs=file_input,
                       outputs=file_output)

demo.launch(enable_queue=True)