# File size: 4,141 Bytes
# 3a75737
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers import AutoModelForSeq2SeqLM, pipeline
from huggingface_hub import login
import gradio as gr
import numpy as np

# --- Answer generation: fine-tuned Gemma causal LM --------------------------
new_model = "tensorgirl/finetuned-gemma"
model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
# The end-of-sequence token doubles as the padding token for generation.
tokenizer.pad_token = tokenizer.eos_token

generator = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# --- Translation: NLLB-200 ---------------------------------------------------
# The translation model and tokenizer get their own names. Rebinding the
# module-level `model` / `tokenizer` here would make every later read of
# `tokenizer` resolve to the NLLB tokenizer instead of the generator's one
# (their special-token ids differ).
TRANSLATION_CHECKPOINT = "facebook/nllb-200-distilled-600M"
translation_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_CHECKPOINT)
translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_CHECKPOINT)
# transformers pipelines take a CUDA device index, or -1 for CPU.
device = 0 if torch.cuda.is_available() else -1


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` with NLLB-200.

    Args:
        text: The text to translate.
        src_lang: Source language code in the form the NLLB tokenizer uses
            (e.g. ``"hin_Deva"``).
        tgt_lang: Target language code in the same form (e.g. ``"eng_Latn"``).

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""`` when
        ``text`` is empty or only whitespace.
    """
    # Nothing to translate (e.g. speech recognition heard only silence):
    # skip the model rather than let it produce text from an empty prompt.
    if not text or (isinstance(text, str) and not text.strip()):
        return ""

    # The language pair varies per call, so the pipeline wrapper is built
    # here; the model and tokenizer themselves are loaded once at import.
    translation_pipeline = pipeline(
        "translation",
        model=translation_model,
        tokenizer=translation_tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,
        device=device,
    )
    result = translation_pipeline(text)
    return result[0]['translation_text']

def _normalize_audio(samples):
    """Return ``samples`` as a mono float32 waveform with peak amplitude 1.

    Args:
        samples: Array-like of audio samples, either ``(n,)`` or
            ``(n, channels)`` (the layout Gradio uses for multi-channel audio).

    Returns:
        A new 1-D ``float32`` array. A silent or empty clip is returned as-is
        (all zeros / empty) instead of being divided by a zero peak.
    """
    samples = np.asarray(samples)
    if samples.ndim > 1:
        # Down-mix to mono: the ASR pipelines expect a single channel.
        samples = samples.mean(axis=1)
    # Convert before taking abs() so integer PCM extremes cannot overflow.
    samples = samples.astype(np.float32)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples = samples / peak
    return samples


def _transcribe(audio, model_name):
    """Run speech recognition on a Gradio audio value with ``model_name``.

    Args:
        audio: ``(sampling_rate, samples)`` tuple as produced by ``gr.Audio``.
        model_name: Hugging Face model id of the speech-recognition checkpoint.

    Returns:
        The recognised text.

    Raises:
        ValueError: If ``audio`` is ``None`` (nothing recorded or uploaded).
    """
    if audio is None:
        raise ValueError("No audio was provided.")
    sr, y = audio
    # NOTE: the pipeline (and its model) is built on every call, so only one
    # speech model is held in memory at a time, at the cost of reload latency.
    transcriber = pipeline("automatic-speech-recognition", model=model_name)
    return transcriber({"sampling_rate": sr, "raw": _normalize_audio(y)})["text"]


def English(audio):
    """Transcribe English speech and return the recognised text."""
    return _transcribe(audio, "openai/whisper-base.en")


def Hindi(audio):
    """Transcribe Hindi speech and return its English translation."""
    text = _transcribe(audio, "theainerd/Wav2Vec2-large-xlsr-hindi")
    return translate(text, "hin_Deva", "eng_Latn")


def Telegu(audio):
    """Transcribe Telugu speech and return its English translation."""
    text = _transcribe(audio, "anuragshas/wav2vec2-large-xlsr-53-telugu")
    return translate(text, "tel_Telu", "eng_Latn")


def Tamil(audio):
    """Transcribe Tamil speech and return its English translation."""
    text = _transcribe(audio, "Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    return translate(text, "tam_Taml", "eng_Latn")


def Kannada(audio):
    """Transcribe Kannada speech and return its English translation."""
    text = _transcribe(audio, "vasista22/whisper-kannada-medium")
    return translate(text, "kan_Knda", "eng_Latn")

def predict(audio, language):
    """Answer a spoken question in the language it was asked in.

    The audio is transcribed (and, for non-English input, translated to
    English), passed to the text-generation pipeline, and the generated
    answer is translated back to the selected language.

    Args:
        audio: ``(sampling_rate, samples)`` tuple from the ``gr.Audio`` input.
        language: The dropdown choice: ``"English"``, ``"Hindi"``,
            ``"Telegu"``, ``"Tamil"`` or ``"Kannada"``. One of the
            transcription functions themselves is also accepted.

    Returns:
        The generated answer as text, in the selected language.

    Raises:
        gr.Error: If no audio or no supported language was supplied, or if
            no speech could be recognised in the audio.
    """
    # Dropdown label -> (speech-to-English function, NLLB code used to
    # translate the answer back; None means the answer stays in English).
    handlers = {
        "English": (English, None),
        "Hindi": (Hindi, "hin_Deva"),
        "Telegu": (Telegu, "tel_Telu"),
        "Tamil": (Tamil, "tam_Taml"),
        "Kannada": (Kannada, "kan_Knda"),
    }

    # The dropdown delivers the label as a string; a function object is
    # mapped to its name so that calling style works too.
    if callable(language):
        language = getattr(language, "__name__", None)

    if audio is None:
        raise gr.Error("Please record or upload an audio clip.")
    if language not in handlers:
        raise gr.Error("Please select a language.")

    to_english, reply_lang = handlers[language]
    message = to_english(audio)
    print(message)

    if not message or not message.strip():
        raise gr.Error("No speech could be recognised in the audio. Please try again.")

    # Greedy decoding (do_sample=False), so no sampling knobs such as top_k
    # are passed. The EOS id is read from the generator's own tokenizer, not
    # from a module-level name that other setup code may have rebound.
    sequences = generator(
        message,
        max_length=200,
        do_sample=False,
        num_return_sequences=1,
        eos_token_id=generator.tokenizer.eos_token_id,
    )

    answer = " ".join(seq['generated_text'] for seq in sequences)
    print(answer)

    if reply_lang is None:
        return answer
    return translate(answer, "eng_Latn", reply_lang)

# Inputs: the recorded/uploaded question and the language it is spoken in.
audio_input = gr.Audio()
language_input = gr.Dropdown(
    ["Hindi", "Telegu", "Tamil", "Kannada", "English"],
    label="Language",
    info="Please select language of your choice",
)

# Single-function UI: `predict` receives (audio, language) and returns text.
demo = gr.Interface(
    fn=predict,
    inputs=[audio_input, language_input],
    outputs="text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in your regional Language",
    theme=gr.themes.Soft(),
)

# share=True asks Gradio for a public share link in addition to the local URL.
demo.launch(share=True)