tensorgirl committed on
Commit 3a75737 · verified · 1 Parent(s): a53ddde

Upload 2 files

Files changed (2)
  1. app.py +149 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,149 @@
+ import os
+ import transformers
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+ from transformers import AutoModelForSeq2SeqLM, pipeline
+ from huggingface_hub import login
+ import gradio as gr
+ import numpy as np
+
+ # Fine-tuned Gemma model that answers the farmers' queries in English.
+ new_model = "tensorgirl/finetuned-gemma"
+ model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ generator = transformers.pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     torch_dtype=torch.bfloat16,
+     trust_remote_code=True,
+     device_map="auto",
+ )
+
+ # NLLB model for translating between English and the regional languages.
+ # Kept under separate names so they do not clobber the Gemma tokenizer,
+ # which predict() still needs for eos_token_id.
+ nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
+ nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+ device = 0 if torch.cuda.is_available() else -1
+
+ def translate(text, src_lang, tgt_lang):
+     # NLLB uses FLORES-200 language codes, e.g. "hin_Deva", "eng_Latn".
+     translation_pipeline = pipeline(
+         "translation",
+         model=nllb_model,
+         tokenizer=nllb_tokenizer,
+         src_lang=src_lang,
+         tgt_lang=tgt_lang,
+         max_length=400,
+         device=device,
+     )
+     result = translation_pipeline(text)
+     return result[0]["translation_text"]
+
+ def English(audio):
+     transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))  # scale to [-1, 1] before handing the raw array to the ASR pipeline
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def Hindi(audio):
+     transcriber = pipeline("automatic-speech-recognition", model="theainerd/Wav2Vec2-large-xlsr-hindi")
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     text = transcriber({"sampling_rate": sr, "raw": y})["text"]
+     return translate(text, "hin_Deva", "eng_Latn")
+
+ def Telegu(audio):
+     transcriber = pipeline("automatic-speech-recognition", model="anuragshas/wav2vec2-large-xlsr-53-telugu")
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     text = transcriber({"sampling_rate": sr, "raw": y})["text"]
+     return translate(text, "tel_Telu", "eng_Latn")
+
+ def Tamil(audio):
+     transcriber = pipeline("automatic-speech-recognition", model="Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     text = transcriber({"sampling_rate": sr, "raw": y})["text"]
+     return translate(text, "tam_Taml", "eng_Latn")
+
+ def Kannada(audio):
+     transcriber = pipeline("automatic-speech-recognition", model="vasista22/whisper-kannada-medium")
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     text = transcriber({"sampling_rate": sr, "raw": y})["text"]
+     return translate(text, "kan_Knda", "eng_Latn")
+
+ def predict(audio, language):
+     # gr.Dropdown passes the selected choice as a string, so compare against
+     # the labels rather than the transcription functions themselves.
+     if language == "English":
+         message = English(audio)
+     if language == "Hindi":
+         message = Hindi(audio)
+     if language == "Telegu":
+         message = Telegu(audio)
+     if language == "Tamil":
+         message = Tamil(audio)
+     if language == "Kannada":
+         message = Kannada(audio)
+
+     print(message)
+
+     sequences = generator(
+         message,
+         max_length=200,
+         do_sample=False,  # greedy decoding; top_k has no effect without sampling
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+     )
+
+     answer = ""
+     for seq in sequences:
+         answer = answer + seq["generated_text"] + " "
+
+     print(answer)
+     # Translate the English answer back into the language the question was asked in.
+     if language == "English":
+         return answer
+     if language == "Hindi":
+         return translate(answer, "eng_Latn", "hin_Deva")
+     if language == "Telegu":
+         return translate(answer, "eng_Latn", "tel_Telu")
+     if language == "Tamil":
+         return translate(answer, "eng_Latn", "tam_Taml")
+     if language == "Kannada":
+         return translate(answer, "eng_Latn", "kan_Knda")
+
+     return answer
+
+ demo = gr.Interface(
+     predict,
+     [
+         gr.Audio(),
+         gr.Dropdown(
+             # choices must match the string checks in predict()
+             ["Hindi", "Telegu", "Tamil", "Kannada", "English"],
+             label="Language",
+             info="Please select the language of your choice",
+         ),
+     ],
+     "text",
+     title="Farmers-Helper-Bot",
+     description="Ask your queries in your regional language",
+     theme=gr.themes.Soft(),
+ )
+
+ demo.launch(share=True)
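
The Gradio UI above is the intended entry point, but once the models are downloaded the speech-to-text, Gemma, and translation stages can also be smoke-tested without the browser by calling predict() directly with a (sample_rate, waveform) tuple, which is the format gr.Audio() hands to it. The snippet below is a minimal local sketch under that assumption; the synthetic sine-wave "speech" is meaningless, so it only checks that the pipelines run end to end, not that the answer makes sense.

import numpy as np

sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
# One second of a 440 Hz tone stands in for recorded speech (hypothetical test input).
fake_audio = (sr, (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32))

print(predict(fake_audio, "English"))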
requirements.txt ADDED
File without changes
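
The diff does not show any contents for requirements.txt, so the actual package pins are not recoverable here. Purely as an inferred sketch from the imports in app.py (with accelerate assumed because device_map="auto" needs it), the file would have to cover roughly the following; treat this as a guess, not the committed file:

transformers
torch
gradio
numpy
huggingface_hub
accelerate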