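# Farmers-Helper-Bot: a Gradio app that transcribes a spoken query
# (English, Hindi, Telugu, Tamil, or Kannada), answers it with a
# fine-tuned Gemma model, and translates the answer back via NLLB-200.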
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForSeq2SeqLM, pipeline
import gradio as gr
import numpy as np
# Fine-tuned Gemma model that answers the (English) question.
new_model = "tensorgirl/finetuned-gemma"
model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
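# Text-generation pipeline around the fine-tuned Gemma model; it captures
# the Gemma `model` and `tokenizer` loaded above.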
generator = transformers.pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
device_map="auto",
)
# NLLB-200 model for translating between English and the regional languages.
# Distinct names keep the Gemma `model`/`tokenizer` above from being clobbered,
# so `tokenizer.eos_token_id` in predict() still refers to the Gemma tokenizer.
nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
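# transformers pipelines take a GPU index (0 for the first GPU) or -1 for CPU.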
device = 0 if torch.cuda.is_available() else -1
def translate(text, src_lang, tgt_lang):
    # Build a translation pipeline for the requested pair of NLLB language
    # codes (e.g. "hin_Deva", "eng_Latn").
    translation_pipeline = pipeline(
        "translation",
        model=nllb_model,
        tokenizer=nllb_tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,
        device=device,
    )
    result = translation_pipeline(text)
    return result[0]["translation_text"]
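# Example: translate(text, "hin_Deva", "eng_Latn") renders a Hindi string in English.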
def English(audio):
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
    sr, y = audio
    # Normalize the waveform to [-1, 1] before transcription.
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
def Hindi(audio):
    transcriber = pipeline("automatic-speech-recognition", model="theainerd/Wav2Vec2-large-xlsr-hindi")
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    # Translate the Hindi transcript to English for the Gemma model.
    return translate(text, "hin_Deva", "eng_Latn")
def Telugu(audio):
    transcriber = pipeline("automatic-speech-recognition", model="anuragshas/wav2vec2-large-xlsr-53-telugu")
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tel_Telu", "eng_Latn")
def Tamil(audio):
    transcriber = pipeline("automatic-speech-recognition", model="Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tam_Taml", "eng_Latn")
def Kannada(audio):
    transcriber = pipeline("automatic-speech-recognition", model="vasista22/whisper-kannada-medium")
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "kan_Knda", "eng_Latn")
def predict(audio, language):
    # `language` is the string selected in the dropdown, so compare against
    # string literals (the original compared against the function objects,
    # which is always False).
    if language == "English":
        message = English(audio)
    elif language == "Hindi":
        message = Hindi(audio)
    elif language == "Telugu":
        message = Telugu(audio)
    elif language == "Tamil":
        message = Tamil(audio)
    elif language == "Kannada":
        message = Kannada(audio)
    print(message)
    sequences = generator(
        message,
        max_length=200,
        do_sample=False,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    answer = ""
    for seq in sequences:
        answer = answer + seq["generated_text"] + " "
    print(answer)
    # Translate the English answer back into the user's language.
    if language == "Hindi":
        return translate(answer, "eng_Latn", "hin_Deva")
    if language == "Telugu":
        return translate(answer, "eng_Latn", "tel_Telu")
    if language == "Tamil":
        return translate(answer, "eng_Latn", "tam_Taml")
    if language == "Kannada":
        return translate(answer, "eng_Latn", "kan_Knda")
    return answer
demo = gr.Interface(
    predict,
    [
        gr.Audio(),
        gr.Dropdown(
            ["Hindi", "Telugu", "Tamil", "Kannada", "English"],
            label="Language",
            info="Please select the language of your choice",
        ),
    ],
    "text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in your regional language",
    theme=gr.themes.Soft(),
)
demo.launch(share=True)