Spaces:

tensorgirl
/

Farmers_Helper_Bot

Running

File size: 4,141 Bytes

3a75737

import os
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers import AutoModelForSeq2SeqLM, pipeline
from huggingface_hub import login
import gradio as gr
import numpy as np

# Checkpoint used for answer generation; loaded as a causal LM together with
# its tokenizer.
new_model = "tensorgirl/finetuned-gemma"
model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline built from the model/tokenizer objects loaded
# above; `predict` calls it to produce the answer text.
generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )

# NOTE(review): `model` and `tokenizer` are rebound here to the NLLB
# translation model. `generator` was already constructed with the generation
# objects, but every later use of the bare names `model` / `tokenizer`
# (e.g. inside `translate`, and `tokenizer.eos_token_id` in `predict`) now
# resolves to the NLLB objects. Statement order therefore matters.
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
# Value passed as `device=` to the translation pipeline: 0 when CUDA is
# available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1

def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    A "translation" pipeline is constructed on every call from the
    module-level ``model``, ``tokenizer`` and ``device``, with
    ``max_length=400``.

    Args:
        text: Input handed to the translation pipeline.
        src_lang: Language code forwarded as the pipeline's ``src_lang``.
        tgt_lang: Language code forwarded as the pipeline's ``tgt_lang``.

    Returns:
        The ``translation_text`` entry of the first pipeline result.
    """
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,
        device=device,
    )
    outputs = translator(text)
    first_output = outputs[0]
    return first_output['translation_text']

def English(audio):
    """Transcribe English speech to text.

    Args:
        audio: ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
            array. Assumed to be 1-D, or 2-D laid out as
            ``(samples, channels)`` as Gradio's Audio component delivers it.

    Returns:
        The ``"text"`` field of the speech-recognition pipeline output.
    """
    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
    sr, y = audio

    # The ASR pipeline expects single-channel audio; average the channels of
    # multi-channel recordings.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize the waveform. The previous code assigned the peak itself
    # to `y`, discarding the audio. Skip the division for silent or empty
    # clips to avoid NaNs and errors.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def Hindi(audio):
    """Transcribe Hindi speech and translate the transcript to English.

    Args:
        audio: ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
            array. Assumed to be 1-D, or 2-D laid out as
            ``(samples, channels)`` as Gradio's Audio component delivers it.

    Returns:
        The result of ``translate`` from ``hin_Deva`` to ``eng_Latn`` applied
        to the transcript.
    """
    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    transcriber = pipeline("automatic-speech-recognition", model="theainerd/Wav2Vec2-large-xlsr-hindi")
    sr, y = audio

    # The ASR pipeline expects single-channel audio; average the channels of
    # multi-channel recordings.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize the waveform. The previous code assigned the peak itself
    # to `y`, discarding the audio. Skip the division for silent or empty
    # clips to avoid NaNs and errors.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    text = transcriber({"sampling_rate": sr, "raw": y})["text"]

    return translate(text, "hin_Deva", "eng_Latn")


def Telegu(audio):
    """Transcribe Telugu speech and translate the transcript to English.

    Args:
        audio: ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
            array. Assumed to be 1-D, or 2-D laid out as
            ``(samples, channels)`` as Gradio's Audio component delivers it.

    Returns:
        The result of ``translate`` from ``tel_Telu`` to ``eng_Latn`` applied
        to the transcript.
    """
    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    transcriber = pipeline("automatic-speech-recognition", model="anuragshas/wav2vec2-large-xlsr-53-telugu")
    sr, y = audio

    # The ASR pipeline expects single-channel audio; average the channels of
    # multi-channel recordings.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize the waveform. The previous code assigned the peak itself
    # to `y`, discarding the audio. Skip the division for silent or empty
    # clips to avoid NaNs and errors.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    text = transcriber({"sampling_rate": sr, "raw": y})["text"]

    return translate(text, "tel_Telu", "eng_Latn")

def Tamil(audio):
    """Transcribe Tamil speech and translate the transcript to English.

    Args:
        audio: ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
            array. Assumed to be 1-D, or 2-D laid out as
            ``(samples, channels)`` as Gradio's Audio component delivers it.

    Returns:
        The result of ``translate`` from ``tam_Taml`` to ``eng_Latn`` applied
        to the transcript.
    """
    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    transcriber = pipeline("automatic-speech-recognition", model="Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    sr, y = audio

    # The ASR pipeline expects single-channel audio; average the channels of
    # multi-channel recordings.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize the waveform. The previous code assigned the peak itself
    # to `y`, discarding the audio. Skip the division for silent or empty
    # clips to avoid NaNs and errors.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    text = transcriber({"sampling_rate": sr, "raw": y})["text"]

    return translate(text, "tam_Taml", "eng_Latn")

def Kannada(audio):
    """Transcribe Kannada speech and translate the transcript to English.

    Args:
        audio: ``(sampling_rate, samples)`` pair, where ``samples`` is a NumPy
            array. Assumed to be 1-D, or 2-D laid out as
            ``(samples, channels)`` as Gradio's Audio component delivers it.

    Returns:
        The result of ``translate`` from ``kan_Knda`` to ``eng_Latn`` applied
        to the transcript.
    """
    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    transcriber = pipeline("automatic-speech-recognition", model="vasista22/whisper-kannada-medium")
    sr, y = audio

    # The ASR pipeline expects single-channel audio; average the channels of
    # multi-channel recordings.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize the waveform. The previous code assigned the peak itself
    # to `y`, discarding the audio. Skip the division for silent or empty
    # clips to avoid NaNs and errors.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    text = transcriber({"sampling_rate": sr, "raw": y})["text"]

    return translate(text, "kan_Knda", "eng_Latn")

def predict(audio, language):
    """Answer a spoken query in the language it was asked in.

    The audio is transcribed to English text by the function matching
    ``language``, the text is fed to ``generator``, and the generated answer
    is translated back to the selected language (English answers are returned
    as generated).

    Args:
        audio: ``(sampling_rate, samples)`` pair as produced by the
            ``gr.Audio`` input, or ``None`` when nothing was recorded.
        language: Label chosen in the language dropdown: one of
            ``"English"``, ``"Hindi"``, ``"Telegu"``, ``"Tamil"``,
            ``"Kannada"``.

    Returns:
        The answer text in the selected language.

    Raises:
        gr.Error: If ``language`` is not a supported label or ``audio`` is
            missing.
    """
    # Dropdown label -> (speech-to-English function, NLLB code used to
    # translate the answer back; None means no translation is needed).
    # The previous code compared the label string with the function objects
    # themselves, so no branch ever matched.
    handlers = {
        "English": (English, None),
        "Hindi": (Hindi, "hin_Deva"),
        "Telegu": (Telegu, "tel_Telu"),
        "Tamil": (Tamil, "tam_Taml"),
        "Kannada": (Kannada, "kan_Knda"),
    }
    if language not in handlers:
        raise gr.Error("Please select a supported language.")
    if audio is None:
        raise gr.Error("Please record or upload an audio query.")

    to_english, reply_lang = handlers[language]
    message = to_english(audio)
    print(message)

    sequences = generator(
        message,
        max_length=200,
        do_sample=False,
        # top_k only applies when sampling; kept so the generation settings
        # stay as originally configured.
        top_k=10,
        num_return_sequences=1,
        # Use the generator's own tokenizer: the module-level `tokenizer`
        # name is rebound to the translation tokenizer after the generator
        # is built.
        eos_token_id=generator.tokenizer.eos_token_id,
    )

    answer = " ".join(seq["generated_text"] for seq in sequences)
    print(answer)

    if reply_lang is None:
        return answer

    # Translate the generated English answer (not the transcript) back to
    # the user's language; language codes are passed as strings.
    return translate(answer, "eng_Latn", reply_lang)

# Gradio UI: an audio input plus a language dropdown feed `predict`, whose
# return value is shown as text.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            choices=["Hindi", "Telegu", "Tamil", "Kannada", "English"],
            label="Language",
            info="Please select language of your choice",
        ),
    ],
    outputs="text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in your regional Language",
    theme=gr.themes.Soft(),
)

# Start the app with share=True, exactly as before.
demo.launch(share=True)