# speech_analyzer / app.py
# Source: Hugging Face Space file view (raw / history / blame page header).
# Author: raygiles3 -- commit bf3436c (verified), "Update app.py" -- 2.28 kB
import torch
import os
import gradio as gr
#from langchain.llms import OpenAI
from langchain.llms import HuggingFaceHub
from transformers import pipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
# Connection settings for the watsonx.ai service.
# Each value may be overridden through an environment variable; when none of
# the variables is set, the configuration is identical to the previous
# hard-coded values (URL only, project "skills-network").
my_credentials = {
    "url": os.environ.get("WATSONX_URL", "https://us-south.ml.cloud.ibm.com"),
}
_watsonx_apikey = os.environ.get("WATSONX_APIKEY")
if _watsonx_apikey:
    # Add the key only when one is supplied so the default dict stays unchanged.
    my_credentials["apikey"] = _watsonx_apikey

# Text-generation parameters passed to the foundation model.
params = {
    # The maximum number of tokens the model can generate in a single run.
    GenParams.MAX_NEW_TOKENS: 800,
    # Controls the randomness of token generation: lower values are more
    # deterministic, higher values introduce more randomness.
    GenParams.TEMPERATURE: 0.1,
}

# Handle to the hosted Llama 2 70B chat model.
LLAMA2_model = Model(
    model_id="meta-llama/llama-2-70b-chat",
    credentials=my_credentials,
    params=params,
    project_id=os.environ.get("WATSONX_PROJECT_ID", "skills-network"),
)

# LangChain wrapper around the model, used as the `llm` of the chain below.
llm = WatsonxLLM(LLAMA2_model)
#######------------- Prompt Template-------------####
# Prompt in the Llama 2 chat layout: the system instruction is wrapped in
# <<SYS>> ... <</SYS>> and placed, together with the user content, inside a
# single [INST] ... [/INST] turn. The previous template nested the [INST]
# turn inside the <<SYS>> block, which does not match that layout.
temp = """<s>[INST] <<SYS>>
List the key points with details from the context:
<</SYS>>

The context : {context} [/INST]"""

# Template with a single placeholder, `context`, filled with the transcript.
pt = PromptTemplate(
    input_variables=["context"],
    template=temp,
)

# Chain that renders the template and sends the result to `llm`.
prompt_to_LLAMA2 = LLMChain(llm=llm, prompt=pt)
#######------------- Speech2text-------------####
# Lazily created speech-recognition pipeline, shared by every request so the
# Whisper model is loaded once instead of on each call.
_ASR_PIPELINE = None


def _get_asr_pipeline():
    """Return the shared speech-recognition pipeline, creating it on first use."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny.en",
            chunk_length_s=30,
        )
    return _ASR_PIPELINE


def transcript_audio(audio_file):
    """Transcribe an audio file and return the LLM's key-point summary.

    Args:
        audio_file: Path of the uploaded audio file. May be ``None`` or empty
            when the form is submitted without an upload.

    Returns:
        The text produced by ``prompt_to_LLAMA2`` for the transcript, or a
        short explanatory message when there is no audio or no transcribed
        text to summarise.
    """
    # Guard: without a file the pipeline call below would fail.
    if not audio_file:
        return "Please upload an audio file."

    # Transcribe the audio file.
    transcript_txt = _get_asr_pipeline()(audio_file, batch_size=8)["text"]

    # Nothing to summarise: skip the LLM round-trip on an empty transcript.
    if not transcript_txt or not transcript_txt.strip():
        return "No speech was detected in the audio file."

    # Feed the transcript into the prompt chain and return its answer.
    return prompt_to_LLAMA2.run(transcript_txt)
#######------------- Gradio-------------####
# Input widget: audio supplied by upload, handed to the callback as a file path.
audio_input = gr.Audio(
    sources="upload",
    type="filepath",
)

# Output widget: text box that shows the value returned by the callback.
output_text = gr.Textbox()

# Wire the callback to its input and output widgets.
iface = gr.Interface(
    fn=transcript_audio,
    inputs=audio_input,
    outputs=output_text,
    title="Audio Transcription App",
    description="Upload the audio file",
)

# Start the web server on all interfaces, port 7860.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
)