File size: 6,004 Bytes
221417c
03c97ac
221417c
 
 
 
 
 
 
 
64afdeb
 
 
 
 
 
 
 
 
a5bca4d
64afdeb
 
 
 
 
7cb6b3d
 
221417c
 
790f0aa
0bef3a8
 
 
790f0aa
 
 
 
 
 
0bef3a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790f0aa
0bef3a8
4d6ac72
 
 
5459b54
0bef3a8
 
 
 
5459b54
221417c
 
452575b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cb6b3d
221417c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb666d9
221417c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a68907d
 
221417c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import gradio as gr

import assemblyai
from transformers import pipeline

# Initialize sentiment analysis model
# Loaded once at module import; reused by analyze_sentiment() for every request.
# Model: Tamil sentiment classifier fine-tuned on DistilBERT (Hugging Face hub).
sentiment_analyzer = pipeline("text-classification", model="Vasanth/tamil-sentiment-distilbert")

# Placeholder transcription functions - Replace with your actual API implementation
def whisper_openai_transcribe(audio_file):
    """
    Transcribe an audio file locally with OpenAI's Whisper model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text as a string.
    """
    import whisper

    # Cache the model on the function object: previously the (multi-GB)
    # "large" checkpoint was reloaded from disk on EVERY call, which made
    # each transcription take minutes. Load once, reuse thereafter.
    # Use 'base', 'small', 'medium', or 'large' depending on your need.
    if not hasattr(whisper_openai_transcribe, "_model"):
        whisper_openai_transcribe._model = whisper.load_model("large")

    # Transcribe the audio file and return the recognized text.
    result = whisper_openai_transcribe._model.transcribe(audio_file)
    return result['text']

def deepgram_transcribe(audio_file):
    """
    Transcribe a Tamil audio file with the Deepgram pre-recorded API.

    Args:
        audio_file: Path to the local audio file to transcribe.

    Returns:
        The transcript string for the first channel/alternative.

    Raises:
        Exception: re-raised from the Deepgram SDK if transcription fails.
    """
    # Bug fix: the SDK exports `FileSource` (capital S). The original import
    # (`Filesource`) would raise ImportError the first time this ran.
    from deepgram import DeepgramClient, PrerecordedOptions, FileSource

    # NOTE(security): this key should come from an environment variable
    # (e.g. os.environ["DEEPGRAM_API_KEY"]), not be committed in source.
    # TODO: rotate this key — it is now exposed.
    DEEPGRAM_API_KEY = "aeb38b43a17867c59928e6a103ac75c06d4e896d"

    with open(audio_file, "rb") as file:
        buffer_data = file.read()

    payload: FileSource = {
        "buffer": buffer_data,
    }

    try:
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        # Tamil ("ta") with the enhanced model; formatting options mirror
        # what the UI expects (punctuation, paragraphs, speaker diarization).
        options = PrerecordedOptions(
            model="enhanced",
            language="ta",
            smart_format=True,
            punctuate=True,
            paragraphs=True,
            utterances=True,
            keywords=[":"],
            diarize=True,
        )

        response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        print(response.to_json(indent=4))
        transcript = response['results']['channels'][0]['alternatives'][0]['transcript']
        print(transcript)
        return transcript
    except Exception as e:
        # Bug fix: the original swallowed the error and implicitly returned
        # None, which later crashed sentiment analysis with an opaque
        # TypeError. Log and re-raise so the failure surfaces at its source.
        print(f"Exception: {e}")
        raise
    

def assemblyai_transcribe(audio_file):
    """
    Transcribe an audio file with the AssemblyAI API.

    Args:
        audio_file: Path (or URL) of the audio file to transcribe.

    Returns:
        The transcribed text as a string.

    Raises:
        RuntimeError: if AssemblyAI reports a transcription error.
    """
    import assemblyai as aai

    # NOTE(security): this key should come from an environment variable,
    # not be committed in source. TODO: rotate this key — it is now exposed.
    aai.settings.api_key = "96206c6070cf4157b84f4f8eb66b5903"

    # Lightweight 'nano' model with automatic language detection (the
    # input is expected to be Tamil, but detection keeps it flexible).
    config = aai.TranscriptionConfig(
      speech_model=aai.SpeechModel.nano,
      language_detection=True
    )

    transcriber = aai.Transcriber(config=config)
    transcript = transcriber.transcribe(audio_file)

    if transcript.status == aai.TranscriptStatus.error:
        # Bug fix: the original only printed the error and still returned
        # transcript.text (None on failure), which later crashed sentiment
        # analysis. Fail loudly at the source instead.
        raise RuntimeError(f"AssemblyAI transcription failed: {transcript.error}")

    print(transcript.text)
    return transcript.text

# Sentiment analysis function
def analyze_sentiment(text):
    """Classify *text* with the Tamil sentiment model.

    Returns the top prediction as a ``(label, score)`` tuple.
    """
    top_prediction = sentiment_analyzer(text)[0]
    return top_prediction['label'], top_prediction['score']

# Main function to process audio and sentiment analysis
def process_transcription_and_sentiment(audio_file, model_choice):
    """
    Transcribe *audio_file* with the selected backend, then run sentiment
    analysis on the resulting transcript.

    Args:
        audio_file: Path to the uploaded audio file.
        model_choice: One of "Whisper OpenAI", "Deepgram API",
            or "Assembly AI API".

    Returns:
        A ``(transcription, sentiment_summary)`` tuple of strings.

    Raises:
        ValueError: if *model_choice* is not a recognized backend.
    """
    # Transcription — dispatch on the dropdown value from the UI.
    if model_choice == "Whisper OpenAI":
        transcription = whisper_openai_transcribe(audio_file)
    elif model_choice == "Deepgram API":
        transcription = deepgram_transcribe(audio_file)
    elif model_choice == "Assembly AI API":
        transcription = assemblyai_transcribe(audio_file)
    else:
        # Bug fix: an unrecognized choice previously fell through and
        # raised an opaque UnboundLocalError on `transcription` below.
        raise ValueError(f"Unknown transcription model: {model_choice!r}")

    # Sentiment analysis on the full transcript.
    sentiment_label, sentiment_score = analyze_sentiment(transcription)

    return transcription, f"Sentiment: {sentiment_label} with score {sentiment_score}"

# Gradio interface setup
def create_interface():
    """Build the transcription + sentiment Gradio UI and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown("### Audio Transcription and Sentiment Analysis")

        with gr.Row():
            audio_in = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
            backend_choice = gr.Dropdown(
                choices=["Whisper OpenAI", "Deepgram API", "Assembly AI API"],
                label="Choose Transcription Model",
                value="Whisper OpenAI",
            )

        # Output textboxes for the transcript and its sentiment summary.
        transcript_box = gr.Textbox(label="Transcription", lines=5)
        sentiment_box = gr.Textbox(label="Sentiment Analysis", lines=5)

        # Clicking the button runs the full transcribe-then-analyze pipeline.
        run_button = gr.Button("Process")
        run_button.click(
            process_transcription_and_sentiment,
            [audio_in, backend_choice],
            [transcript_box, sentiment_box],
        )

    demo.launch()

# Script entry point: build the Gradio UI and start the local web server.
if __name__ == "__main__":
    create_interface()