Spaces:

Ngadou
/

Audio_Scam_Detection

Running

App Files Files Community

Audio_Scam_Detection / app.py

Ngadou

Update app.py

c2d51d1 over 1 year ago

raw

history blame contribute delete

2.16 kB

	import json
	import os
	import time

	import gradio as gr
	import numpy as np
	import openai
	import torch
	from transformers import pipeline
	from transformers import AutoProcessor, AutoModelForCTC

	# One checkpoint backs both the input processor and the CTC acoustic model,
	# so the identifier is kept in a single place.
	ASR_CHECKPOINT = "facebook/wav2vec2-large-robust-ft-libri-960h"

	processor = AutoProcessor.from_pretrained(ASR_CHECKPOINT)
	model = AutoModelForCTC.from_pretrained(ASR_CHECKPOINT)

	# Alternative kept for reference (disabled):
	# asr_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-robust-ft-libri-960h")

	# The OpenAI key is read from the environment; it is None when OPENAI_KEY is unset.
	openai.api_key = os.environ.get("OPENAI_KEY")

	# Sampling rate the wav2vec2 checkpoint is fed with (see the model card).
	_ASR_SAMPLE_RATE = 16000


	def _to_mono_16k(sample_rate, samples):
	    """Return *samples* as a mono float32 waveform at ``_ASR_SAMPLE_RATE``."""
	    samples = np.asarray(samples)
	    # Integer PCM is scaled into [-1, 1]; float input is taken as-is.
	    if np.issubdtype(samples.dtype, np.integer):
	        scale = float(np.iinfo(samples.dtype).max)
	    else:
	        scale = 1.0
	    waveform = samples.astype(np.float32) / scale

	    # Multi-channel uploads are assumed to be (samples, channels), as Gradio
	    # documents for type="numpy"; average the channels down to mono.
	    if waveform.ndim > 1:
	        waveform = waveform.mean(axis=1)

	    if sample_rate != _ASR_SAMPLE_RATE and waveform.size:
	        # Plain linear interpolation: no anti-aliasing filter, but it needs
	        # no extra dependency and is adequate for speech transcription.
	        duration = waveform.size / sample_rate
	        target_len = max(1, int(round(duration * _ASR_SAMPLE_RATE)))
	        old_times = np.linspace(0.0, duration, num=waveform.size, endpoint=False)
	        new_times = np.linspace(0.0, duration, num=target_len, endpoint=False)
	        waveform = np.interp(new_times, old_times, waveform).astype(np.float32)
	    return waveform


	def _parse_verdict(reply):
	    """Extract ``(classification, reasons)`` from the chat model's reply text.

	    Falls back to ``("unknown", reply)`` when the reply is not a JSON object,
	    so a free-text answer is still shown to the user instead of crashing.
	    """
	    try:
	        verdict = json.loads(reply)
	    except json.JSONDecodeError:
	        verdict = None
	    if not isinstance(verdict, dict):
	        return "unknown", reply
	    return verdict.get("classification", "unknown"), verdict.get("reasons", reply)


	def classify_audio(audio):
	    """Transcribe an uploaded recording and ask GPT-4 whether it is a scam.

	    Args:
	        audio: The value Gradio passes for an ``Audio`` input with
	            ``type="numpy"``: a ``(sample_rate, samples)`` tuple, or ``None``
	            when no file is present.

	    Returns:
	        A ``(transcription, classification, reasons)`` tuple. All three are
	        empty strings when there is no audio to analyse.
	    """
	    # With live=True the callback can fire with no input at all.
	    if audio is None:
	        return "", "", ""

	    sample_rate, samples = audio
	    waveform = _to_mono_16k(sample_rate, samples)
	    if waveform.size == 0:
	        return "", "", ""

	    input_values = processor(
	        waveform,
	        sampling_rate=_ASR_SAMPLE_RATE,
	        return_tensors="pt",
	        padding="longest",
	    ).input_values

	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits = model(input_values).logits

	    # Greedy CTC decoding. batch_decode returns one string per batch item and
	    # the batch holds a single recording, so take the first entry.
	    predicted_ids = torch.argmax(logits, dim=-1)
	    transcription = processor.batch_decode(predicted_ids)[0]

	    messages = [
	        {"role": "system", "content": "Is this chat a scam, spam or is safe? Only answer in JSON format with 'classification': '' as string and 'reasons': '' as the most plausible reasons why. The reason should be explaning to the potential victim why the conversation is probably a scam"},
	        {"role": "user", "content": transcription},
	    ]

	    # Call the OpenAI API to generate a response
	    response = openai.ChatCompletion.create(
	        model="gpt-4",
	        messages=messages,
	    )

	    reply = response.choices[0].message['content']
	    decision, reasons = _parse_verdict(reply)

	    return transcription, decision, reasons

	# Assemble the UI: one uploaded audio clip in, three text fields out.
	audio_input = gr.inputs.Audio(source="upload", type="numpy")
	text_outputs = [
	    gr.outputs.Textbox(label=label)
	    for label in ("Transcription", "Classification", "Reason")
	]

	# live=True is kept from the original configuration.
	demo = gr.Interface(
	    fn=classify_audio,
	    inputs=audio_input,
	    outputs=text_outputs,
	    live=True,
	)
	demo.launch()