Rehman1603 committed on
Commit
e007232
·
verified ·
1 Parent(s): 27fb763

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import torch
4
+ import whisper
5
+ from TTS.api import TTS
6
+ from torch.serialization import add_safe_globals
7
+ from TTS.tts.configs.xtts_config import XttsConfig
8
+ from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
9
+ from TTS.config.shared_configs import BaseDatasetConfig
10
+ from fetch_data import get_botpress_response
11
+ import requests, os, json
12
+ import warnings
13
+ import gradio as gr
14
+ import librosa
15
+ import torch
16
+ import numpy as np
17
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
18
+
19
# Suppress all warnings for the whole process.
warnings.filterwarnings("ignore")

# Load model and processor once at import time; both come from the same
# pretrained checkpoint, so the name is spelled out in a single place.
_ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(_ASR_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_ASR_CHECKPOINT)
24
+
25
def process_media(media_id, access_token, phone_no_id, phone_no, business_id):
    """Resolve a media ID to a voice note, download it and transcribe it.

    Args:
        media_id: Identifier passed to ``get_media_url``.
        access_token: Bearer token used for the lookup and the download.
        phone_no_id: Phone-number ID passed to ``get_media_url``.
        phone_no: Accepted for interface compatibility; not used here.
        business_id: Accepted for interface compatibility; not used here.

    Returns:
        The transcription string, ``"Hi"`` when the transcription is empty,
        or a short failure message when the URL lookup or download fails.
    """
    voice_note_url = get_media_url(media_id, access_token, phone_no_id)
    if not voice_note_url:
        return "Failed to fetch media URL."

    local_file = download_voice_note(voice_note_url, access_token)
    if not local_file:
        return "Failed to download voice note."

    # An empty transcript falls back to the default greeting.
    return audio_transcribe(local_file) or "Hi"
42
+
43
def audio_transcribe(audio_path):
    """Transcribe an audio file with the module-level ``processor``/``model``.

    The file is loaded through librosa at 16 kHz, run through the model with
    gradients disabled, and the argmax over the last logits dimension is
    decoded back to text.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The first decoded transcription, or ``"Error: <message>"`` if any
        step raises.
    """
    target_rate = 16000
    try:
        waveform, _ = librosa.load(audio_path, sr=target_rate)
        features = processor(
            waveform, return_tensors="pt", sampling_rate=target_rate
        )

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            scores = model(features.input_values).logits
            token_ids = torch.argmax(scores, dim=-1)

        decoded = processor.batch_decode(token_ids)
        return decoded[0]
    except Exception as err:
        # Every failure is reported to the caller as text, never raised.
        return f"Error: {err!s}"
58
+
59
+
60
def get_media_url(media_id, access_token, phone_no_id, timeout=30):
    """Fetch media URL from Facebook Graph API.

    Args:
        media_id: Media identifier placed in the request path.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Value sent as the ``phone_number_id`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``url`` field of the JSON response on HTTP 200, otherwise
        ``None`` (non-200 status, network failure, or unparsable body).
    """
    endpoint = f"https://graph.facebook.com/v21.0/{media_id}"
    headers = {'Authorization': f'Bearer {access_token}'}

    try:
        # Let requests build the query string so the value is URL-encoded;
        # the timeout keeps a stalled connection from hanging the app.
        response = requests.get(
            endpoint,
            headers=headers,
            params={"phone_number_id": phone_no_id},
            timeout=timeout,
        )
        if response.status_code != 200:
            return None
        body = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers JSON decode failures on older requests versions.
        print(f"❌ Failed to fetch media URL: {exc}")
        return None

    return body.get('url') if isinstance(body, dict) else None
69
+
70
def download_voice_note(url, access_token, timeout=30):
    """Download a voice note and store it in the current directory.

    Args:
        url: Address of the media file to download.
        access_token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the saved ``voice_note.mp3`` file, or ``None`` when the
        server does not answer with HTTP 200 or the request fails.
    """
    headers = {"Authorization": f"Bearer {access_token}"}
    file_path = os.path.join(os.getcwd(), "voice_note.mp3")  # Save to current directory

    try:
        # With stream=True the connection stays open until the body is
        # consumed or the response is closed; the context manager closes it
        # on every path, including the non-200 one where the body is unread.
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"❌ Failed to download file. Status code: {response.status_code}")
                return None

            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as exc:
        # Report transport errors the same way as a bad status: return None.
        print(f"❌ Failed to download file: {exc}")
        return None

    print(f"✅ Download complete: {file_path}")
    return file_path
88
+
89
def upload_audio(audio_path, access_token, phone_no_id, timeout=60):
    """Upload an audio file to WhatsApp Business API.

    Args:
        audio_path: Path of the local audio file to upload.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Phone-number ID used in the media endpoint path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the request
        fails or the response carries no ``id``.

    Raises:
        OSError: If ``audio_path`` cannot be opened.
    """
    url = f"https://graph.facebook.com/v21.0/{phone_no_id}/media"
    payload = {'messaging_product': 'whatsapp'}
    headers = {'Authorization': f'Bearer {access_token}'}

    try:
        # The file handle must stay open while requests streams the upload.
        with open(audio_path, 'rb') as audio_file:
            files = [('file', ('output.ogg', audio_file, 'audio/ogg'))]
            response = requests.post(
                url, headers=headers, data=payload, files=files, timeout=timeout
            )
    except requests.exceptions.RequestException as exc:
        print(f"❌ Failed to upload audio: {exc}")
        return None

    print(f"response is {response.text}")
    try:
        body = response.json()
    except ValueError:
        # A non-JSON body (e.g. an HTML error page) carries no media id.
        return None
    return body.get('id') if isinstance(body, dict) else None
101
+
102
def send_audio(phone_no, audio_id, access_token, phone_no_id, timeout=30):
    """Send an audio message via WhatsApp API.

    Args:
        phone_no: Recipient placed in the ``to`` field of the message.
        audio_id: Media ID placed in the ``audio.id`` field.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Phone-number ID used in the messages endpoint path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response on success, or the error message string
        when the request fails, the status is an HTTP error, or the body is
        not valid JSON.
    """
    url = f"https://graph.facebook.com/v21.0/{phone_no_id}/messages"

    payload = json.dumps({
        "messaging_product": "whatsapp",
        "recipient_type": "individual",
        "to": phone_no,
        "type": "audio",
        "audio": {"id": audio_id}
    })
    headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {access_token}'}

    try:
        # The timeout keeps a stalled connection from blocking forever.
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        print(response.text)
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on older requests versions.
        return str(e)
122
+
123
# Gradio Interface: exposes process_media as a five-field text form.
iface = gr.Interface(
    fn=process_media,
    inputs=[
        gr.Textbox(label="Media ID"),
        # Mask the bearer token so it is not shown in clear text on screen.
        gr.Textbox(label="Access Token", type="password"),
        gr.Textbox(label="Phone Number ID"),
        gr.Textbox(label="Recipient Phone Number"),
        gr.Textbox(label="Business ID"),
    ],
    outputs="text",
    title="WhatsApp Audio Processor",
    # NOTE(review): process_media only transcribes and does not send audio;
    # the description also omits the Business ID field. Confirm wording.
    description="Enter Media ID, Access Token, Phone Number ID, and Recipient Phone Number to process and send audio."
)
# Started at module level, as before, so running or importing this file
# launches the app.
iface.launch(debug=True)