oceansweep committed on
Commit
b927143
1 Parent(s): 7d17bca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -32
app.py CHANGED
@@ -76,17 +76,27 @@ config.read('config.txt')
76
 
77
  # API Keys
78
  anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None)
 
 
79
  cohere_api_key = config.get('API', 'cohere_api_key', fallback=None)
 
 
80
  groq_api_key = config.get('API', 'groq_api_key', fallback=None)
 
 
81
  openai_api_key = config.get('API', 'openai_api_key', fallback=None)
 
 
82
  huggingface_api_key = config.get('API', 'huggingface_api_key', fallback=None)
 
 
83
 
84
  # Models
85
  anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229')
86
  cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus')
87
  groq_model = config.get('API', 'groq_model', fallback='FIXME')
88
  openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo')
89
- huggingface_model = config.get('API', 'huggingface_model', fallback='microsoft/Phi-3-mini-128k-instruct')
90
 
91
  # Local-Models
92
  kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate')
@@ -756,10 +766,10 @@ def speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embed
756
  #
757
  #
758
 
759
- # Summarize with OpenAI ChatGPT
760
  def extract_text_from_segments(segments):
761
- logging.debug(f"openai: extracting text from {segments}")
762
  text = ' '.join([segment['text'] for segment in segments])
 
763
  return text
764
 
765
 
@@ -1154,6 +1164,33 @@ def save_summary_to_file(summary, file_path):
1154
  # Only to be used when configured with Gradio for HF Space
1155
  def summarize_with_huggingface(api_key, file_path):
1156
  logging.debug(f"huggingface: Summarization process starting...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1157
  if api_key == "":
1158
  api_key = os.environ.get("HF_TOKEN")
1159
  print("HUGGINGFACE API KEY CHECK: " + api_key)
@@ -1167,15 +1204,9 @@ def summarize_with_huggingface(api_key, file_path):
1167
 
1168
  api_key = os.environ.get('HF_TOKEN')
1169
  print("HUGGINGFACE API KEY CHECK #2: " + api_key)
1170
- headers = {
1171
- "Authorization": f"Bearer {api_key}"
1172
- }
1173
- model = "microsoft/Phi-3-mini-128k-instruct"
1174
- API_URL = f"https://api-inference.huggingface.co/models/{model}"
1175
- data = {
1176
- "inputs": text,
1177
- "parameters": {"max_length": 512, "min_length": 100} # You can adjust max_length and min_length as needed
1178
- }
1179
 
1180
  logging.debug("huggingface: Submitting request...")
1181
  response = requests.post(API_URL, headers=headers, json=data)
@@ -1200,6 +1231,16 @@ def summarize_with_huggingface(api_key, file_path):
1200
 
1201
 
1202
 
 
 
 
 
 
 
 
 
 
 
1203
  def launch_ui(demo_mode=False):
1204
  def process_transcription(json_data):
1205
  if json_data:
@@ -1207,24 +1248,6 @@ def launch_ui(demo_mode=False):
1207
  else:
1208
  return ""
1209
 
1210
- # dropdown.change(None, dropdown, None, _js=js)
1211
- # toggle_dark.click(
1212
- # None,
1213
- # _js="""
1214
- # () => {
1215
- # document.body.classList.toggle('dark');
1216
- # document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
1217
- # }
1218
- # """,
1219
- # )
1220
-
1221
- inputs = [
1222
- gr.components.Textbox(label="URL"),
1223
- gr.components.Number(value=2, label="Number of Speakers"),
1224
- gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
1225
- gr.components.Number(value=0, label="Offset")
1226
- ]
1227
-
1228
  if not demo_mode:
1229
  inputs.extend([
1230
  gr.components.Dropdown(choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value="huggingface", label="API Name"),
@@ -1233,12 +1256,19 @@ def launch_ui(demo_mode=False):
1233
  gr.components.Checkbox(value=False, label="Download Video")
1234
  ])
1235
 
 
 
 
 
 
 
 
1236
  iface = gr.Interface(
1237
  fn=lambda *args: process_url(*args, demo_mode=demo_mode),
1238
  inputs=inputs,
1239
  outputs=[
1240
  gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
1241
- gr.components.Textbox(label="Summary"),
1242
  gr.components.File(label="Download Transcription as JSON"),
1243
  gr.components.File(label="Download Summary as text", visible=lambda summary_file_path: summary_file_path is not None)
1244
  ],
@@ -1415,7 +1445,6 @@ if __name__ == "__main__":
1415
  parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
1416
  parser.add_argument('-v','--video', action='store_true', help='Download the video instead of just the audio')
1417
  parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
1418
- parser.add_argument('-key', '--api_key', type=str, help='API key for summarization (optional)')
1419
  parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
1420
  parser.add_argument('-wm', '--whisper_model', type=str, default='small.en', help='Whisper model (default: small.en)')
1421
  parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
 
76
 
77
  # API Keys
78
  anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None)
79
+ logging.debug(f"Loaded Anthropic API Key: {anthropic_api_key}")
80
+
81
  cohere_api_key = config.get('API', 'cohere_api_key', fallback=None)
82
+ logging.debug(f"Loaded cohere API Key: {cohere_api_key}")
83
+
84
  groq_api_key = config.get('API', 'groq_api_key', fallback=None)
85
+ logging.debug(f"Loaded groq API Key: {groq_api_key}")
86
+
87
  openai_api_key = config.get('API', 'openai_api_key', fallback=None)
88
+ logging.debug(f"Loaded openAI Face API Key: {openai_api_key}")
89
+
90
  huggingface_api_key = config.get('API', 'huggingface_api_key', fallback=None)
91
+ logging.debug(f"Loaded HuggingFace Face API Key: {huggingface_api_key}")
92
+
93
 
94
  # Models
95
  anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229')
96
  cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus')
97
  groq_model = config.get('API', 'groq_model', fallback='FIXME')
98
  openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo')
99
+ huggingface_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus')
100
 
101
  # Local-Models
102
  kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate')
 
766
  #
767
  #
768
 
 
769
  def extract_text_from_segments(segments):
770
+ logging.debug(f"Main: extracting text from {segments}")
771
  text = ' '.join([segment['text'] for segment in segments])
772
+ logging.debug(f"Main: Successfully extracted text from {segments}")
773
  return text
774
 
775
 
 
1164
  # Only to be used when configured with Gradio for HF Space
1165
  def summarize_with_huggingface(api_key, file_path):
1166
  logging.debug(f"huggingface: Summarization process starting...")
1167
+
1168
+ model = "microsoft/Phi-3-mini-128k-instruct"
1169
+ API_URL = f"https://api-inference.huggingface.co/models/{model}"
1170
+ headers = {"Authorization": f"Bearer {api_key}"}
1171
+
1172
+ with open(file_path, 'r') as file:
1173
+ segments = json.load(file)
1174
+ text = ''.join([segment['text'] for segment in segments])
1175
+
1176
+ # FIXME adjust max_length and min_length as needed
1177
+ data = {
1178
+ "inputs": text,
1179
+ "parameters": {"max_length": 4096, "min_length": 100}
1180
+ }
1181
+
1182
+ for attempt in range(max_retries):
1183
+ response = requests.post(API_URL, headers=headers, json=data)
1184
+ if response.status_code == 200:
1185
+ summary = response.json()[0]['summary_text']
1186
+ return summary, None
1187
+ elif response.status_code == 503:
1188
+ response_data = response.json()
1189
+ wait_time = response_data.get('estimated_time', 10)
1190
+ return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
1191
+ # Sleep before retrying....
1192
+ time.sleep(wait_time)
1193
+
1194
  if api_key == "":
1195
  api_key = os.environ.get("HF_TOKEN")
1196
  print("HUGGINGFACE API KEY CHECK: " + api_key)
 
1204
 
1205
  api_key = os.environ.get('HF_TOKEN')
1206
  print("HUGGINGFACE API KEY CHECK #2: " + api_key)
1207
+
1208
+
1209
+
 
 
 
 
 
 
1210
 
1211
  logging.debug("huggingface: Submitting request...")
1212
  response = requests.post(API_URL, headers=headers, json=data)
 
1231
 
1232
 
1233
 
1234
+ def process_text(api_key,text_file):
1235
+ summary,message = summarize_with_huggingface(api_key,text_file)
1236
+ if summary:
1237
+ # Show summary on success
1238
+ return "Summary:",summary
1239
+ else:
1240
+ # Inform user about load/wait time
1241
+ return "Notice:",message
1242
+
1243
+
1244
  def launch_ui(demo_mode=False):
1245
  def process_transcription(json_data):
1246
  if json_data:
 
1248
  else:
1249
  return ""
1250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
  if not demo_mode:
1252
  inputs.extend([
1253
  gr.components.Dropdown(choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value="huggingface", label="API Name"),
 
1256
  gr.components.Checkbox(value=False, label="Download Video")
1257
  ])
1258
 
1259
+ inputs = [
1260
+ gr.components.Textbox(label="URL"),
1261
+ gr.components.Number(value=2, label="Number of Speakers"),
1262
+ gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
1263
+ gr.components.Number(value=0, label="Offset")
1264
+ ]
1265
+
1266
  iface = gr.Interface(
1267
  fn=lambda *args: process_url(*args, demo_mode=demo_mode),
1268
  inputs=inputs,
1269
  outputs=[
1270
  gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
1271
+ gr.components.Textbox(label="Summary or Status Message"),
1272
  gr.components.File(label="Download Transcription as JSON"),
1273
  gr.components.File(label="Download Summary as text", visible=lambda summary_file_path: summary_file_path is not None)
1274
  ],
 
1445
  parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
1446
  parser.add_argument('-v','--video', action='store_true', help='Download the video instead of just the audio')
1447
  parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
 
1448
  parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
1449
  parser.add_argument('-wm', '--whisper_model', type=str, default='small.en', help='Whisper model (default: small.en)')
1450
  parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')