Spaces:
Running
Running
oceansweep
committed on
Commit
•
d6b96dc
1
Parent(s):
62a9bae
Update app.py
Browse files
app.py
CHANGED
@@ -45,23 +45,22 @@ import yt_dlp
|
|
45 |
#
|
46 |
#
|
47 |
# Usage:
|
48 |
-
# Transcribe a single URL:
|
49 |
-
# python diarize.py https://example.com/video.mp4
|
50 |
#
|
51 |
-
#
|
52 |
-
#
|
53 |
#
|
54 |
-
#
|
55 |
-
#
|
56 |
#
|
57 |
-
#
|
58 |
-
#
|
59 |
-
#
|
60 |
-
#
|
61 |
-
#
|
62 |
-
#
|
63 |
-
#
|
64 |
-
#
|
|
|
65 |
#
|
66 |
###
|
67 |
|
@@ -350,7 +349,7 @@ def process_local_file(file_path):
|
|
350 |
# Video Download/Handling
|
351 |
#
|
352 |
|
353 |
-
def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=True):
|
354 |
if demo_mode:
|
355 |
api_name = "huggingface"
|
356 |
api_key = os.environ.get(HF_TOKEN)
|
@@ -793,7 +792,7 @@ def summarize_with_openai(api_key, file_path, model):
|
|
793 |
}
|
794 |
|
795 |
logging.debug("openai: Preparing data + prompt for submittal")
|
796 |
-
|
797 |
data = {
|
798 |
"model": model,
|
799 |
"messages": [
|
@@ -803,7 +802,7 @@ def summarize_with_openai(api_key, file_path, model):
|
|
803 |
},
|
804 |
{
|
805 |
"role": "user",
|
806 |
-
"content":
|
807 |
}
|
808 |
],
|
809 |
"max_tokens": 4096, # Adjust tokens as needed
|
@@ -846,7 +845,7 @@ def summarize_with_claude(api_key, file_path, model):
|
|
846 |
logging.debug("anthropic: Prepping data + prompt for submittal")
|
847 |
user_message = {
|
848 |
"role": "user",
|
849 |
-
"content": f"{text} \n\n\n\
|
850 |
}
|
851 |
|
852 |
data = {
|
@@ -913,10 +912,10 @@ def summarize_with_cohere(api_key, file_path, model):
|
|
913 |
'Authorization': f'Bearer {api_key}'
|
914 |
}
|
915 |
|
916 |
-
|
917 |
data = {
|
918 |
"chat_history": [
|
919 |
-
{"role": "USER", "message":
|
920 |
],
|
921 |
"message": "Please provide a summary.",
|
922 |
"model": model,
|
@@ -964,12 +963,12 @@ def summarize_with_groq(api_key, file_path, model):
|
|
964 |
'Content-Type': 'application/json'
|
965 |
}
|
966 |
|
967 |
-
|
968 |
data = {
|
969 |
"messages": [
|
970 |
{
|
971 |
"role": "user",
|
972 |
-
"content":
|
973 |
}
|
974 |
],
|
975 |
"model": model
|
@@ -1021,12 +1020,13 @@ def summarize_with_llama(api_url, file_path, token):
|
|
1021 |
headers['Authorization'] = f'Bearer {token}'
|
1022 |
|
1023 |
|
1024 |
-
|
|
|
1025 |
data = {
|
1026 |
-
"prompt":
|
1027 |
}
|
1028 |
|
1029 |
-
logging.debug("llama: Submitting request to API endpoint")
|
1030 |
print("llama: Submitting request to API endpoint")
|
1031 |
response = requests.post(api_url, headers=headers, json=data)
|
1032 |
response_data = response.json()
|
@@ -1064,13 +1064,13 @@ def summarize_with_kobold(api_url, file_path):
|
|
1064 |
'content-type': 'application/json',
|
1065 |
}
|
1066 |
# FIXME
|
1067 |
-
|
1068 |
-
logging.debug(
|
1069 |
# Values literally c/p from the api docs....
|
1070 |
data = {
|
1071 |
"max_context_length": 8096,
|
1072 |
"max_length": 4096,
|
1073 |
-
"prompt":
|
1074 |
}
|
1075 |
|
1076 |
logging.debug("kobold: Submitting request to API endpoint")
|
@@ -1114,9 +1114,9 @@ def summarize_with_oobabooga(api_url, file_path):
|
|
1114 |
'content-type': 'application/json',
|
1115 |
}
|
1116 |
|
1117 |
-
prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
|
1118 |
-
#
|
1119 |
-
|
1120 |
|
1121 |
data = {
|
1122 |
"mode": "chat",
|
@@ -1268,6 +1268,7 @@ def launch_ui(demo_mode=False):
|
|
1268 |
gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
|
1269 |
gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
|
1270 |
gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
|
|
|
1271 |
gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
|
1272 |
]
|
1273 |
|
@@ -1316,6 +1317,7 @@ def launch_ui(demo_mode=False):
|
|
1316 |
####################################################################################################################################
|
1317 |
# Main()
|
1318 |
#
|
|
|
1319 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
|
1320 |
if input_path is None and args.user_interface:
|
1321 |
return []
|
|
|
45 |
#
|
46 |
#
|
47 |
# Usage:
|
|
|
|
|
48 |
#
|
49 |
+
# Download Audio only from URL -> Transcribe audio:
|
50 |
+
# python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
|
51 |
#
|
52 |
+
# Download Audio+Video from URL -> Transcribe audio from Video:
|
53 |
+
# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s
|
54 |
#
|
55 |
+
# Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:
|
56 |
+
# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API> - Make sure to put your API key into `config.txt` under the appropriate API variable
|
57 |
+
#
|
58 |
+
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:
|
59 |
+
# python summarize.py ./local/file_on_your/system --api_name <API_name>
|
60 |
+
#
|
61 |
+
# Run it as a WebApp:
|
62 |
+
# python summarize.py -gui - This requires you to either put your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
|
63 |
+
# Can be helpful for setting up a shared instance when you do not want people to perform inference on your server.
|
64 |
#
|
65 |
###
|
66 |
|
|
|
349 |
# Video Download/Handling
|
350 |
#
|
351 |
|
352 |
+
def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False,custom_prompt=None, demo_mode=True):
|
353 |
if demo_mode:
|
354 |
api_name = "huggingface"
|
355 |
api_key = os.environ.get(HF_TOKEN)
|
|
|
792 |
}
|
793 |
|
794 |
logging.debug("openai: Preparing data + prompt for submittal")
|
795 |
+
openai_prompt = f"{text} \n\n\n\n{prompt_text}"
|
796 |
data = {
|
797 |
"model": model,
|
798 |
"messages": [
|
|
|
802 |
},
|
803 |
{
|
804 |
"role": "user",
|
805 |
+
"content": openai_prompt
|
806 |
}
|
807 |
],
|
808 |
"max_tokens": 4096, # Adjust tokens as needed
|
|
|
845 |
logging.debug("anthropic: Prepping data + prompt for submittal")
|
846 |
user_message = {
|
847 |
"role": "user",
|
848 |
+
"content": f"{text} \n\n\n\n{prompt_text}"
|
849 |
}
|
850 |
|
851 |
data = {
|
|
|
912 |
'Authorization': f'Bearer {api_key}'
|
913 |
}
|
914 |
|
915 |
+
cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
|
916 |
data = {
|
917 |
"chat_history": [
|
918 |
+
{"role": "USER", "message": cohere_prompt}
|
919 |
],
|
920 |
"message": "Please provide a summary.",
|
921 |
"model": model,
|
|
|
963 |
'Content-Type': 'application/json'
|
964 |
}
|
965 |
|
966 |
+
groq_prompt = f"{text} \n\n\n\n{prompt_text}"
|
967 |
data = {
|
968 |
"messages": [
|
969 |
{
|
970 |
"role": "user",
|
971 |
+
"content": groq_prompt
|
972 |
}
|
973 |
],
|
974 |
"model": model
|
|
|
1020 |
headers['Authorization'] = f'Bearer {token}'
|
1021 |
|
1022 |
|
1023 |
+
llama_prompt = f"{text} \n\n\n\n{prompt_text}"
|
1024 |
+
logging.debug(f"llama: Complete prompt is: {llama_prompt}")
|
1025 |
data = {
|
1026 |
+
"prompt": llama_prompt
|
1027 |
}
|
1028 |
|
1029 |
+
#logging.debug(f"llama: Submitting request to API endpoint {llama_prompt}")
|
1030 |
print("llama: Submitting request to API endpoint")
|
1031 |
response = requests.post(api_url, headers=headers, json=data)
|
1032 |
response_data = response.json()
|
|
|
1064 |
'content-type': 'application/json',
|
1065 |
}
|
1066 |
# FIXME
|
1067 |
+
kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
|
1068 |
+
logging.debug(kobold_prompt)
|
1069 |
# Values literally c/p from the api docs....
|
1070 |
data = {
|
1071 |
"max_context_length": 8096,
|
1072 |
"max_length": 4096,
|
1073 |
+
"prompt": kobold_prompt,
|
1074 |
}
|
1075 |
|
1076 |
logging.debug("kobold: Submitting request to API endpoint")
|
|
|
1114 |
'content-type': 'application/json',
|
1115 |
}
|
1116 |
|
1117 |
+
#prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
|
1118 |
+
#prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
|
1119 |
+
ooba_prompt = f"{text}\n\n\n\n{prompt_text}"
|
1120 |
|
1121 |
data = {
|
1122 |
"mode": "chat",
|
|
|
1268 |
gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
|
1269 |
gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
|
1270 |
gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
|
1271 |
+
gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
|
1272 |
gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
|
1273 |
]
|
1274 |
|
|
|
1317 |
####################################################################################################################################
|
1318 |
# Main()
|
1319 |
#
|
1320 |
+
|
1321 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
|
1322 |
if input_path is None and args.user_interface:
|
1323 |
return []
|