abidlabs HF staff committed on
Commit
add165b
·
1 Parent(s): 88322f7
Files changed (5) hide show
  1. __pycache__/utils.cpython-312.pyc +0 -0
  2. app.py +55 -44
  3. clean.py +12 -0
  4. transcribe.py +1 -0
  5. utils.py +13 -2
__pycache__/utils.cpython-312.pyc CHANGED
Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ
 
app.py CHANGED
@@ -4,51 +4,62 @@ import transcribe
4
 
5
  with gr.Blocks(theme="base") as demo:
6
  gr.Markdown("<center><h1> πŸ”Š Transcription Delight </h1></center>")
7
- with gr.Tabs(selected="result") as tabs:
8
- with gr.Tab("Input"):
9
- with gr.Row():
10
- with gr.Column():
11
- source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
12
- @gr.render(inputs=source)
13
- def show_source(s):
14
- if s == "audio":
15
- source_component = gr.Audio(type="filepath")
16
- elif s == "video":
17
- source_component = gr.Video()
18
- else:
19
- source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
20
- preview = gr.HTML(label="Video preview")
21
- source_component.change(utils.convert_to_embed_url, source_component, preview)
22
- transcribe_btn.click(
23
- lambda : gr.Tabs(selected="result"),
24
- None,
25
- tabs
26
- ).then(
27
- utils.generate_audio,
28
- [source, source_component],
29
- [download_audio],
30
- show_progress="minimal"
31
- ).then(
32
- transcribe.transcribe,
33
- [download_audio],
34
- [preliminary_transcript],
35
- show_progress="hidden"
36
- )
37
 
38
- with gr.Column():
39
- gr.Dropdown(label="Languages", choices=["(Autodetect)", "English"], value="(Autodetect)")
40
- gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
41
- gr.Checkbox(label="Diarize Speakers (coming soon)", interactive=False)
42
 
43
- transcribe_btn = gr.Button("Transcribe audio ✨", variant="primary")
44
- source.change(utils.transcribe_button, source, transcribe_btn)
45
-
46
- with gr.Tab("Result", id="result"):
47
- with gr.Row():
48
- with gr.Column():
49
- download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
50
- preliminary_transcript = gr.Textbox(info="Preliminary transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
51
- with gr.Column():
52
- gr.Markdown("*Final transcript will appear here*")
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  demo.launch()
 
4
 
5
  with gr.Blocks(theme="base") as demo:
6
  gr.Markdown("<center><h1> πŸ”Š Transcription Delight </h1></center>")
7
+ gr.Markdown("### Step 1: Generate Raw Transcript")
8
+ with gr.Row():
9
+ with gr.Column():
10
+ source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
11
+ @gr.render(inputs=source)
12
+ def show_source(s):
13
+ if s == "audio":
14
+ source_component = gr.Audio(type="filepath")
15
+ elif s == "video":
16
+ source_component = gr.Video()
17
+ else:
18
+ source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
19
+ preview = gr.HTML(label="Video preview")
20
+ source_component.change(utils.convert_to_embed_url, source_component, preview)
21
+ # transcribe_btn.click(
22
+ # lambda : gr.Tabs(selected="result"),
23
+ # None,
24
+ # tabs
25
+ # ).then(
26
+ # utils.generate_audio,
27
+ # [source, source_component],
28
+ # [download_audio],
29
+ # show_progress="minimal"
30
+ # ).then(
31
+ # transcribe.transcribe,
32
+ # [download_audio],
33
+ # [preliminary_transcript],
34
+ # show_progress="hidden"
35
+ # )
 
36
 
37
+ with gr.Column():
38
+ transcribe_btn = gr.Button("Transcribe audio πŸ“œ", variant="primary")
39
+ preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
 
40
 
41
+
42
+ source.change(utils.transcribe_button, source, transcribe_btn)
43
+
44
+ gr.Markdown("### Step 2: Clean with an LLM")
45
+ with gr.Row():
46
+ with gr.Column():
47
+ cleanup_options = gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
48
+ llm_prompt = gr.Textbox(label="LLM Prompt", visible=False, lines=3)
49
+ cleanup_options.change(
50
+ utils.generate_prompt,
51
+ cleanup_options,
52
+ llm_prompt
53
+ )
54
+
55
+ with gr.Column():
56
+ clean_btn = gr.Button("Clean transcript ✨", variant="primary", interactive=False)
57
+ gr.Markdown("*Final transcript will appear here*")
58
+ # with gr.Tab("Result", id="result"):
59
+ # with gr.Row():
60
+ # with gr.Column():
61
+ # download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
62
+ # preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
63
+ # with gr.Column():
64
 
65
  demo.launch()
clean.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
from huggingface_hub import InferenceClient

MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"


def clean_transcript(prompt, transcript, max_tokens=200):
    """Send a transcript to the hosted chat model and stream back the reply.

    The instruction in ``prompt`` and the text in ``transcript`` are combined
    into a single user message. Each streamed token is echoed to stdout as it
    arrives, and the concatenated reply is returned once the stream ends.

    Args:
        prompt: Instruction telling the model how to clean the transcript.
        transcript: Raw transcript text to be cleaned.
        max_tokens: Upper bound on generated tokens (defaults to 200, the
            value that was previously hard-coded).

    Returns:
        The full text produced by the model, as one string.
    """
    # The original referenced an undefined ``PROMPT`` name and ignored both
    # arguments; build the message from what the caller actually passed in.
    messages = [
        {"role": "user", "content": f"{prompt}\n\n{transcript}"}
    ]
    client = InferenceClient(model=MODEL_NAME)
    pieces = []
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
        token = chunk.choices[0].delta.content
        # NOTE(review): a streamed chunk may carry no text (e.g. the final
        # one) -- skip it so a literal "None" is never emitted; confirm
        # against the huggingface_hub streaming docs.
        if token:
            print(token, end="")
            pieces.append(token)
    return "".join(pieces)
transcribe.py CHANGED
@@ -25,3 +25,4 @@ def transcribe_audio_in_chunks(audio_path, chunk_length_ms):
25
  transcription = transcribe_segment(chunk, i)
26
  yield transcription
27
 
 
 
25
  transcription = transcribe_segment(chunk, i)
26
  yield transcription
27
 
28
+
utils.py CHANGED
@@ -43,9 +43,9 @@ def convert_video_to_audio(input_file):
43
 
44
  def transcribe_button(source):
45
  if source == "audio":
46
- return gr.Button("Transcribe audio ✨")
47
  else:
48
- return gr.Button("Transcribe video ✨")
49
 
50
  def generate_audio(source, source_file):
51
  if source == "audio":
@@ -57,3 +57,14 @@ def generate_audio(source, source_file):
57
  gr.Info("Downloading audio from YouTube...")
58
  audio_file = download_audio_from_youtube(source_file)
59
  return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
43
 
def transcribe_button(source):
    """Return a Button update whose label matches the selected source type.

    Args:
        source: The selected source type; ``"audio"`` yields the audio label,
            any other value yields the video label.

    Returns:
        A ``gr.Button`` carrying the new label.
    """
    # The labels previously contained a mis-encoded emoji ("πŸ“œ"); it is
    # restored here to the intended scroll character (U+1F4DC).
    media = "audio" if source == "audio" else "video"
    return gr.Button(f"Transcribe {media} 📜")
49
 
50
  def generate_audio(source, source_file):
51
  if source == "audio":
 
57
  gr.Info("Downloading audio from YouTube...")
58
  audio_file = download_audio_from_youtube(source_file)
59
  return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
60
+
61
def generate_prompt(cleanup):
    """Build the LLM prompt textbox update for the chosen cleanup options.

    Args:
        cleanup: Selected cleanup options (a list of option labels, possibly
            empty or None). Selection order does not matter.

    Returns:
        A ``gr.Textbox`` update: hidden when no recognised option is selected,
        otherwise visible with a placeholder describing the requested cleanup.
    """
    # Compare by membership rather than list equality so that the order in
    # which the boxes were ticked cannot make the function fall through and
    # return None.
    selected = set(cleanup or ())
    remove_typos = "Remove typos" in selected
    split_paragraphs = "Separate into paragraphs" in selected

    if remove_typos and split_paragraphs:
        instruction = "Remove the typos and separate the transcript into paragraphs based on logical breaks."
    elif remove_typos:
        instruction = "Remove the typos from the transcript."
    elif split_paragraphs:
        instruction = "Separate the transcript into paragraphs based on logical breaks."
    else:
        return gr.Textbox(visible=False)

    preamble = "The following is a raw transcript from an automatic transcription system. "
    # NOTE(review): the prompt is supplied as placeholder text, so the
    # textbox value itself stays empty until the user types -- confirm this
    # is intended before wiring the textbox into the cleaning step.
    return gr.Textbox(visible=True, placeholder=preamble + instruction)
70
+