neonwatty committed on
Commit
39c54da
1 Parent(s): 34d4792

Update youtube_shorts_transcript_downloader/app.py

Browse files
youtube_shorts_transcript_downloader/app.py CHANGED
@@ -2,26 +2,34 @@ import streamlit as st
2
  import pandas as pd
3
  from youtube_shorts_transcript_downloader.transcripts import get_batch_transcripts
4
 
5
-
6
  st.title("YT Shorts Transcript Downloader")
7
  st.markdown(
8
- "instructions: upload a text file with valid youtube urls, one per line, to fetch transcripts"
9
  )
10
 
11
 
12
  base = st.container(border=True)
13
  with base:
14
- x, col1, col2 = st.columns([3, 20, 5])
 
15
  with col1:
 
 
 
16
  uploaded_file = st.file_uploader("Choose a File", type=["txt"])
17
- col2, col3, col4 = st.columns([3, 2, 3])
18
- with col2:
19
- trans_button_val = st.button(label="fetch transcripts", type="primary")
20
- with col3:
21
- empty_container = st.container()
22
- with col4:
23
- placeholder = st.empty()
 
 
 
24
 
 
25
 
26
  @st.cache_data
27
  def convert_df(df: pd.DataFrame) -> "csv":
@@ -34,8 +42,9 @@ def button_logic(youtube_short_urls: list) -> None:
34
  batch_transcripts = get_batch_transcripts(youtube_short_urls)
35
  df = pd.DataFrame(batch_transcripts)
36
  converted_dv = convert_df(df)
 
37
 
38
- with col4:
39
  st.download_button(
40
  label="Download transcripts",
41
  data=converted_dv,
@@ -49,14 +58,34 @@ def button_logic(youtube_short_urls: list) -> None:
49
  # default_file_path = main_dir + "/data/input/test_input.txt"
50
  youtube_short_urls = []
51
  if uploaded_file is not None:
 
 
 
 
 
52
  if uploaded_file.type == "text/plain":
53
  from io import StringIO
54
-
55
  stringio = StringIO(uploaded_file.read().decode("utf-8"))
56
  for line in stringio:
57
  youtube_short_urls.append(line.strip())
 
58
  # else:
59
  # youtube_short_urls = parse_input_file(default_file_path)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  with st.spinner(text="transcript pull in progress..."):
62
- button_logic(youtube_short_urls)
 
2
  import pandas as pd
3
  from youtube_shorts_transcript_downloader.transcripts import get_batch_transcripts
4
 
5
+ st.set_page_config(page_title="YT Shorts Transcript Downloader", layout="wide")
6
  st.title("YT Shorts Transcript Downloader")
7
  st.markdown(
8
+ "instructions: enter in urls separated by commas or upload a text file with one url per line"
9
  )
10
 
11
 
12
  base = st.container(border=True)
13
  with base:
14
+ col1, sep_col, col2 = st.columns([5, 2, 5])
15
+
16
  with col1:
17
+ text_urls = st.text_area("youtube shorts urls", value="", placeholder="enter urls separated by commas - for example: https://www.youtube.com/shorts/o7a9hx-Pqyo, https://www.youtube.com/shorts/xkAYLnIsfX4")
18
+
19
+ with col2:
20
  uploaded_file = st.file_uploader("Choose a File", type=["txt"])
21
+
22
+ col3, col4, col5 = st.columns([3, 2, 3])
23
+ with col3:
24
+ trans_button_val = st.button(label="fetch transcripts", type="primary")
25
+ with col4:
26
+ empty_container = st.container()
27
+ with col5:
28
+ placeholder = st.empty()
29
+
30
+ download_area = st.container()
31
 
32
+ # https://www.youtube.com/shorts/o7a9hx-Pqyo, https://www.youtube.com/shorts/xkAYLnIsfX4
33
 
34
  @st.cache_data
35
  def convert_df(df: pd.DataFrame) -> "csv":
 
42
  batch_transcripts = get_batch_transcripts(youtube_short_urls)
43
  df = pd.DataFrame(batch_transcripts)
44
  converted_dv = convert_df(df)
45
+ st.write(df.head(1).to_dict())
46
 
47
+ with download_area:
48
  st.download_button(
49
  label="Download transcripts",
50
  data=converted_dv,
 
58
  # default_file_path = main_dir + "/data/input/test_input.txt"
59
  youtube_short_urls = []
60
  if uploaded_file is not None:
61
+ if text_urls is not None:
62
+ if len(text_urls.strip()) > 0:
63
+ st.warning("you can enter urls manually or from file but not both", icon="⚠️")
64
+ st.stop()
65
+
66
  if uploaded_file.type == "text/plain":
67
  from io import StringIO
68
+
69
  stringio = StringIO(uploaded_file.read().decode("utf-8"))
70
  for line in stringio:
71
  youtube_short_urls.append(line.strip())
72
+
73
  # else:
74
  # youtube_short_urls = parse_input_file(default_file_path)
75
 
76
+ if text_urls is not None:
77
+ if len(text_urls.strip()) > 0:
78
+ if uploaded_file is not None:
79
+ st.warning("you can enter urls manually or from file but not both", icon="⚠️")
80
+ st.stop()
81
+
82
+ try:
83
+ text_urls_split = text_urls.split(",")
84
+ text_urls_split = [v.strip() for v in text_urls_split]
85
+ youtube_short_urls = text_urls_split
86
+ except:
87
+ st.warning("please check your manually entered urls", icon="⚠️")
88
+ st.stop()
89
+
90
  with st.spinner(text="transcript pull in progress..."):
91
+ button_logic(youtube_short_urls)