JBHF committed on
Commit
c296411
1 Parent(s): 2dd36d5

app-08-04-2024-17u50m.py

Browse files
Files changed (1) hide show
  1. app-08-04-2024-17u50m.py +177 -0
app-08-04-2024-17u50m.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 26-03-2024 17u50m CET
2
+
3
+ # https://github.com/theevann/streamlit-audiorecorder
4
+ # An audio Recorder for streamlit
5
+ #
6
+ # Description
7
+ # Audio recorder component for streamlit.
8
+ # It creates a button to start the recording and takes three arguments:
9
+ # the start button text, the stop button text, and the pause button text.
10
+ # If the pause button text is not specified, the pause button is not displayed.
11
+ #
12
+ # Parameters
13
+ # The signature of the component is:
14
+ # audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
15
+ # The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
16
+ # to properly distinguish multiple audiorecorders on the page.
17
+ #
18
+ # Return value
19
+ # The component's return value is a pydub AudioSegment.
20
+ #
21
+ # All AudioSegment methods are available, in particular you can:
22
+ # - Play the audio in the frontend with st.audio(audio.export().read())
23
+ # - Save the audio to a file with audio.export("audio.wav", format="wav")
24
+ # JB: Why don't I see the file "audio.wav" in my HF Spaces environment?
25
+ # JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
26
+ # See:
27
+ #
28
+ # CACHING:
29
+ # ========
30
+ # STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
31
+ # https://docs.streamlit.io/develop/concepts/architecture/caching
32
+ #
33
+ # EVERNOTE :
34
+ # https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
35
+ # STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
36
+ #
37
+ # 08-04-2024
38
+ #
39
+ # EN
40
+ #
41
+ # PERSISTENCE:
42
+ # ============
43
+ # HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
44
+ # https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
45
+ #
46
+ # ——->
47
+ #
48
+ # DUPLICATED TO:
49
+ # https://huggingface.co/spaces/JBHF/persistent-data?logs=container
50
+ #
51
+ # EVERNOTE :
52
+ # https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
53
+ # HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
54
+ #
55
+ # 08-04-2024
56
+ #
57
+
58
+ import streamlit as st
59
+
60
+
61
+
62
+ ###########################################################################################################
63
+ # TEST
64
+ # ZIE:
65
+ # infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
66
+ # https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5
67
+
68
+ from faster_whisper import WhisperModel
69
+
70
+ model_size = "large-v2"
71
+
72
+ # Run on GPU with FP16
73
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16") # ORIGINAL, DRAAIT OP COLAB T4 GPU OK
74
+
75
+ # TEST: Run on CPU
76
+ # model = WhisperModel(model_size, device="cpu", compute_type="float16") # JB, DRAAIT OP COLAB CPU OK ?
77
+ # ValueError: Requested float16 compute type, but the target device or backend do not support efficient float16 computation.
78
+ #
79
+ # st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\")")
80
+ # model = WhisperModel(model_size, device="cpu") # , compute_type="float16") # JB, DRAAIT OP COLAB CPU OK: JA; HF SPACES STREAMLIT FREE TIER: JB OK !
81
+ # JB: Dit gebruikt mijn HF Token !
82
+ # st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\")")
83
+
84
@st.cache_resource
def _load_whisper_model(size, device="cpu", compute_type="int8"):
    """Build the faster-whisper model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached module-level constructor call would reload the model each time.
    ``st.cache_resource`` keeps a single shared instance per argument set.

    Args:
        size: Model size identifier forwarded to ``WhisperModel``.
        device: Device string forwarded to ``WhisperModel``.
        compute_type: Compute type forwarded to ``WhisperModel``.

    Returns:
        The (possibly cached) ``WhisperModel`` instance.
    """
    return WhisperModel(size, device=device, compute_type=compute_type)


st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
# int8 on CPU is used because float16 was rejected on CPU (see the ValueError noted earlier in this file).
model = _load_whisper_model(model_size)
# JB: This uses my HF token!
# For comparison: loading with device="cpu" and the default compute type
# took about 1 minute on the HF Spaces Streamlit free tier.
st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
91
+ # LOADING OF model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\") TAKES ABOUT 33 sec (Na RERUN 1 minute) ON HF SPACES STREAMLIT FREE TIER
92
+
93
+
94
+ # USING:
95
+ # model = WhisperModel(model_size, device="cpu", compute_type="int8") # JB
96
+ # segments, info = model.transcribe("sam_altman_lex_podcast_367.flac", beam_size=1)
97
+ # /content/Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3
98
# Start transcription of the bundled test recording. The call returns the
# segments plus an info object; beam_size=1 is kept from the original test setup.
segments, info = model.transcribe("Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3", beam_size=1)

# Report the detected language and the per-language probabilities.
st.write("Detected language '%s' with probability %f" % (info.language, info.language_probability))
st.write("")
st.write("info.all_language_probs : ", info.all_language_probs)
st.write("len(info.all_language_probs): ", len(info.all_language_probs))
# Observed value: 99

st.write("")

st.write("info: ", info)

# Observed for the Ukrainian podcast file: 233.8249375
st.write("info.duration: ", info.duration)

# Derive minutes/seconds from the actual duration of the transcribed file
# instead of a hard-coded literal, so the output stays correct for any input.
minutes = int(info.duration // 60)
seconds = info.duration - minutes * 60
st.write(minutes, " minutes and ", seconds, " seconds")
117
+
118
+
119
# ``segments`` is a one-shot generator, so it can only be iterated once.
# Keep every segment while displaying it so the text can be assembled afterwards
# without losing the incremental per-segment output.
collected_segments = []
for segment in segments:
    st.write("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    collected_segments.append(segment)

st.write("---------------------------------------------------------------------")

st.write("TOTAL TEXT TO TRANSCRIBE:")
for segment in collected_segments:
    st.write(segment.text)

# Join the text of each segment (not the segment objects themselves, which
# cannot be concatenated to a str).
text_to_transcribe = " ".join(segment.text for segment in collected_segments)

st.write("text_to_transcribe: ", text_to_transcribe)
133
+
134
+
135
+ ###########################################################################################################
136
+
137
+
138
+ #
139
+ # Installation:
140
+ # pip install streamlit-audiorecorder
141
+ # Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
142
+ #
143
+ # On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
144
+ # On mac: brew install ffmpeg
145
+
146
+ import streamlit as st
147
+ from audiorecorder import audiorecorder
148
+
149
# Page heading for the recorder section.
st.title("Audio Recorder")

# Render the recorder component. Per the signature quoted in this file's header:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None)
# The return value is a pydub AudioSegment (see the header notes).
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)
152
+
153
+
154
+ # JB:
155
+ # https://docs.streamlit.io/develop/concepts/architecture/caching
156
+ # @st.cache_data
157
def audio_export(audio_wav_file, format):
    """Write the current module-level ``audio`` recording to a file.

    This function is intentionally NOT wrapped in ``st.cache_resource`` /
    ``st.cache_data``: those decorators skip the function body whenever the
    arguments repeat, so with a fixed file name only the first recording
    would ever be written to disk.

    Args:
        audio_wav_file: Destination path (or file-like object) handed to
            ``audio.export``.
        format: Output format name handed to ``audio.export`` (e.g. "wav").
            The name shadows the builtin but is kept because callers pass it
            by keyword.

    Returns:
        None.
    """
    # ``export`` hands back the output file handle; close it so the data is
    # flushed and the descriptor is released right away.
    exported = audio.export(audio_wav_file, format=format)
    exported.close()
161
+
162
# Only act when the recorder actually returned audio data.
has_recording = len(audio) > 0
if has_recording:
    # Play the recording in the frontend from an in-memory export.
    playback_bytes = audio.export().read()
    st.audio(playback_bytes)

    # Persist the recording to disk through the helper defined in this file.
    # Caching background: https://docs.streamlit.io/develop/concepts/architecture/caching
    audio_export("audio.wav", format="wav")  # JB 08-04-2024

    # Show basic properties exposed by the pydub AudioSegment.
    st.write(f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds")


# Clicking any button makes Streamlit re-execute the script.
st.button("Rerun")