Spaces:
Running
Running
app-08-04-2024-17u50m.py
Browse files- app-08-04-2024-17u50m.py +177 -0
app-08-04-2024-17u50m.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 26-03-2024 17u50m CET
|
2 |
+
|
3 |
+
# https://github.com/theevann/streamlit-audiorecorder
|
4 |
+
# An audio Recorder for streamlit
|
5 |
+
#
|
6 |
+
# Description
|
7 |
+
# Audio recorder component for streamlit.
|
8 |
+
# It creates a button to start the recording and takes three arguments:
|
9 |
+
# the start button text, the stop button text, and the pause button text.
|
10 |
+
# If the pause button text is not specified, the pause button is not displayed.
|
11 |
+
#
|
12 |
+
# Parameters
|
13 |
+
# The signature of the component is:
|
14 |
+
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
|
15 |
+
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
|
16 |
+
# to properly distinguish multiple audiorecorders on the page.
|
17 |
+
#
|
18 |
+
# Return value
|
19 |
+
# The component's return value is a pydub AudioSegment.
|
20 |
+
#
|
21 |
+
# All AudioSegment methods are available, in particular you can:
|
22 |
+
# - Play the audio in the frontend with st.audio(audio.export().read())
|
23 |
+
# - Save the audio to a file with audio.export("audio.wav", format="wav")
|
24 |
+
# JB: Why don't I see the file "audio.wav" in my HF Spaces environment?
|
25 |
+
# JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
|
26 |
+
# Zie hiervoor:
|
27 |
+
#
|
28 |
+
# CACHING:
|
29 |
+
# ========
|
30 |
+
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
|
31 |
+
# https://docs.streamlit.io/develop/concepts/architecture/caching
|
32 |
+
#
|
33 |
+
# EVERNOTE :
|
34 |
+
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
|
35 |
+
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
|
36 |
+
#
|
37 |
+
# 08-04-2024
|
38 |
+
#
|
39 |
+
# EN
|
40 |
+
#
|
41 |
+
# PERSISTENCE:
|
42 |
+
# ============
|
43 |
+
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
|
44 |
+
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
|
45 |
+
#
|
46 |
+
# ——->
|
47 |
+
#
|
48 |
+
# DUPLICATED TO:
|
49 |
+
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
|
50 |
+
#
|
51 |
+
# EVERNOTE :
|
52 |
+
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
|
53 |
+
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
|
54 |
+
#
|
55 |
+
# 08-04-2024
|
56 |
+
#
|
57 |
+
|
58 |
+
import streamlit as st
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
###########################################################################################################
|
63 |
+
# TEST
|
64 |
+
# ZIE:
|
65 |
+
# infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
|
66 |
+
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5
|
67 |
+
|
68 |
+
from faster_whisper import WhisperModel
|
69 |
+
|
70 |
+
model_size = "large-v2"


# Streamlit re-executes this whole script on every interaction, so building
# the model at module level reloads it each time (notes from 08-04-2024:
# about 33 s for int8, about 1 minute after a rerun, on the HF Spaces
# Streamlit free tier). st.cache_resource keeps one loaded instance per
# argument combination and hands it back on later reruns.
@st.cache_resource
def load_whisper_model(size, device="cpu", compute_type="int8"):
    """Build the faster-whisper model once and reuse it across reruns.

    Args:
        size: Whisper model name passed to ``WhisperModel`` (e.g. "large-v2").
        device: Device string passed to ``WhisperModel``.
        compute_type: Compute type passed to ``WhisperModel``.

    Returns:
        The ``WhisperModel`` instance for the given arguments.

    Notes on configurations that were tried:
        * device="cuda", compute_type="float16": the original setup, ran OK
          on a Colab T4 GPU.
        * device="cpu", compute_type="float16": fails with "ValueError:
          Requested float16 compute type, but the target device or backend
          do not support efficient float16 computation."
        * device="cpu" with the default compute type: worked on Colab CPU and
          on the HF Spaces Streamlit free tier; loading took about 1 minute.
        * device="cpu", compute_type="int8": the configuration used here.
    """
    return WhisperModel(size, device=device, compute_type=compute_type)


st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
# JB: loading the model uses my HF token.
model = load_whisper_model(model_size)
st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
|
92 |
+
|
93 |
+
|
94 |
+
# USING:
|
95 |
+
# model = WhisperModel(model_size, device="cpu", compute_type="int8") # JB
|
96 |
+
# segments, info = model.transcribe("sam_altman_lex_podcast_367.flac", beam_size=1)
|
97 |
+
# /content/Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3
|
98 |
+
# Transcribe the sample recording that ships next to this script.
# `segments` yields the transcribed segments, `info` carries the detection
# metadata shown below.
segments, info = model.transcribe("Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3", beam_size=1)
st.write("Detected language '%s' with probability %f" % (info.language, info.language_probability))
st.write("")
st.write("info.all_language_probs : ", info.all_language_probs)
# Observed value for this model: 99 entries.
st.write("len(info.all_language_probs): ", len(info.all_language_probs))

st.write("")

st.write("info: ", info)

# For the Ukrainian podcast sample this was observed to be 233.8249375 seconds.
st.write("info.duration: ", info.duration)

# Derive minutes/seconds from the actual duration of the transcribed file
# rather than from a literal copied from one earlier run, so the line stays
# correct when a different audio file is used.
minutes = int(info.duration / 60)
seconds = info.duration - minutes * 60
st.write(minutes," minutes and ", seconds, " seconds")
|
117 |
+
|
118 |
+
|
119 |
+
# Per the faster-whisper documentation `segments` is a generator, so it can
# be consumed only once. Keep every segment while printing it, so the full
# text can be assembled afterwards without a second (empty) pass.
collected_segments = []
for segment in segments:
    st.write("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    collected_segments.append(segment)

st.write("---------------------------------------------------------------------")

st.write("TOTAL TEXT TO TRANSCRIBE:")
for segment in collected_segments:
    st.write(segment.text)

# Join the text of each segment (not the segment object itself, which cannot
# be concatenated to a str). Stripping first gives single spaces between
# segments regardless of whitespace at the edges of each segment's text.
text_to_transcribe = " ".join(segment.text.strip() for segment in collected_segments)

st.write("text_to_transcribe: ", text_to_transcribe)
|
133 |
+
|
134 |
+
|
135 |
+
###########################################################################################################
|
136 |
+
|
137 |
+
|
138 |
+
#
|
139 |
+
# Installation:
|
140 |
+
# pip install streamlit-audiorecorder
|
141 |
+
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
|
142 |
+
#
|
143 |
+
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
|
144 |
+
# On mac: brew install ffmpeg
|
145 |
+
|
146 |
+
import streamlit as st
|
147 |
+
from audiorecorder import audiorecorder
|
148 |
+
|
149 |
+
st.title("Audio Recorder")

# Component signature, as documented in this file's header:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None)
# Passing a pause prompt makes the pause button appear; the return value is
# a pydub AudioSegment.
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)
|
152 |
+
|
153 |
+
|
154 |
+
# JB:
|
155 |
+
# https://docs.streamlit.io/develop/concepts/architecture/caching
|
156 |
+
# @st.cache_data
|
157 |
+
def audio_export(audio_wav_file, format):
    """Write the current recording (module-level ``audio``) to a file.

    Args:
        audio_wav_file: Destination path handed to ``audio.export``.
        format: Audio container/format name handed to ``audio.export``
            (for example "wav").

    Returns:
        None.
    """
    # This function is intentionally NOT decorated with @st.cache_resource.
    # The cache key is built from the arguments only ("audio.wav", "wav"),
    # while the data being written comes from the global `audio`. With the
    # decorator the body ran for the first recording only; every later call
    # with the same arguments returned the cached result and the new
    # recording was never written to disk.
    audio.export(audio_wav_file, format=format)
|
161 |
+
|
162 |
+
# Nothing to show until a recording with a non-zero length exists.
if len(audio) > 0:
    # Play the recording back in the frontend.
    playback_bytes = audio.export().read()
    st.audio(playback_bytes)

    # Save the recording to a file through the helper defined in this file
    # (JB 08-04-2024; the original called audio.export("audio.wav", format="wav") directly).
    audio_export("audio.wav", format="wav")

    # Report the pydub AudioSegment properties of the recording.
    properties_line = f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds"
    st.write(properties_line)


# Always-visible button labelled "Rerun"; its return value is not used.
st.button("Rerun")
|