Spaces:
Running
Running
File size: 9,286 Bytes
691ed18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 08-04-2024, 19u00m
# ALREADY WORKS: the audio recorded with this app (audio.wav) does not necessarily have to be persisted.
# https://github.com/theevann/streamlit-audiorecorder
# An audio Recorder for streamlit
#
# Description
# Audio recorder component for streamlit.
# It creates a button to start the recording and takes three arguments:
# the start button text, the stop button text, and the pause button text.
# If the pause button text is not specified, the pause button is not displayed.
#
# Parameters
# The signature of the component is:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
# to properly distinguish multiple audiorecorders on the page.
#
# Return value
# The component's return value is a pydub AudioSegment.
#
# All AudioSegment methods are available, in particular you can:
# - Play the audio in the frontend with st.audio(audio.export().read())
# - Save the audio to a file with audio.export("audio.wav", format="wav")
# JB: Why do I not see the file "audio.wav" in my HF Spaces environment?
# JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
# See:
#
# CACHING:
# ========
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
# https://docs.streamlit.io/develop/concepts/architecture/caching
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
#
# 08-04-2024
#
# EN
#
# PERSISTENCE:
# ============
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
#
# ——->
#
# DUPLICATED TO:
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
#
# 08-04-2024
#
###########################################################################################################
#
# Installation:
# pip install streamlit-audiorecorder
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
#
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
# On mac: brew install ffmpeg
import streamlit as st
from audiorecorder import audiorecorder
# Page heading shown above the recorder widget.
st.title("Audio Recorder")

# Render the recorder component. The three strings are the labels of the
# start, stop and pause buttons; the return value is a pydub AudioSegment.
# Signature: audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None)
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)
# JB:
# https://docs.streamlit.io/develop/concepts/architecture/caching
# @st.cache_data
def audio_export(audio_wav_file, format):
    """Write the current recording to ``audio_wav_file``.

    This helper is intentionally NOT decorated with ``st.cache_resource`` or
    ``st.cache_data``. Streamlit's caching decorators skip the function body
    whenever the arguments match an earlier call, and this function is always
    called with the same arguments ("audio.wav", "wav"). With caching, only
    the first recording would ever be written; every later recording would be
    silently dropped and the file on disk would go stale.

    Args:
        audio_wav_file: Destination handed straight to ``audio.export``
            (for example ``"audio.wav"``).
        format: Audio format name handed to ``audio.export`` (for example
            ``"wav"``). The name shadows the builtin but is kept because the
            caller passes it by keyword.

    Returns:
        None.
    """
    # `audio` is the module-level recording returned by audiorecorder(...).
    audio.export(audio_wav_file, format=format)
# Only act when the recorder actually returned some audio.
if len(audio) > 0:
    # Play the recording back in the frontend.
    playback_bytes = audio.export().read()
    st.audio(playback_bytes)

    # Save the recording to a file through the audio_export helper
    # (originally a direct call: audio.export("audio.wav", format="wav")).
    audio_export("audio.wav", format="wav")  # JB 08-04-2024

    # Show basic properties of the pydub AudioSegment.
    frame_rate = audio.frame_rate
    frame_width = audio.frame_width
    duration_seconds = audio.duration_seconds
    st.write(f"Frame rate: {frame_rate}, Frame width: {frame_width}, Duration: {duration_seconds} seconds")

# Clicking a button makes Streamlit re-execute the script.
st.button("Rerun")
###########################################################################################################
###########################################################################################################
# TEST
# ZIE:
# infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5
from faster_whisper import WhisperModel

model_size = "large-v2"


@st.cache_resource  # build the model once and reuse it on later reruns
def _load_whisper_model(size):
    """Return a CPU ``WhisperModel`` for ``size``, cached across reruns.

    Streamlit re-executes the whole script on every interaction. Without
    caching, the model would be rebuilt each time; the author's notes report
    roughly 33 seconds to 1 minute per load on the HF Spaces free tier.

    Args:
        size: Model size name passed to ``WhisperModel`` (e.g. "large-v2").

    Returns:
        The loaded ``WhisperModel`` instance.
    """
    # Author's notes:
    # - On a GPU (Colab T4) device="cuda", compute_type="float16" worked.
    # - On CPU, compute_type="float16" fails with:
    #   ValueError: Requested float16 compute type, but the target device or
    #   backend do not support efficient float16 computation.
    # - device="cpu" without a compute_type also worked; int8 is used here.
    # - JB: loading the model uses my HF token.
    # NOTE(review): st.cache_resource hands the same model object to every
    # session - confirm concurrent transcribe() calls are acceptable.
    return WhisperModel(size, device="cpu", compute_type="int8")


st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
model = _load_whisper_model(model_size)  # JB
st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
# USING:
# model = WhisperModel(model_size, device="cpu", compute_type="int8") # JB
# segments, info = model.transcribe("sam_altman_lex_podcast_367.flac", beam_size=1)
# /content/Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3
# segments, info = model.transcribe("Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3", beam_size=1)
# TEST:
# Transcribe the file written by the recorder section.
# Author's note: this works - audio.wav stays available to the app for the
# duration of the session.
# NOTE(review): this reads "audio.wav" from disk, so it presumably fails when
# no recording has been exported yet - confirm the intended placement.
segments, info = model.transcribe("audio.wav", beam_size=1)

# Detected language and its probability.
language_line = "Detected language '%s' with probability %f" % (info.language, info.language_probability)
st.write(language_line)
st.write("")

# Full language-probability list and its length (the author observed 99).
language_probs = info.all_language_probs
st.write("info.all_language_probs : ", language_probs)
st.write("len(info.all_language_probs): ", len(language_probs))
st.write("")

# Raw transcription info object, then the duration (author's example value
# for a podcast file: 233.8249375).
st.write("info: ", info)
duration = info.duration
st.write("info.duration: ", duration)

# Split the duration into whole minutes and the remaining seconds.
minutes = int(duration / 60)
seconds = duration - minutes * 60
st.write(minutes, " minutes and ", seconds, " seconds")
# Show every segment with its start/end timestamps and collect the segment
# texts so the full transcript can be assembled in one join afterwards
# (instead of growing a string by repeated concatenation inside the loop).
segment_texts = []
for segment in segments:
    st.write("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    segment_texts.append(segment.text)

# Each segment text is prefixed with a single space, so the result is exactly
# what the previous `text + " " + segment.text` accumulation produced
# (including the leading space, and "" when there are no segments).
text_to_transcribe = "".join(" " + text for text in segment_texts)

st.write("---------------------------------------------------------------------")
st.write("text_to_transcribe: ", text_to_transcribe)
# A SHORT PIECE OF UKRAINIAN TEXT, ACTUALLY RECORDED WITH THE MIC AND TRANSCRIBED, AS A TEST,
# TO BE TRANSLATED INTO DUTCH BELOW USING THE LLM MIXTRAL-8x7b (GROQ):
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
###########################################################################################################
# VERTALING
# A SHORT PIECE OF UKRAINIAN TEXT, ACTUALLY RECORDED WITH THE MIC AND TRANSCRIBED, AS A TEST,
# TO BE TRANSLATED INTO DUTCH BELOW USING THE LLM MIXTRAL-8x7b (GROQ):
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
# ...
########################################################################################################### |