# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 08-04-2024, 19u00m
# ALREADY WORKS: THE AUDIO RECORDED WITH THIS APP, audio.wav, DOES NOT NECESSARILY HAVE TO BE PERSISTED !!!!!!
#
# https://github.com/theevann/streamlit-audiorecorder
# An audio Recorder for streamlit
#
# Description
# Audio recorder component for streamlit.
# It creates a button to start the recording and takes three arguments:
# the start button text, the stop button text, and the pause button text.
# If the pause button text is not specified, the pause button is not displayed.
#
# Parameters
# The signature of the component is:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
# to properly distinguish multiple audiorecorders on the page.
#
# Return value
# The component's return value is a pydub AudioSegment.
#
# All AudioSegment methods are available, in particular you can:
# - Play the audio in the frontend with st.audio(audio.export().read())
# - Save the audio to a file with audio.export("audio.wav", format="wav")
#
# JB: Why do I not see the file "audio.wav" in my HF Spaces environment?
# JB: 08-04-2024 - Possibly caching is already sufficient (otherwise use file persistence).
# See:
#
# CACHING:
# ========
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
# https://docs.streamlit.io/develop/concepts/architecture/caching
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
#
# 08-04-2024
#
# AND
#
# PERSISTENCE:
# ============
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
#
# ——->
#
# DUPLICATED TO:
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
#
# 08-04-2024
#
###########################################################################################################
#
# Installation:
# pip install streamlit-audiorecorder
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
#
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
# On mac: brew install ffmpeg

import streamlit as st
from audiorecorder import audiorecorder

st.title("Audio Recorder")

# Component signature:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None)
audio = audiorecorder("Click to record", "Click to stop recording", "Click to pause recording")


def audio_export(audio_wav_file, format, segment=None):
    """Write a recording to ``audio_wav_file`` in the given ``format``.

    This function is intentionally NOT decorated with ``st.cache_resource`` /
    ``st.cache_data``. Streamlit keys cached calls on the function arguments,
    and this function is always called with the same file name and format, so
    a cached version would run its body only once: every later recording
    would silently be skipped and a stale file would be transcribed. The file
    on disk is what carries the audio between reruns, not the cache.

    Args:
        audio_wav_file: Destination path handed to the pydub ``export`` method.
        format: Output format handed to ``export`` (e.g. ``"wav"``). The name
            shadows the builtin but is kept because callers pass it by keyword.
        segment: Recording to export. Defaults to the module-level ``audio``
            returned by the recorder component.
    """
    if segment is None:
        segment = audio
    segment.export(audio_wav_file, format=format)


if len(audio) > 0:
    # Play the recording back in the frontend.
    st.audio(audio.export().read())

    # Save the recording to disk with the pydub export method so that the
    # transcription step can read it from "audio.wav".
    audio_export("audio.wav", format="wav")  # JB 08-04-2024

    # Show the pydub AudioSegment properties of the recording.
    st.write(f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds")

st.button("Rerun")
###########################################################################################################
###########################################################################################################
# TEST
# SEE:
# infer_faster_whisper_large_v2 (CPU VERSION !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5

from faster_whisper import WhisperModel

model_size = "large-v2"

# Configurations that were tried:
# - device="cuda", compute_type="float16": the original setup, runs fine on a Colab T4 GPU.
# - device="cpu", compute_type="float16": fails with
#   "ValueError: Requested float16 compute type, but the target device or backend
#   do not support efficient float16 computation."
# - device="cpu" (default compute type): works on Colab CPU and on the HF Spaces
#   Streamlit free tier.
# - device="cpu", compute_type="int8": the configuration used here.
# JB: loading the model uses my HF token!


@st.cache_resource
def _load_whisper_model(size, device="cpu", compute_type="int8"):
    """Build the faster-whisper model once and reuse it across reruns.

    Streamlit re-executes this script on every interaction, and constructing
    the model was measured at roughly 33 seconds to 1 minute on the HF Spaces
    free tier. ``st.cache_resource`` keeps one instance per distinct
    ``(size, device, compute_type)`` combination so only the first run pays
    that cost.

    Args:
        size: Model size or name passed to ``WhisperModel`` (e.g. ``"large-v2"``).
        device: Device to run on; ``"cpu"`` by default.
        compute_type: Compute type passed to ``WhisperModel``; ``"int8"`` by default.

    Returns:
        The ``WhisperModel`` instance. It is shared by all sessions of the app.
    """
    return WhisperModel(size, device=device, compute_type=compute_type)


st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
model = _load_whisper_model(model_size)
# Load times measured on the HF Spaces Streamlit free tier:
# - WhisperModel(model_size, device="cpu") takes about 1 minute.
# - WhisperModel(model_size, device="cpu", compute_type="int8") takes about 33 seconds
#   (about 1 minute after a rerun).

from pathlib import Path

# File the recorder part of this app exports the microphone recording to.
# It remains available for the duration of the session.
_AUDIO_FILE = "audio.wav"

# Earlier test inputs were "sam_altman_lex_podcast_367.flac" and
# "Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3".

# Nothing can be transcribed before the first recording has been exported;
# stop this run cleanly instead of failing on a missing file.
if not Path(_AUDIO_FILE).is_file():
    st.info("No recording found yet - record some audio first to get a transcription.")
    st.stop()

segments, info = model.transcribe(_AUDIO_FILE, beam_size=1)

st.write(f"Detected language '{info.language}' with probability {info.language_probability:f}")
st.write("")
st.write("info.all_language_probs : ", info.all_language_probs)
st.write("len(info.all_language_probs): ", len(info.all_language_probs))  # 99
st.write("")
st.write("info: ", info)

# For the Ukrainian podcast test file this reported 233.8249375.
st.write("info.duration: ", info.duration)

# Split the duration (in seconds) into whole minutes plus remaining seconds.
minutes = int(info.duration / 60)
seconds = info.duration - minutes * 60
st.write(minutes," minutes and ", seconds, " seconds")

# NOTE(review): per the faster-whisper README `segments` is a lazy generator,
# so the actual transcription work happens while this loop consumes it.
segment_texts = []
for segment in segments:
    st.write(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
    segment_texts.append(segment.text)

# Every segment is prefixed with one space, so a non-empty transcript starts
# with a space (same result as the previous repeated concatenation).
text_to_transcribe = "".join(" " + text for text in segment_texts)

st.write("---------------------------------------------------------------------")

st.write("text_to_transcribe: ", text_to_transcribe)

# PIECE OF UKRAINIAN TEXT ACTUALLY RECORDED WITH THE MIC AND TRANSCRIBED AS A TEST,
# TO BE TRANSLATED INTO DUTCH BELOW USING THE LLM MIXTRAL-8x7b-GROQ! :
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів

###########################################################################################################
# TRANSLATION
# PIECE OF UKRAINIAN TEXT ACTUALLY RECORDED WITH THE MIC AND TRANSCRIBED AS A TEST,
# TO BE TRANSLATED INTO DUTCH BELOW USING THE LLM MIXTRAL-8x7b-GROQ! :
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
# ...
###########################################################################################################