Spaces:

JBHF
/

VERTAAL-APP-EAGLE-SHELTER

Running

File size: 9,286 Bytes

691ed18

# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 08-04-2024, 19u00m 
# ALREADY WORKS: the audio recorded with this app (audio.wav) does not necessarily have to be persisted.

# https://github.com/theevann/streamlit-audiorecorder
# An audio Recorder for streamlit
#
# Description
# Audio recorder component for streamlit.
# It creates a button to start the recording and takes three arguments:
# the start button text, the stop button text, and the pause button text.
# If the pause button text is not specified, the pause button is not displayed.
#
# Parameters
# The signature of the component is:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit 
# to properly distinguish multiple audiorecorders on the page.
#
# Return value
# The component's return value is a pydub AudioSegment.
#
# All AudioSegment methods are available, in particular you can:
# - Play the audio in the frontend with st.audio(audio.export().read())
# - Save the audio to a file with audio.export("audio.wav", format="wav")
#   JB: Why don't I see the file "audio.wav" in my HF Spaces environment?
#   JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
#                    See the references below:
#
# CACHING:
# ========
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
# https://docs.streamlit.io/develop/concepts/architecture/caching
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
#
# 08-04-2024
#
# AND
#
# PERSISTENCE:
# ============
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
#
# ——->
#
# DUPLICATED TO:
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
#
# 08-04-2024
#


###########################################################################################################
#
# Installation:
# pip install streamlit-audiorecorder
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
#
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
# On mac: brew install ffmpeg

import streamlit as st
from audiorecorder import audiorecorder

# Page title, followed by the recorder widget.
# Component signature (from its README):
#   audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None)
# The three prompts are the texts of the start, stop and pause buttons.
st.title("Audio Recorder")
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)


# JB:
# https://docs.streamlit.io/develop/concepts/architecture/caching
# @st.cache_data
def audio_export(audio_wav_file, format, segment=None):
    """Write a recording to ``audio_wav_file`` using its ``export`` method.

    This function used to be wrapped in ``@st.cache_resource``. That cache is
    keyed on the call arguments, so after the first
    ``audio_export("audio.wav", format="wav")`` every later call with the same
    arguments was a cache hit and the body was skipped: a new recording never
    replaced the file written for the first one. Writing a file is a side
    effect rather than a cacheable value, so the export now runs on every call.

    Args:
        audio_wav_file: Destination path of the exported file.
        format: Audio format passed through to ``export`` (e.g. ``"wav"``).
        segment: Object exposing a pydub-style ``export(path, format=...)``
            method. Defaults to the module-level ``audio`` recording, which is
            what the original implementation always exported.

    Returns:
        None.
    """
    if segment is None:
        # Backward-compatible default: export the recording held in the global.
        segment = audio
    handle = segment.export(audio_wav_file, format=format)
    # NOTE(review): pydub's export is documented to return the (still open)
    # output file object; close it so the handle is not leaked on every rerun.
    if handle is not None:
        handle.close()

# Only act when the recorder returned a non-empty recording.
has_recording = len(audio) > 0
if has_recording:
    # Play the recording back in the frontend.
    playback_bytes = audio.export().read()
    st.audio(playback_bytes)

    # Save the recording to a file through the audio_export helper
    # (the upstream example calls audio.export("audio.wav", format="wav") directly).
    audio_export("audio.wav", format="wav")

    # Show the recording's properties as exposed by the recording object.
    frame_rate = audio.frame_rate
    frame_width = audio.frame_width
    duration = audio.duration_seconds
    st.write(f"Frame rate: {frame_rate}, Frame width: {frame_width}, Duration: {duration} seconds")


# Button that lets the user trigger a rerun of the script.
st.button("Rerun")
###########################################################################################################


###########################################################################################################
# TEST
# ZIE:
# infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5

from faster_whisper import WhisperModel

model_size = "large-v2"


@st.cache_resource
def _load_whisper_model(size, device="cpu", compute_type="int8"):
    """Build the WhisperModel once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every interaction; without caching the
    model was constructed again each time (roughly 33 s to 1 min on the HF
    Spaces free tier according to the measurements noted below).

    Args:
        size: Model size name passed to ``WhisperModel`` (e.g. ``"large-v2"``).
        device: Device string passed to ``WhisperModel``.
        compute_type: Compute type passed to ``WhisperModel``.

    Returns:
        The constructed ``WhisperModel`` instance.
    """
    return WhisperModel(size, device=device, compute_type=compute_type)


# Variants tried earlier:
# - GPU with FP16 (original; runs OK on a Colab T4 GPU):
#     model = WhisperModel(model_size, device="cuda", compute_type="float16")
# - CPU with float16 fails:
#     model = WhisperModel(model_size, device="cpu", compute_type="float16")
#     ValueError: Requested float16 compute type, but the target device or backend
#     do not support efficient float16 computation.
# - CPU with the default compute type (runs OK on Colab CPU and on the HF Spaces
#   Streamlit free tier; loading takes about 1 minute there):
#     model = WhisperModel(model_size, device="cpu")
# JB: loading the model uses my HF token!

st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
# CPU with int8: an uncached load takes about 33 s (about 1 minute after a rerun)
# on the HF Spaces Streamlit free tier.
model = _load_whisper_model(model_size)
st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")


import os

# Transcription with:
#   model = WhisperModel(model_size, device="cpu", compute_type="int8")
# Files used in earlier tests:
#   segments, info = model.transcribe("sam_altman_lex_podcast_367.flac", beam_size=1)
#   segments, info = model.transcribe("Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3", beam_size=1)

# Guard: before the first recording has been saved there is no audio.wav, and
# transcribing would raise. Tell the user and end this script run instead.
if not os.path.isfile("audio.wav"):
    st.info("No recording available yet. Record some audio above to transcribe it.")
    st.stop()

# JB: this works - audio.wav stays available to the app for the duration of the session.
segments, info = model.transcribe("audio.wav", beam_size=1)

# Detected language and the per-language probabilities reported by the model.
st.write("Detected language '%s' with probability %f" % (info.language, info.language_probability))
st.write("")
st.write("info.all_language_probs     : ", info.all_language_probs)
st.write("len(info.all_language_probs): ", len(info.all_language_probs))
# Observed in an earlier test: 99

st.write("")

st.write("info: ", info)

# Duration as reported by the transcription info.
# Observed for the Ukrainian podcast test file: 233.8249375
st.write("info.duration: ", info.duration)

# Split the duration into whole minutes and the remaining seconds.
minutes = int(info.duration / 60)
seconds = info.duration - minutes*60

st.write(minutes," minutes and ", seconds, " seconds")


# Show every transcribed segment with its start/end time and collect the texts.
# Each segment text is prefixed with a single space, so the combined string
# starts with a space whenever at least one segment was produced.
segment_texts = []
for seg in segments:
    st.write("[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text))
    segment_texts.append(" " + seg.text)
text_to_transcribe = "".join(segment_texts)

st.write("---------------------------------------------------------------------")

# An earlier variant that looped over `segments` a second time to build the
# total text is no longer used; the text is collected in the loop above.

st.write("text_to_transcribe: ", text_to_transcribe)
# Sample of Ukrainian text, actually recorded with the microphone and transcribed, kept here as a test
# input for translating into Dutch below with the Mixtral-8x7b LLM on Groq:
# text_to_transcribe: 
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів


###########################################################################################################
# TRANSLATION
# Sample of Ukrainian text, actually recorded with the microphone and transcribed, kept here as a test
# input for translating into Dutch below with the Mixtral-8x7b LLM on Groq:
# text_to_transcribe: 
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
# ...






###########################################################################################################