# PFA-Demo / app.py
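"""Streamlit demo for the two-stream lipreading model.

The app lists sample clips from ``app_input``, previews the selected clip
alongside its expected-result clip, extracts frames and lip-landmark
coordinates, and decodes the model output with CTC.
"""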
import os

import cv2
import streamlit as st

import options as opt
from scripts.extract_lip_coordinates import generate_lip_coordinates
from utils.demo import load_video, ctc_decode
from utils.two_stream_infer import load_model
st.set_page_config(layout="wide")
model = load_model()
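# NOTE: Streamlit re-executes this script on every interaction, so the model
# is reloaded on each rerun. Wrapping load_model() in st.cache_resource would
# keep a single instance alive (assumes Streamlit >= 1.18).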
st.title("Lipreading Final Year Project Demo")
st.info(
    "Inference is very slow on Hugging Face Spaces because it runs entirely on the CPU.",
    icon="ℹ️",
)
# Build a sorted list of the sample videos available for selection
options = sorted(os.listdir("app_input"))
selected_video = st.selectbox("Choose video", options)
# Find the index of the selected video and calculate the index of the next video
selected_index = options.index(selected_video)
next_video_index = (selected_index + 1) % len(options) # Ensures looping back to start
next_video = options[next_video_index]
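# NOTE: this assumes the files in app_input are ordered so that each clip's
# expected-result clip immediately follows it in sorted order (inferred from
# the "Expected result" label below).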
col1, col2 = st.columns(2)
# Display a video in a column by streaming resized frames into a single
# placeholder; updating one image in place avoids stacking a separate
# st.image element per frame
def display_video(column, video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    new_width = 320  # Adjust this value for the desired display width
    new_height = int((new_width / width) * height)
    placeholder = column.empty()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (new_width, new_height))
        placeholder.image(frame, channels="BGR")
    cap.release()
# Display the selected video in the first column
with col1:
    file_path = os.path.join("app_input", selected_video)
    video_name = os.path.splitext(selected_video)[0]
    display_video(col1, file_path)
# Display the expected-result video in the second column
with col2:
    st.info("Expected result!")
    next_file_path = os.path.join("app_input", next_video)
    display_video(col2, next_file_path)
# Further processing (frame extraction, landmarks, prediction) applies only
# to the selected video
with col1, st.spinner("Processing video..."):
    # Re-encode the selected clip to H.264 first: the cv2 preview above does
    # not write the {video_name}.mp4 copy that load_video reads from the
    # working directory
    os.system(f'ffmpeg -i "{file_path}" -vcodec libx264 "{video_name}.mp4" -y')
    video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
    coordinates = generate_lip_coordinates(f"{video_name}_samples")
    st.markdown(f"Frames generated for {video_name}:\n{files}")
    st.markdown(f"Coordinates generated for {video_name}:\n{coordinates}")
with col2:
    st.info("Ready to make a prediction!")
    generate = st.button("Generate")
    if generate:
        with st.spinner("Generating..."):
            y = model(
                video[None, ...].to(opt.device),
                coordinates[None, ...].to(opt.device),
            )
            # Show the last decoded string as the final prediction
            txt = ctc_decode(y[0])
            st.text(txt[-1])
st.info("Author ©️ : Wissem Karous")
st.info("Made with ❤️")