# iamtatsuki05's picture
# Synced repo using 'sync_with_huggingface' Github Action
# b86e5c3 verified
import shutil
import tempfile
from io import BytesIO
from pathlib import Path
import streamlit as st
from ocr_and_translate_en2jp.genarate_tranrate_df import df_generator
# Streamlit UI: upload a file, choose processing options, run `df_generator`
# on the upload and offer the resulting table as CSV and Excel downloads.
#
# NOTE: Streamlit re-executes this script on every widget interaction, so
# anything at this level runs once per rerun. Expensive or side-effecting work
# (such as writing the upload to disk) is therefore kept behind the button.
st.title('OCR and Translation App')

uploaded_file = st.file_uploader('Choose a file')

if uploaded_file is not None:
    # --- Processing options -------------------------------------------------
    max_words = st.number_input(
        'Maximum number of words to process', min_value=1, value=50
    )
    do_shuffle = st.checkbox('Shuffle output')
    # The seed widget is only rendered when shuffling is enabled; otherwise
    # `None` is forwarded to `df_generator`.
    seed = (
        st.number_input('Seed for shuffling', min_value=0, value=42)
        if do_shuffle
        else None
    )
    do_clean_noise_data = st.checkbox('Clean noise data', value=True)

    if st.button('Process'):
        # `df_generator` is called with a file path, so the in-memory upload is
        # spilled to a temporary file that keeps the original extension.
        # The file is created only when processing is requested and is always
        # removed afterwards; previously one temp file was leaked per rerun.
        uploaded_file_path = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=Path(uploaded_file.name).suffix
            ) as tmpfile:
                uploaded_file_path = tmpfile.name
                # Rewind first: if the upload stream was already consumed,
                # copying from the current position would write nothing.
                uploaded_file.seek(0)
                shutil.copyfileobj(uploaded_file, tmpfile)

            # The temp file is closed here so it can be reopened by path.
            # NOTE(review): assumes `df_generator` has finished reading the
            # file by the time it returns - confirm against its implementation.
            df = df_generator(
                uploaded_file_path,
                max_words,
                do_shuffle,
                seed,
                do_clean_noise_data,
            )
        finally:
            if uploaded_file_path is not None:
                Path(uploaded_file_path).unlink(missing_ok=True)

        st.dataframe(df)

        # CSV download.
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label='Download data as CSV',
            data=csv,
            file_name='processed_data.csv',
            mime='text/csv',
        )

        # Excel download: serialise into an in-memory buffer.
        # REF: https://qiita.com/nyakiri_0726/items/2ae8cfb926c48072b190
        buf = BytesIO()
        df.to_excel(buf, index=False)
        st.download_button(
            label="Download data as Excel",
            data=buf.getvalue(),
            file_name='processed_data.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        )