Spaces:

iamtatsuki05
/

ocr_and_translate_en2jp

Runtime error

Synced repo using 'sync_with_huggingface' Github Action

b86e5c3 verified 6 months ago

1.63 kB

	import shutil
	import tempfile
	from io import BytesIO
	from pathlib import Path

	import streamlit as st

	from ocr_and_translate_en2jp.genarate_tranrate_df import df_generator

	st.title('OCR and Translation App')

	uploaded_file = st.file_uploader('Choose a file')
	if uploaded_file is not None:
	with tempfile.NamedTemporaryFile(
	delete=False, suffix=Path(uploaded_file.name).suffix
	) as tmpfile:
	shutil.copyfileobj(uploaded_file, tmpfile)
	uploaded_file_path = tmpfile.name

	max_words = st.number_input(
	'Maximum number of words to process', min_value=1, value=50
	)
	do_shuffle = st.checkbox('Shuffle output')
	seed = (
	st.number_input('Seed for shuffling', min_value=0, value=42)
	if do_shuffle
	else None
	)
	do_clean_noise_data = st.checkbox('Clean noise data', value=True)

	if st.button('Process'):
	df = df_generator(
	uploaded_file_path, max_words, do_shuffle, seed, do_clean_noise_data
	)
	st.dataframe(df)

	csv = df.to_csv(index=False).encode('utf-8')
	st.download_button(
	label='Download data as CSV',
	data=csv,
	file_name='processed_data.csv',
	mime='text/csv',
	)

	# REF: https://qiita.com/nyakiri_0726/items/2ae8cfb926c48072b190
	df.to_excel(buf := BytesIO(), index=False)

	st.download_button(
	label="Download data as Excel",
	data=buf.getvalue(),
	file_name='processed_data.xlsx',
	mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
	)