"""Streamlit mini-app for crowd-sourcing translations of darija sentences.

A random sentence is drawn from the ``sentences`` config of the Hugging Face
dataset ``REPO_ID`` and shown to the user, who may provide an English
translation and/or a transliteration in Latin characters. Each submission is
written to a small CSV-formatted file and uploaded to the dataset repository.
"""

import csv
import datetime as dt
import os
import random

import streamlit as st
from datasets import load_dataset
from huggingface_hub import HfApi

HF_API_KEY = os.environ.get("HF_TOKEN", None)
api = HfApi(token=HF_API_KEY)
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"


def load_data(repo_id):
    """Load the ``sentences`` split of the ``sentences`` config of *repo_id*."""
    dataset = load_dataset(f"{repo_id}", name="sentences", split="sentences")
    return dataset


def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random sentence and reset the translation inputs.

    Used both for the initial sentence and as the ``on_click`` callback of the
    "New Sentence" button.

    Args:
        dataset: Indexable dataset whose rows are mappings keyed by column name.
        column_name: Column the sentence is read from.

    Returns:
        The randomly selected sentence, which is also stored in
        ``st.session_state.sentence``.

    Raises:
        ValueError: If *dataset* is empty.
    """
    random_sentence_index = random.randrange(len(dataset))
    random_sentence = dataset[random_sentence_index][column_name]

    st.session_state.sentence = random_sentence
    # Clear both inputs so a previous answer is not carried over to the
    # newly displayed sentence.
    st.session_state.translation_input = ""
    st.session_state.translation_input_fr = ""

    return random_sentence


def store_submission(
    api: HfApi, sentence: str, translation: str, translation_fr: str
) -> None:
    """Write one submission to a file and upload it to the dataset repo.

    Nothing is written or uploaded unless *sentence* is non-empty and at least
    one of the two translations is non-empty.

    Args:
        api: Hugging Face Hub client used for the upload.
        sentence: The sentence shown to the user (read from the ``darija_ar``
            column by default).
        translation: English translation entered by the user.
        translation_fr: Darija in Latin characters entered by the user.
    """
    if not sentence or not (translation or translation_fr):
        return

    ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    folder_path = "submissions"
    os.makedirs(folder_path, exist_ok=True)
    filename = os.path.join(folder_path, f"submissions_{ts}.txt")

    # csv.writer quotes fields containing commas, quotes or newlines, which a
    # hand-built f-string row would silently corrupt. newline="" is what the
    # csv module requires of the underlying file object.
    with open(filename, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["darija", "eng", "darija_ar"])
        # Values follow the header order: Latin-script darija, English, then
        # the Arabic-script source sentence.
        writer.writerow([translation_fr, translation, sentence])

    print(REPO_ID)
    print(filename)

    api.upload_folder(
        folder_path=folder_path,
        path_in_repo=folder_path,
        repo_id=REPO_ID,
        repo_type="dataset",
        commit_message="New submission",
    )

    st.success(
        f"Translation submitted successfully to "
        f"{DATASET_REPO_URL}/tree/main/{folder_path}"
    )


# Load the dataset
dataset = load_data(REPO_ID)

# Initialise the per-session state on first run.
if "sentence" not in st.session_state:
    st.session_state.sentence = fetch_sentence(dataset)
if "translation_input" not in st.session_state:
    st.session_state.translation_input = ""
if "translation_input_fr" not in st.session_state:
    st.session_state.translation_input_fr = ""
if "display_new" not in st.session_state:
    st.session_state.display_new = False

st.title("Translate From Arabic to English")

st.markdown(
    "This mini-app allows you to \n"
    "contribute to the **darija-english** dataset as part of "
    "[DODa](https://darija-open-dataset.github.io/) project. "
    "To contribute, simply translate the given sentence from Arabic to English. "
    "The translated sentence will be submitted to the dataset "
    "[here](https://huggingface.co/datasets/imomayiz/darija-english)."
)

st.text("")

st.write(f"\n\n{st.session_state.sentence}.\n\n", unsafe_allow_html=True)

# Display new sentence button
st.session_state.display_new = st.button(
    "New Sentence", on_click=fetch_sentence, args=(dataset,)
)

# Input field for the English translation
translation_input_placeholder = st.empty()
with translation_input_placeholder.container():
    translation_input = st.text_input(
        "Enter translation to english: ", st.session_state.translation_input
    )
    st.session_state.translation_input = translation_input

# Input field for the darija transliteration in Latin characters
translation_input_placeholder_fr = st.empty()
with translation_input_placeholder_fr.container():
    translation_input_fr = st.text_input(
        "Enter translation to darija in latin characters: ",
        st.session_state.translation_input_fr,
    )
    st.session_state.translation_input_fr = translation_input_fr

# Submit button
if st.button("Submit Translation"):
    # Warn only when BOTH fields are empty; either one alone is a valid
    # submission, matching the check inside store_submission.
    if not (translation_input or translation_input_fr):
        st.warning("Please enter a translation before submitting.")
    else:
        store_submission(
            api,
            st.session_state.sentence,
            st.session_state.translation_input,
            st.session_state.translation_input_fr,
        )