import streamlit as st from datasets import load_dataset import csv import datetime as dt import random import os from huggingface_hub import Repository, HfApi HF_API_KEY = os.environ.get("HF_TOKEN", None) api = HfApi(token=HF_API_KEY) REPO_ID = "imomayiz/darija-english" DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}" SUBMISSIONS_DATA_FILE = os.path.join("submissions", "submissions.csv") submissions_repo = Repository( local_dir="submissions", clone_from=DATASET_REPO_URL, use_auth_token=HF_API_KEY ) def load_data(repo_id): dataset = load_dataset(f'{repo_id}', name='sentences', split='sentences') return dataset def fetch_sentence(dataset, column_name="darija_ar"): # Get a random sentence random_sentence_index = random.randint(0, len(dataset) - 1) random_sentence = dataset[random_sentence_index][column_name] return random_sentence def store_submission(sentence: str, translation: str, translation_fr: str): if sentence and (translation or translation_fr): with open(SUBMISSIONS_DATA_FILE, "a") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=["darija", "eng", "darija_ar", "time"]) writer.writerow( {"darija_ar": sentence, "eng": translation, "darija": translation_fr, "time": str(dt.datetime.now())} ) api.upload_file( path_or_fileobj=SUBMISSIONS_DATA_FILE, path_in_repo=SUBMISSIONS_DATA_FILE, repo_id=REPO_ID, repo_type="dataset", commit_message="New submission", ) st.success(f"Submitted at {DATASET_REPO_URL}/{SUBMISSIONS_DATA_FILE}") # Load the dataset dataset = load_data(REPO_ID) def main(): if "sentence" not in st.session_state: st.session_state.sentence = fetch_sentence(dataset) if 'translation_input' not in st.session_state: st.session_state.translation_input = "" if 'translation_input_fr' not in st.session_state: st.session_state.translation_input_fr = "" if 'display_new' not in st.session_state: st.session_state.display_new = False st.title("Translate From Arabic to English") st.markdown( """This mini-app allows you to contribute to the **darija-english** dataset as part of [DODa](https://darija-open-dataset.github.io/) project. To contribute, simply translate the given sentence from Arabic to English. The translated sentence will be submitted to the dataset [here](https://huggingface.co/datasets/imomayiz/darija-english).""" ) st.text("") st.write(f"""
{st.session_state.sentence}.