DODa / src /components.py
Imane Momayiz
fix: ar encoding
d8ad733
raw
history blame
No virus
1.84 kB
import streamlit as st
import datetime as dt
import random
import json
import os
from huggingface_hub import CommitScheduler
from datasets import load_dataset
import uuid
# Identifier of the Hugging Face dataset repository and its public web URL
# (the URL is shown to the user after a successful submission).
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = "https://huggingface.co/datasets/" + REPO_ID

# Submissions are appended to a JSON Lines file inside `submissions_folder`.
# The file name embeds a random UUID generated once, when this module is
# first executed, so every module load gets its own file.
submissions_folder = "submissions"
submissions_file = os.path.join(
    submissions_folder,
    "submissions_" + str(uuid.uuid4()) + ".json",
)
def load_data(repo_id):
    """Load the ``sentences`` split of the ``sentences`` configuration.

    Args:
        repo_id: Dataset identifier handed to ``datasets.load_dataset``
            (formatted into a string first).

    Returns:
        Whatever ``load_dataset`` returns for that configuration and split.
    """
    dataset_path = f"{repo_id}"
    return load_dataset(dataset_path, name="sentences", split="sentences")
def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random sentence and reset the translation inputs.

    A row index is drawn uniformly from ``dataset`` and the value stored
    under ``column_name`` in that row is placed in
    ``st.session_state.sentence``. Both translation fields in the session
    state are cleared to empty strings.

    Args:
        dataset: Indexable collection supporting ``len()`` whose rows can be
            indexed by ``column_name``.
        column_name: Key of the field to read from the chosen row.

    Returns:
        The sentence that was picked.

    Raises:
        ValueError: If ``dataset`` is empty (there is no index to draw).
    """
    row_index = random.randrange(len(dataset))
    picked = dataset[row_index][column_name]

    # Publish the new sentence, then blank out both translation fields.
    state = st.session_state
    state.sentence = picked
    state.translation_input = ""
    state.translation_input_fr = ""
    return picked
def store_submission(
    scheduler: CommitScheduler, sentence: str, translation: str, translation_fr: str
):
    """Append one translation submission to the local JSON Lines file.

    The record is written to ``submissions_file`` while holding
    ``scheduler.lock`` so that concurrent writers do not interleave lines.
    A Streamlit success message is displayed afterwards.

    Args:
        scheduler: Object exposing a ``lock`` context manager that guards
            writes to the submissions file.
        sentence: Source sentence; stored under the ``"darija_ar"`` key.
        translation: Stored under the ``"eng"`` key.
        translation_fr: Stored under the ``"darija"`` key.
    """
    ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    record = {
        "darija": translation_fr,
        "eng": translation,
        "darija_ar": sentence,
        "timestamp": ts,
    }
    # ensure_ascii=False keeps non-ASCII (e.g. Arabic) characters as-is, so the
    # file must be opened as UTF-8 explicitly: the platform default encoding
    # may be unable to encode them or may produce non-UTF-8 bytes.
    line = json.dumps(record, ensure_ascii=False) + "\n"

    with scheduler.lock:
        with open(submissions_file, "a", encoding="utf-8") as f:
            f.write(line)

    st.success(
        f"""Translation submitted successfully.
You will see your commit in 1 minute at
{DATASET_REPO_URL}/tree/main/{submissions_folder}.
You can submit another translation or check the dataset."""
    )