File size: 1,835 Bytes
d9514f5 c437348 d9514f5 c437348 d9514f5 c437348 d9514f5 c437348 d9514f5 c437348 d9514f5 c437348 d9514f5 c437348 d9514f5 28c72ae d8ad733 d9514f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import streamlit as st
import datetime as dt
import random
import json
import os
from huggingface_hub import CommitScheduler
from datasets import load_dataset
import uuid
# Hugging Face dataset repo that sentences are read from and submissions pushed to.
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"
# Submissions are appended as JSON Lines; each app session gets its own
# uuid-named file so concurrent users never write to the same file.
submissions_folder = "submissions"
submissions_file = os.path.join(submissions_folder, f"submissions_{uuid.uuid4()}.json")
def load_data(repo_id):
    """Load the 'sentences' split of the 'sentences' config from a HF dataset repo.

    Args:
        repo_id: Hugging Face dataset repository id, e.g. "imomayiz/darija-english".

    Returns:
        The loaded `datasets` split object.
    """
    # f'{repo_id}' in the original was a no-op wrapper around a string; pass it directly.
    return load_dataset(repo_id, name="sentences", split="sentences")
def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random sentence from *dataset*, stash it in Streamlit session
    state, reset both pending translation inputs, and return the sentence.

    Args:
        dataset: indexable dataset whose rows are mappings with *column_name*.
        column_name: column to read the sentence from (default "darija_ar").
    """
    idx = random.randrange(len(dataset))
    picked = dataset[idx][column_name]
    # Publishing via session state lets the rest of the app react to the new
    # sentence; the stale user inputs are cleared at the same time.
    st.session_state.sentence = picked
    st.session_state.translation_input = ""
    st.session_state.translation_input_fr = ""
    return picked
def store_submission(
    scheduler: CommitScheduler, sentence: str, translation: str, translation_fr: str
):
    """
    Append input/outputs and user feedback to a JSON Lines file
    using a thread lock to avoid concurrent writes from different users.

    Args:
        scheduler: CommitScheduler whose lock guards the shared file and which
            periodically commits the submissions folder to the dataset repo.
        sentence: the Arabic-script darija source sentence.
        translation: the English translation entered by the user.
        translation_fr: the second translation field entered by the user.
    """
    ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    with scheduler.lock:
        # Robustness: make sure the folder exists before the first append.
        os.makedirs(submissions_folder, exist_ok=True)
        # Fix: ensure_ascii=False writes raw Arabic characters, so the file
        # must be opened as UTF-8 explicitly — the platform default encoding
        # (e.g. cp1252 on Windows) would raise UnicodeEncodeError.
        with open(submissions_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(
                {
                    # NOTE(review): translation_fr is stored under "darija" and
                    # the source under "darija_ar", matching the dataset's
                    # Latin-/Arabic-script column names — confirm translation_fr
                    # really carries the Latin-script darija text.
                    "darija": translation_fr,
                    "eng": translation,
                    "darija_ar": sentence,
                    "timestamp": ts,
                },
                ensure_ascii=False,
            ))
            f.write("\n")
    st.success(
        f"""Translation submitted successfully.
        You will see your commit in 1 minute at
        {DATASET_REPO_URL}/tree/main/{submissions_folder}.
        You can submit another translation or check the dataset."""
    )