File size: 4,194 Bytes
9481a42
 
 
 
 
 
100b4b0
9481a42
 
 
 
c5139c6
 
9481a42
 
 
 
 
f249f4d
9481a42
 
 
 
 
 
 
 
0ecaa8b
 
 
 
9481a42
 
ada0440
100b4b0
9481a42
f044621
100b4b0
 
 
 
 
 
 
2ce4d20
665c783
2ce4d20
100b4b0
 
 
c2dd38c
 
 
 
100b4b0
 
 
 
9481a42
 
 
 
 
 
5ef45dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9481a42
 
5ef45dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import csv
import datetime as dt
import html
import os
import random

import streamlit as st
from datasets import load_dataset
from huggingface_hub import HfApi


HF_API_KEY = os.environ.get("HF_TOKEN", None)

api = HfApi(token=HF_API_KEY)

REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"


def load_data(repo_id):
    """Load the ``sentences`` split of the ``sentences`` configuration.

    Args:
        repo_id: Dataset identifier handed to ``load_dataset``.

    Returns:
        The object returned by ``load_dataset`` for that configuration/split.
    """
    return load_dataset(f"{repo_id}", name="sentences", split="sentences")

def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random row and make its sentence the current one.

    The chosen sentence is stored in ``st.session_state.sentence`` and both
    translation inputs held in the session state are reset to empty strings.

    Args:
        dataset: Object supporting ``len()`` and integer indexing, whose rows
            can be indexed by column name.
        column_name: Column the sentence is read from.

    Returns:
        The selected sentence.
    """
    last_index = len(dataset) - 1
    row = dataset[random.randint(0, last_index)]
    picked = row[column_name]

    state = st.session_state
    state.sentence = picked
    # A new sentence invalidates whatever the user had typed so far.
    state.translation_input = ""
    state.translation_input_fr = ""

    return picked

def store_submission(api: HfApi, sentence: str, translation: str, translation_fr: str):
    """Write one submission to a CSV file and upload it to the dataset repo.

    Nothing is written or uploaded unless ``sentence`` is non-empty and at
    least one of the two translations is non-empty.

    Args:
        api: Hub client whose ``upload_folder`` is used for the upload.
        sentence: Source sentence shown to the user (the app reads it from
            the ``darija_ar`` column).
        translation: English translation entered by the user.
        translation_fr: Darija translation in Latin characters entered by
            the user.
    """
    if sentence and (translation or translation_fr):

        ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
        folder_path = "submissions"
        os.makedirs(folder_path, exist_ok=True)
        filename = os.path.join(folder_path, f"submissions_{ts}.txt")

        # newline="" leaves line-ending handling to the csv module.
        with open(filename, "w", encoding="utf-8", newline="") as f:
            # csv.writer quotes fields containing commas, quotes or newlines,
            # which a plain f-string join would silently corrupt.
            writer = csv.writer(f, lineterminator="\n")
            writer.writerow(["darija", "eng", "darija_ar"])
            # Values follow the header order: Latin-script darija, English,
            # then the Arabic-script source sentence.
            writer.writerow([translation_fr, translation, sentence])

        print(REPO_ID)
        print(filename)
        # The whole local folder is pushed under the same path in the repo.
        api.upload_folder(
                folder_path=folder_path,
                path_in_repo=folder_path,
                repo_id=REPO_ID,
                repo_type="dataset",
                commit_message="New submission",
            )
        st.success(
            f"""Translation submitted successfully to 
            {DATASET_REPO_URL}/tree/main/{folder_path}"""
            )


# Load the dataset
dataset = load_data(REPO_ID)


# Seed the session state with any keys that are still missing.
# fetch_sentence() stores the sentence and clears both translation inputs,
# so the defaults below only apply to keys that are still absent afterwards.
if "sentence" not in st.session_state:
    st.session_state.sentence = fetch_sentence(dataset)

for _key, _default in (
    ("translation_input", ""),
    ("translation_input_fr", ""),
    ("display_new", False),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Page title and short description of the app.
st.title("Translate From Arabic to English")

# The dataset link is derived from DATASET_REPO_URL so it cannot drift from
# the repository the submissions are actually uploaded to.
st.markdown(
f"""This mini-app allows you to contribute to the **darija-english** dataset 
as part of [DODa](https://darija-open-dataset.github.io/)
project. To contribute, simply translate the given sentence from Arabic to English.
The translated sentence will be submitted to the dataset 
[here]({DATASET_REPO_URL})."""
)

# Empty text element used as vertical spacing.
st.text("")

# The sentence is dataset text rendered with unsafe_allow_html=True, so it is
# escaped first: any markup characters in it are shown literally instead of
# being interpreted as HTML.
safe_sentence = html.escape(str(st.session_state.sentence))

st.write(f"""
    <div style="
        padding: 5px;
        border: 1px solid #000000;
        border-radius: 5px;
    ">
        <p style="font-size: 20px;">{safe_sentence}.</p>
    </div>""", unsafe_allow_html=True)
    

# "New Sentence" button: fetch_sentence(dataset) is registered as its click
# callback, and the button's return value is kept in the session state.
new_sentence_clicked = st.button(
    "New Sentence",
    on_click=fetch_sentence,
    args=(dataset,),
)
st.session_state.display_new = new_sentence_clicked


# Input field for the English translation
translation_input_placeholder = st.empty()

with translation_input_placeholder.container():
    translation_input = st.text_input(
        "Enter translation to english: ",
        value=st.session_state.translation_input,
    )
    # Mirror the widget value back into the session state.
    st.session_state.translation_input = translation_input

# Input field for the darija translation written in Latin characters
translation_input_placeholder_fr = st.empty()

with translation_input_placeholder_fr.container():
    translation_input_fr = st.text_input(
        "Enter translation to darija in latin characters: ",
        value=st.session_state.translation_input_fr,
    )
    # Mirror the widget value back into the session state.
    st.session_state.translation_input_fr = translation_input_fr

# Submit button
if st.button("Submit Translation"):
    # Warn only when BOTH fields are empty: a single translation is enough
    # for store_submission. The parentheses matter, since
    # `not a and b` parses as `(not a) and b`.
    if not (translation_input or translation_input_fr):
        st.warning("Please enter a translation before submitting.")
    else:
        store_submission(
            api,
            st.session_state.sentence,
            st.session_state.translation_input,
            st.session_state.translation_input_fr,
        )