Spaces:
Sleeping
Sleeping
henryholloway
committed on
Commit
·
b78a659
1
Parent(s):
8709338
Porting over notebook file
Browse files
app.py
CHANGED
@@ -1,4 +1,57 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import random
|
3 |
+
from tensorflow.keras.preprocessing.text import Tokenizer
|
4 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
5 |
|
6 |
+
st.title("Addition Equation Generator")

# Sidebar for user input
# How many (equation, answer) pairs to generate; bounded so generation stays fast.
num_samples = st.sidebar.number_input("Number of Samples", min_value=100, max_value=100000, value=5000)
# Inclusive upper bound for each random addend (randint(0, max_num) below).
max_num = st.sidebar.slider("Maximum Number for Addition", min_value=10, max_value=100, value=99)
|
11 |
+
|
12 |
+
def generate_addition_data(num_samples, max_num, stop_token=';'):
    """Build a synthetic dataset of addition problems.

    Args:
        num_samples: Number of (equation, answer) pairs to produce.
        max_num: Inclusive upper bound for each random addend.
        stop_token: Character appended to every answer to mark its end.

    Returns:
        Two parallel lists: equation strings like ``"3 + 7 ="`` and
        answer strings like ``"10;"``.
    """
    equations, solutions = [], []
    for _ in range(num_samples):
        # Draw both addends uniformly from [0, max_num].
        lhs = random.randint(0, max_num)
        rhs = random.randint(0, max_num)
        equations.append(f"{lhs} + {rhs} =")
        solutions.append(f"{lhs + rhs}{stop_token}")
    return equations, solutions
|
24 |
+
|
25 |
+
# Button to generate and process data
if st.button('Generate and Process Data'):
    equations, solutions = generate_addition_data(num_samples, max_num)

    # Preview the first few raw pairs so the user can sanity-check the data.
    st.write("Sample Generated Data:")
    for eq, sol in zip(equations[:5], solutions[:5]):
        st.write(f"Input Equation: {eq}")
        st.write(f"Answer: {sol}")

    # Character-level tokenizer fitted on equations AND answers together,
    # so both sides share a single character-index vocabulary.
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(equations + solutions)
    input_sequences = tokenizer.texts_to_sequences(equations)
    answer_sequences = tokenizer.texts_to_sequences(solutions)

    # Pad every sequence (post-padding) to the longest length seen on either side.
    max_len = max(len(seq) for seq in input_sequences + answer_sequences)
    input_sequences_padded = pad_sequences(input_sequences, maxlen=max_len, padding='post')
    answer_sequences_padded = pad_sequences(answer_sequences, maxlen=max_len, padding='post')

    # Show the same preview rows with their tokenized and padded forms.
    st.write("Tokenization and Padding Results:")
    preview = zip(equations[:5], input_sequences[:5], input_sequences_padded[:5],
                  solutions[:5], answer_sequences[:5], answer_sequences_padded[:5])
    for eq, eq_ids, eq_pad, sol, sol_ids, sol_pad in preview:
        st.write(f"Input Equation: {eq}")
        st.write(f"Tokenized Input Sequence: {eq_ids}")
        st.write(f"Padded Input Sequence: {eq_pad}")
        st.write(f"Answer: {sol}")
        st.write(f"Tokenized Answer Sequence: {sol_ids}")
        st.write(f"Padded Answer Sequence: {sol_pad}")
|
55 |
+
|
56 |
+
# Instruction to run the app
# NOTE(review): this message is rendered inside the already-running app, so it is
# redundant there — presumably intended as a hint for readers of the source file.
st.write("Run the app with `streamlit run <script_name>.py` in your terminal.")
|