File size: 2,455 Bytes
8709338
b78a659
 
 
8709338
b78a659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import streamlit as st
import random
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

st.title("Addition Equation Generator")

# Sidebar controls: how many equations to generate and the upper bound
# passed to the generator as ``max_num``.
num_samples = st.sidebar.number_input(
    "Number of Samples",
    min_value=100,
    max_value=100000,
    value=5000,
)
max_num = st.sidebar.slider(
    "Maximum Number for Addition",
    min_value=10,
    max_value=100,
    value=99,
)

# Function to generate addition data
def generate_addition_data(num_samples, max_num, stop_token=';'):
    """Build random addition problems together with their terminated answers.

    Every problem uses two random integers in ``[0, max_num]`` and is
    rendered as ``"a + b ="``. The matching answer is the sum converted to a
    string with ``stop_token`` appended.

    Args:
        num_samples: Number of equation/answer pairs to produce.
        max_num: Inclusive upper bound for each operand.
        stop_token: Marker concatenated onto the end of every answer.

    Returns:
        A tuple ``(input_equations, answers)`` of two index-aligned lists of
        strings, each of length ``num_samples``.
    """
    # Draw the left operand before the right one for each sample so the
    # random numbers are consumed in a fixed, per-sample order.
    operand_pairs = [
        (random.randint(0, max_num), random.randint(0, max_num))
        for _ in range(num_samples)
    ]
    input_equations = [f"{left} + {right} =" for left, right in operand_pairs]
    answers = [str(left + right) + stop_token for left, right in operand_pairs]
    return input_equations, answers

# Everything below the button runs only when it is clicked: generate the
# data, preview it, then tokenize and pad it.
if st.button('Generate and Process Data'):
    input_equations, answers = generate_addition_data(num_samples, max_num)

    # Preview the first few raw equation/answer pairs
    st.write("Sample Generated Data:")
    for equation, answer in zip(input_equations[:5], answers[:5]):
        st.write(f"Input Equation: {equation}")
        st.write(f"Answer: {answer}")

    # Fit one character-level tokenizer on equations and answers together,
    # then encode each side separately.
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(input_equations + answers)
    input_sequences = tokenizer.texts_to_sequences(input_equations)
    answer_sequences = tokenizer.texts_to_sequences(answers)

    # Pad both sides (at the end) to the longest sequence found on either side
    longest_input = max(map(len, input_sequences))
    longest_answer = max(map(len, answer_sequences))
    max_len = max(longest_input, longest_answer)
    input_sequences_padded = pad_sequences(
        input_sequences, maxlen=max_len, padding='post'
    )
    answer_sequences_padded = pad_sequences(
        answer_sequences, maxlen=max_len, padding='post'
    )

    # Show raw text, token ids and padded ids side by side for the first rows
    st.write("Tokenization and Padding Results:")
    preview_rows = zip(
        input_equations[:5],
        input_sequences[:5],
        input_sequences_padded[:5],
        answers[:5],
        answer_sequences[:5],
        answer_sequences_padded[:5],
    )
    for equation, eq_tokens, eq_padded, answer, ans_tokens, ans_padded in preview_rows:
        st.write(f"Input Equation: {equation}")
        st.write(f"Tokenized Input Sequence: {eq_tokens}")
        st.write(f"Padded Input Sequence: {eq_padded}")
        st.write(f"Answer: {answer}")
        st.write(f"Tokenized Answer Sequence: {ans_tokens}")
        st.write(f"Padded Answer Sequence: {ans_padded}")

# Always-visible hint on how to launch the script
st.write("Run the app with `streamlit run <script_name>.py` in your terminal.")