Circhastic committed on
Commit
9ac8247
·
1 Parent(s): 00e9766

added classifier implementation and associated files

Browse files
Files changed (3) hide show
  1. app.py +45 -4
  2. cr_tokenizer.json +0 -0
  3. requirements.txt +3 -0
app.py CHANGED
@@ -1,7 +1,48 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from tokenizers import Tokenizer
5
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
6
 
7
+ # Load trained tokenizer and model
8
+ tokenizer = Tokenizer.from_file("cr_tokenizer.json")
9
+ model = tf.keras.models.load_model("crv3.keras")
10
 
11
+ # Tokenization function
12
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a single-row batch of token ids.

    The text is encoded with the module-level ``tokenizer`` and the ids are
    passed through ``pad_sequences`` with ``padding="post"`` so that the row
    has exactly ``max_length`` entries.

    NOTE(review): inputs longer than ``max_length`` are shortened using the
    default truncation mode of ``pad_sequences`` -- confirm this matches how
    the model was trained.

    Args:
        code: Java source text to encode.
        max_length: Number of token ids in the returned row.

    Returns:
        A NumPy array of shape ``(1, max_length)``.
    """
    token_ids = tokenizer.encode(code).ids
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    # Pick the only padded row, then restore the leading batch axis that the
    # model expects.
    row = batch[0]
    return np.array(row).reshape(1, -1)
17
+
18
+ # Prediction function
19
def classify_code(input_text, input_file):
    """Classify Java code as "Readable" or "Unreadable".

    An uploaded file takes precedence over the pasted text when both are
    supplied.

    Args:
        input_text: Java source pasted into the textbox; may be ``None`` or
            empty when the user left the box untouched.
        input_file: Content of the uploaded file, or ``None`` when nothing was
            uploaded. A ``File`` component configured with ``type="binary"``
            passes raw ``bytes``; a file-like object exposing ``read()`` or
            already-decoded text is accepted as well.

    Returns:
        "Readable" when the model score is greater than 0.5, "Unreadable"
        otherwise, or a prompt asking for input when no code was provided.
    """
    if input_file is not None:
        # Raw bytes have no read(); only call it on objects that provide it.
        raw = input_file.read() if hasattr(input_file, "read") else input_file
        if isinstance(raw, (bytes, bytearray)):
            # Substitute undecodable bytes instead of failing the request on
            # files that are not valid UTF-8.
            code = bytes(raw).decode("utf-8", errors="replace")
        else:
            code = str(raw)
    else:
        # The textbox value can be None; treat that like an empty snippet.
        code = input_text or ""

    if not code.strip():  # Nothing but whitespace: ask for real input
        return "Please provide a Java code snippet."

    # Tokenize and predict; the model returns one score for the single row.
    tokenized_code = tokenize_java_code(code)
    prediction = model.predict(tokenized_code)[0][0]

    # Map the score onto the two user-facing labels.
    return "Readable" if prediction > 0.5 else "Unreadable"
36
+
37
+ # Create Gradio interface
38
# Build the input and output components first, then wire them into the
# interface and start serving it.
code_box = gr.Textbox(lines=10, placeholder="Paste Java code here...", label="Java Code Snippet")
file_upload = gr.File(type="binary", label="Upload Java File (.java)")
result_box = gr.Text(label="Readability Prediction")

demo = gr.Interface(
    fn=classify_code,
    inputs=[code_box, file_upload],
    outputs=result_box,
    title="Java Code Readability Classifier",
    description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
    allow_flagging="never",
)
demo.launch()
cr_tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ tensorflow
3
+ tokenizers