Commit
·
9ac8247
1
Parent(s):
00e9766
added classifier implementation and associated files
Browse files- app.py +45 -4
- cr_tokenizer.json +0 -0
- requirements.txt +3 -0
app.py
CHANGED
@@ -1,7 +1,48 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import tensorflow as tf
|
4 |
+
from tokenizers import Tokenizer
|
5 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
6 |
|
7 |
+
# Load the trained artifacts once at import time so every request reuses them.
# Both paths are relative, so they resolve against the working directory the
# app is started from.
TOKENIZER_PATH = "cr_tokenizer.json"
MODEL_PATH = "crv3.keras"

tokenizer = Tokenizer.from_file(TOKENIZER_PATH)
model = tf.keras.models.load_model(MODEL_PATH)
|
10 |
|
11 |
+
# Tokenization function
|
12 |
+
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a fixed-length batch of token ids.

    Args:
        code: Java source text to encode with the module-level ``tokenizer``.
        max_length: Number of token ids in the returned row. Shorter inputs
            are padded at the end (``padding="post"``); longer inputs are cut
            according to ``pad_sequences``' default truncation setting.

    Returns:
        A NumPy array reshaped to ``(1, max_length)``, i.e. a batch holding a
        single sequence.
    """
    token_ids = tokenizer.encode(code).ids
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    # One sequence in, one row out; the reshape keeps the single-sample batch
    # shape explicit for the model.
    return np.asarray(batch).reshape(1, -1)
|
17 |
+
|
18 |
+
# Upload handling
def _read_uploaded_code(input_file):
    """Return the text content of an uploaded file.

    Accepts raw ``bytes`` (what ``gr.File(type="binary")`` passes to the
    callback), an object with a ``read()`` method, or a filesystem path.
    Bytes are decoded as UTF-8; undecodable bytes are replaced rather than
    raising, so a stray non-UTF-8 character does not abort the request.
    """
    if isinstance(input_file, (bytes, bytearray)):
        raw = bytes(input_file)
    elif hasattr(input_file, "read"):
        raw = input_file.read()
    else:
        # Anything else is treated as a path to the uploaded file.
        with open(input_file, "rb") as handle:
            raw = handle.read()

    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", errors="replace")


# Prediction function
def classify_code(input_text, input_file):
    """Classify Java code as readable or unreadable.

    Args:
        input_text: Java source pasted into the textbox; may be ``None`` or
            empty when the user only uploads a file.
        input_file: Uploaded file content, or ``None`` when nothing was
            uploaded. When present it takes priority over ``input_text``.

    Returns:
        ``"Readable"`` when the model's score is above 0.5, ``"Unreadable"``
        otherwise, or a prompt asking for input when no non-blank code was
        supplied.
    """
    if input_file is not None:
        code = _read_uploaded_code(input_file)
    else:
        # The textbox value can be None; normalise so .strip() is safe.
        code = input_text or ""

    if not code.strip():  # Ensure input is not empty
        return "Please provide a Java code snippet."

    # Tokenize and predict; the model returns one score for the single sample.
    tokenized_code = tokenize_java_code(code)
    prediction = model.predict(tokenized_code)[0][0]

    # Convert the score to a readable/unreadable label
    return "Readable" if prediction > 0.5 else "Unreadable"
|
36 |
+
|
37 |
+
# Build the Gradio UI: two alternative inputs (pasted text or an uploaded
# file) feeding classify_code, and a single text output for the verdict.
code_box = gr.Textbox(lines=10, placeholder="Paste Java code here...", label="Java Code Snippet")
file_box = gr.File(type="binary", label="Upload Java File (.java)")
result_box = gr.Text(label="Readability Prediction")

demo = gr.Interface(
    fn=classify_code,
    inputs=[code_box, file_box],
    outputs=result_box,
    title="Java Code Readability Classifier",
    description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
    allow_flagging="never",
)

# Start serving the app
demo.launch()
|
cr_tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
tensorflow
|
3 |
+
tokenizers
|