File size: 6,315 Bytes
dd4ac10
 
d9db28b
 
3116e29
dd4ac10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3116e29
d9db28b
dd4ac10
d9db28b
dd4ac10
d9db28b
 
dd4ac10
3116e29
d9db28b
3116e29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd4ac10
d9db28b
 
dd4ac10
d9db28b
3116e29
d9db28b
 
dd4ac10
 
d9db28b
3116e29
dd4ac10
 
 
 
 
 
 
d9db28b
dd4ac10
 
 
 
 
 
 
 
d9db28b
dd4ac10
 
 
d9db28b
dd4ac10
 
 
 
d9db28b
 
 
 
 
dd4ac10
 
 
 
 
 
 
 
 
d9db28b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import ast
import astor
import traceback

# Hugging Face checkpoint used for the model-analysis section of the app.
# NOTE(review): this looks like a base (not fine-tuned) checkpoint, so the
# sequence-classification head is presumably untrained - confirm before
# attaching meaning to the predicted class ids.
MODEL_NAME = "microsoft/codebert-base"


@st.cache_resource
def _load_codebert(model_name):
    """Load the tokenizer and sequence-classification model for *model_name*.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader keeps a single tokenizer/model pair per
    server process instead of calling ``from_pretrained`` on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# Module-level handles used by analyze_code().
tokenizer, model = _load_codebert(MODEL_NAME)

# Helper function to analyze code
def analyze_code(code, max_tokens=512):
    """Classify *code* with the module-level CodeBERT model, window by window.

    The text is split by the tokenizer into consecutive windows of at most
    ``max_tokens`` tokens (special tokens included), so every part of the
    input reaches the model. Splitting by line count and then truncating each
    chunk to the token limit would silently drop most of every chunk.

    Args:
        code: Source text to analyze.
        max_tokens: Maximum number of tokens per window.

    Returns:
        A list with one predicted class index (``int``) per window, in input
        order. An empty string still yields one window.
    """
    # Function-scope import: return_tensors="pt" already requires PyTorch.
    import torch

    encoded = tokenizer(
        code,
        return_tensors="pt",
        truncation=True,
        max_length=max_tokens,
        # Pad so the (usually shorter) last window can be stacked into the
        # same tensor as the others; the attention mask hides the padding.
        padding=True,
        # One row per window instead of discarding the overflow. Row-wise
        # overflow output is a fast-tokenizer feature, so a slow tokenizer
        # falls back to a single truncated window.
        return_overflowing_tokens=getattr(tokenizer, "is_fast", False),
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    results = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # One window per forward pass keeps peak memory independent of the
        # input length. Only the two tensors the model needs are forwarded;
        # bookkeeping keys such as "overflow_to_sample_mapping" are not
        # model inputs.
        for row in range(input_ids.shape[0]):
            outputs = model(
                input_ids=input_ids[row:row + 1],
                attention_mask=attention_mask[row:row + 1],
            )
            results.append(outputs.logits.argmax(dim=1).item())

    return results

# Function to detect and fix bugs, including logical errors
def _source_order(node):
    """Sort key that places AST nodes in the order they appear in the source."""
    return (getattr(node, "lineno", 0), getattr(node, "col_offset", 0))


def _first_seen(names):
    """Return *names* as a list without repeats, keeping first-occurrence order."""
    return list(dict.fromkeys(names))


def _collect_bound_names(tree):
    """Return ``(bound_names, has_star_import)`` for a parsed module.

    ``bound_names`` holds every name that something in the module binds
    (assignments, loop/with/comprehension targets, def/class statements,
    parameters, imports, ``except ... as`` names, match captures, type
    parameters) plus the interpreter's builtins. Scopes are deliberately
    ignored: a name bound anywhere counts as bound everywhere.
    """
    import builtins  # function-scope import; the file's import block is unchanged

    bound = set(dir(builtins)) | {"__file__", "__builtins__"}
    has_star_import = False
    for node in ast.walk(tree):
        kind = type(node).__name__
        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                bound.add(node.id)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            bound.add(node.name)
        elif isinstance(node, ast.arg):
            bound.add(node.arg)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*":
                    has_star_import = True
                else:
                    # "import a.b" binds "a"; "import a.b as c" binds "c".
                    bound.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                bound.add(node.name)
        # Node classes that only exist on newer Pythons are matched by class
        # name so this helper still imports on older interpreters.
        elif kind in ("MatchAs", "MatchStar", "TypeVar", "ParamSpec", "TypeVarTuple"):
            if node.name:
                bound.add(node.name)
        elif kind == "MatchMapping":
            if node.rest:
                bound.add(node.rest)
    return bound, has_star_import


def _attribute_root(node):
    """Return the ``ast.Name`` at the base of ``a.b.c``, or ``None`` if there is none."""
    while isinstance(node, ast.Attribute):
        node = node.value
    return node if isinstance(node, ast.Name) else None


def _is_type_mismatch(op, left, right):
    """Return True if constants *left* and *right* look incompatible under *op*."""
    numeric = (int, float, complex)  # bool is an int subclass
    text = (str, bytes)
    if type(left) is type(right):
        return False
    if isinstance(left, numeric) and isinstance(right, numeric):
        return False  # mixed numeric arithmetic such as 1 + 2.0
    if isinstance(op, ast.Mult) and (
        (isinstance(left, text) and isinstance(right, int))
        or (isinstance(right, text) and isinstance(left, int))
    ):
        return False  # sequence repetition such as "ab" * 3
    if isinstance(op, ast.Mod) and isinstance(left, text):
        return False  # printf-style formatting such as "%d" % 3
    return True


def _stub_insert_index(tree):
    """Return the first ``tree.body`` index at which a statement may be inserted.

    A module docstring and ``from __future__`` imports must stay first, so
    generated statements go right after them.
    """
    index = 1 if ast.get_docstring(tree, clean=False) is not None else 0
    body = tree.body
    while (
        index < len(body)
        and isinstance(body[index], ast.ImportFrom)
        and body[index].module == "__future__"
    ):
        index += 1
    return index


def detect_and_fix_bugs(code):
    """Run heuristic static checks on *code* and stub out undefined variables.

    Checks, in reporting order:

    1. Names read but never bound anywhere in the module (builtins, imports,
       def/class names and parameters count as bound). Each gets a
       ``name = None`` stub near the top of the returned source.
    2. Names assigned but never read.
    3. ``module.func(...)`` calls whose base name is never bound
       (reported as a missing import).
    4. ``func(...)`` calls whose name is never bound.
    5. Binary operations on two literal constants of incompatible types.
    6. ``if`` statements whose test is the literal ``True`` or ``False``.
    7. Dictionary literals with repeated constant keys.

    Names caught by checks 3 and 4 are reported only there and are not
    stubbed, because ``None`` can be neither called nor used as a module.
    When the module contains ``from x import *`` the set of bound names is
    unknowable, so checks 1, 3 and 4 are skipped.

    Args:
        code: Python source text.

    Returns:
        ``(suggestions, fixed_code)``: a list of human-readable findings and
        the source to display. ``fixed_code`` is *code* itself unless stubs
        were added, so comments and formatting survive when there is nothing
        to fix. If analysis fails (for example on a syntax error) the
        traceback is appended to ``suggestions`` and *code* is returned
        unchanged.
    """
    suggestions = []
    fixed_code = code

    try:
        tree = ast.parse(code)
        # ast.walk is breadth-first; sorting (stably) by position makes the
        # findings come out in source order and deterministically.
        nodes = sorted(ast.walk(tree), key=_source_order)
        bound, has_star_import = _collect_bound_names(tree)

        def is_unbound(name):
            return not has_star_import and name not in bound

        calls = [n for n in nodes if isinstance(n, ast.Call)]
        undefined_functions = _first_seen(
            call.func.id
            for call in calls
            if isinstance(call.func, ast.Name) and is_unbound(call.func.id)
        )
        attribute_call_roots = (
            _attribute_root(call.func)
            for call in calls
            if isinstance(call.func, ast.Attribute)
        )
        missing_imports = _first_seen(
            root.id
            for root in attribute_call_roots
            if root is not None and is_unbound(root.id)
        )

        loads = [
            n for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Load)
        ]
        reported_elsewhere = set(undefined_functions) | set(missing_imports)
        undefined_variables = _first_seen(
            n.id for n in loads
            if is_unbound(n.id) and n.id not in reported_elsewhere
        )

        # Detect undefined variable usage
        for name in undefined_variables:
            suggestions.append(f"Variable '{name}' is used but not defined.")
        if undefined_variables:
            stubs = [
                ast.Assign(
                    targets=[ast.Name(id=name, ctx=ast.Store())],
                    value=ast.Constant(value=None),
                )
                for name in undefined_variables
            ]
            insert_at = _stub_insert_index(tree)
            tree.body[insert_at:insert_at] = stubs
            # The new nodes carry no positions; the unparser reads them.
            ast.fix_missing_locations(tree)
            for name in undefined_variables:
                suggestions.append(f"Added a definition for variable '{name}'.")

        # Detect unused variables (computed from the nodes collected before
        # the stubs were inserted; stubbed names are read by definition).
        used_vars = {n.id for n in loads}
        assigned_vars = _first_seen(
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Store)
        )
        for name in assigned_vars:
            if name not in used_vars:
                suggestions.append(f"Variable '{name}' is defined but never used.")

        # Detect missing imports
        for name in missing_imports:
            suggestions.append(f"Missing import for '{name}'.")

        # Detect invalid function calls
        for name in undefined_functions:
            suggestions.append(f"Function '{name}' is called but not defined.")

        # Detect type mismatches (example: adding string to integer)
        for node in nodes:
            if (
                isinstance(node, ast.BinOp)
                and isinstance(node.left, ast.Constant)
                and isinstance(node.right, ast.Constant)
            ):
                left, right = node.left.value, node.right.value
                if _is_type_mismatch(node.op, left, right):
                    suggestions.append(
                        f"Type mismatch in operation: '{left}' ({type(left).__name__}) "
                        f"and '{right}' ({type(right).__name__})."
                    )

        # Detect logical errors (example: unreachable code), at any nesting depth
        for node in nodes:
            if isinstance(node, ast.If) and isinstance(node.test, ast.Constant):
                if node.test.value is False:
                    suggestions.append(f"Unreachable code detected at line {node.lineno}.")
                elif node.test.value is True:
                    suggestions.append(f"Redundant condition always True at line {node.lineno}.")

        # Detect duplicate keys in dictionaries
        for node in nodes:
            if isinstance(node, ast.Dict):
                # "**spread" entries have a None key and are skipped here.
                keys = [k.value for k in node.keys if isinstance(k, ast.Constant)]
                if len(keys) != len(set(keys)):
                    suggestions.append("Duplicate keys detected in dictionary.")

        # Convert the modified AST back to code, only when it was modified:
        # unparsing drops comments and original formatting.
        if undefined_variables:
            # Prefer the standard-library unparser (Python 3.9+); fall back
            # to astor on older interpreters.
            unparse = getattr(ast, "unparse", None) or astor.to_source
            fixed_code = unparse(tree)

    except Exception:
        # Best-effort UI boundary: report the failure (typically a
        # SyntaxError from ast.parse) as a finding instead of crashing.
        suggestions.append(f"Error analyzing code: {traceback.format_exc()}")

    return suggestions, fixed_code

# Streamlit app UI
# The statements below run top to bottom on every rerun; widgets are rendered
# in call order.
st.title("Code Quality, Bug Detection, and Auto-Correction Tool")
st.markdown("Analyze your code for syntax issues, quality, bugs, logical errors, and get suggested corrections.")

# File uploader
uploaded_file = st.file_uploader("Upload a Python code file", type=["py"])

# Code snippet input
code_snippet = st.text_area("Or paste your code snippet below:")

if st.button("Analyze and Fix Code"):
    # An uploaded file takes precedence over pasted text.
    if uploaded_file is not None:
        try:
            # getvalue() does not depend on the stream position. "utf-8-sig"
            # decodes plain UTF-8 too and strips a leading byte-order mark,
            # which would otherwise end up inside the string handed to the
            # parser.
            code = uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text.")
            st.stop()
    elif code_snippet.strip():
        code = code_snippet
    else:
        st.error("Please upload a file or paste code to analyze.")
        st.stop()  # ends this script run, so `code` is always bound below

    # Perform code analysis and bug fixing
    st.subheader("Analysis Results")

    st.write("**Code Quality and Bug Suggestions:**")
    suggestions, fixed_code = detect_and_fix_bugs(code)
    if suggestions:
        for i, suggestion in enumerate(suggestions, 1):
            st.write(f"{i}. {suggestion}")
    else:
        st.write("No major issues detected. Your code looks good!")

    # Display corrected code
    st.subheader("Corrected Code:")
    st.code(fixed_code, language="python")

    # Simulated CodeBERT analysis (placeholder)
    st.write("**Model Analysis:**")
    model_results = analyze_code(code)
    for idx, result in enumerate(model_results, 1):
        st.write(f"Chunk {idx} classification result: {result}")

st.markdown("---")
st.markdown("*Powered by Hugging Face and Streamlit*")