# app.py (updated with no `max_chars` and correct model initialization)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import os

# Load the CodeGen-2B-mono model and tokenizer from Hugging Face
model_name = "Salesforce/codegen-2B-mono"  # Python-tuned "mono" variant; a workable size for CPU inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the model runs on CPU (important for the Hugging Face Spaces free tier)
device = torch.device("cpu")
model.to(device)

# Cache for recent prompts and responses, with file-based persistence
CACHE_FILE = "cache.json"
cache = {}

# Load the cache from file if it exists
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, "r") as f:
        cache = json.load(f)


def code_assistant(prompt, language):
    # Input validation with a 1024-character limit
    if not prompt.strip():
        return "⚠️ Error: The input prompt cannot be empty. Please provide a coding question or code snippet."
    if len(prompt) > 1024:
        return "⚠️ Error: The input prompt is too long. Please limit it to 1024 characters."

    # Return a cached response if this (prompt, language) pair was seen before
    cache_key = str((prompt, language))
    if cache_key in cache:
        return cache[cache_key]

    # Prefix the prompt with the language for context
    if language:
        prompt = f"[{language}] {prompt}"

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate a response with parameters tuned for faster CPU inference
    outputs = model.generate(
        **inputs,            # pass input_ids and attention_mask together
        max_new_tokens=256,  # cap generated tokens (max_length would also count the prompt)
        temperature=0.1,     # low temperature for focused output
        top_p=0.8,           # slightly reduced top_p for quicker sampling
        do_sample=True,
    )

    # Decode the generated output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Store the response in the cache (capped at 10 items)
    if len(cache) >= 10:
        cache.pop(next(iter(cache)))  # remove the oldest item
    cache[cache_key] = generated_text

    # Write the updated cache to file
    with open(CACHE_FILE, "w") as f:
        json.dump(cache, f)

    return generated_text


# Custom CSS styling for animations and colors
css = """
/* Center-align all text in the input and output boxes */
input, textarea, .output_text {
    text-align: center;
}

/* Style the main title */
h1 {
    color: #1e90ff;
    font-family: 'Arial', sans-serif;
    text-align: center;
    font-weight: bold;
}

/* Style the description */
.description {
    color: #555;
    font-family: 'Arial', sans-serif;
    text-align: center;
    margin-bottom: 20px;
}

/* Output box animation */
.output_text {
    color: #1e90ff;
    animation: fadeIn 2s ease-in-out;
}

/* Add fade-in animation */
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}

/* Hover effect for the submit button */
button {
    background-color: #1e90ff;
    color: white;
    font-weight: bold;
    border: none;
    padding: 10px 20px;
    border-radius: 5px;
    transition: background-color 0.3s ease;
}

button:hover {
    background-color: #104e8b;
    cursor: pointer;
}
"""

# Enhanced title and description with HTML styling
title_html = """
<h1>AI Code Assistant</h1>
"""

description_html = """
<p class="description">
An AI-powered assistant for coding queries, debugging, and code generation. Choose a programming language for more tailored responses. Input is limited to 1024 characters.
""" # Set up Gradio interface with a dropdown for programming language selection iface = gr.Interface( fn=code_assistant, inputs=[ gr.Textbox(lines=5, placeholder="Ask a coding question or paste your code here..."), # Removed `max_chars` gr.Dropdown(choices=["Python", "JavaScript", "Java", "C++", "HTML", "CSS", "SQL", "Other"], label="Programming Language") ], outputs="text", title=title_html, description=description_html, css=css # Add custom CSS ) # Launch the Gradio app iface.launch()