venture2 committed
Commit dc26a5e · verified · 1 Parent(s): 2d7b97a

Create app.py

Files changed (1)
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
+ # app.py
+ import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import json
+ import os
+
+ # Load the model and tokenizer from Hugging Face
+ model_name = "bigcode/starcoder"  # Use StarCoder for code-related tasks
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ # Run on GPU when one is available; otherwise fall back to CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ # Cache to store recent prompts and responses with file-based persistence
+ CACHE_FILE = "cache.json"
+ cache = {}
+
+ # Load cache from file if it exists
+ if os.path.exists(CACHE_FILE):
+     with open(CACHE_FILE, "r") as f:
+         cache = json.load(f)
+
+ def code_assistant(prompt, language):
+     # Input validation
+     if not prompt.strip():
+         return "Error: The input prompt cannot be empty. Please provide a coding question or code snippet."
+     if len(prompt) > 256:
+         return "Error: The input prompt is too long. Please limit it to 256 characters."
+
+     # Return the cached response if this prompt/language pair was seen before
+     cache_key = str((prompt, language))  # JSON object keys must be strings
+     if cache_key in cache:
+         return cache[cache_key]
+
+     # Customize the prompt based on language
+     if language:
+         prompt = f"[{language}] {prompt}"  # Indicate the language for context
+
+     # Tokenize the input and move it to the model's device
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+     # Generate a response with parameters tuned for faster replies
+     outputs = model.generate(
+         inputs.input_ids,
+         attention_mask=inputs.attention_mask,
+         max_length=128,  # Cap on prompt + completion length for quicker responses
+         temperature=0.1,  # Lower temperature for more focused output
+         top_p=0.8,  # Slightly reduced top_p for quicker sampling
+         do_sample=True,
+         pad_token_id=tokenizer.eos_token_id  # Model has no pad token; reuse EOS to silence the warning
+     )
+
+     # Decode the generated output
+     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     # Store the response in cache (limit cache size to 10 items)
+     if len(cache) >= 10:
+         cache.pop(next(iter(cache)))  # Dicts keep insertion order, so this evicts the oldest entry
+     cache[cache_key] = generated_text
+
+     # Write the updated cache to file
+     with open(CACHE_FILE, "w") as f:
+         json.dump(cache, f)
+
+     return generated_text
+
+ # Set up the Gradio interface with a dropdown for programming language selection
+ iface = gr.Interface(
+     fn=code_assistant,
+     inputs=[
+         gr.Textbox(lines=5, placeholder="Ask a coding question or paste your code here..."),
+         gr.Dropdown(choices=["Python", "JavaScript", "Java", "C++", "HTML", "CSS", "SQL", "Other"], label="Programming Language")
+     ],
+     outputs="text",
+     title="Code Assistant with StarCoder",
+     description="An AI code assistant to help you with coding queries, debugging, and code generation. Specify the programming language for more accurate responses."
+ )
+
+ # Launch the Gradio app
+ iface.launch()
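
Once the Space is running, the endpoint can be smoke-tested from Python with gradio_client. This is a minimal sketch, assuming a hosted Space: the Space id "venture2/code-assistant" is a placeholder not confirmed by this commit, while "/predict" is the default endpoint name that gr.Interface exposes.

# query_app.py — hypothetical client-side check
from gradio_client import Client

client = Client("venture2/code-assistant")  # placeholder Space id
result = client.predict(
    "Write a function that reverses a string.",  # prompt textbox
    "Python",                                    # language dropdown
    api_name="/predict",  # default endpoint for gr.Interface
)
print(result)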