File size: 3,842 Bytes
193a8a4 471343c 193a8a4 471343c 53c5d75 193a8a4 471343c 193a8a4 471343c 193a8a4 dd1ccae 1a817cf dd1ccae 471343c dd1ccae 10ef2b5 193a8a4 dd1ccae fe71cb6 193a8a4 471343c 193a8a4 471343c 193a8a4 471343c 193a8a4 471343c 193a8a4 471343c 193a8a4 471343c 193a8a4 1a817cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline
# Static copy for the page: a heading and a Markdown blurb that links to the
# model card and to the dataset named in the text.
title = "Python Code Generator"
description = (
    "This is a space to convert English text to Python code using the "
    "[codeparrot-small-text-to-code](https://huggingface.co/codeparrot/codeparrot-small-text-to-code) "
    "model, a pre-trained Python code generation model trained on a dataset of "
    "docstrings and Python code extracted from Jupyter notebooks available at "
    "[github-jupyter-text](https://huggingface.co/datasets/codeparrot/github-jupyter-text)."
)

# Sample inputs handed to the interface. Each resulting row has the shape
# [prompt, tokens to generate, temperature, seed]; temperature and seed are
# the same for every row, so only the prompt and token budget are listed.
_EXAMPLE_PROMPTS = (
    ("Utility function to calculate the precision of predictions using sklearn metrics", 65),
    ("Let's implement a function that calculates the size of a file called filepath", 60),
    ("Let's implement the Bubble Sort sorting algorithm in an auxiliary function:", 87),
    ("Function to calculate the nth Fibonacci number.", 65),
    ("Function to calculate the factorial of a number.", 65),
    ("Function to reverse a string.", 65),
    ("Function to check if a number is prime.", 65),
    ("Function to generate the Fibonacci sequence up to the nth term.", 65),
    ("Function to generate the factorial sequence up to the nth term.", 65),
)
example = [[prompt, budget, 0.6, 42] for prompt, budget in _EXAMPLE_PROMPTS]
# The tokenizer and the causal language model are both loaded once, at import
# time, from the same pre-trained checkpoint.
_CHECKPOINT = "codeparrot/codeparrot-small-text-to-code"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)
def create_docstring(gen_prompt):
    """Wrap *gen_prompt* in a triple-double-quoted docstring block.

    The result is an opening triple quote, the prompt on its own line, a
    closing triple quote, and then one blank line.
    """
    fence = '"""'
    return "\n".join((fence, gen_prompt, fence)) + "\n\n"
def validate_inputs(gen_prompt, max_tokens, temperature, seed):
    """Check the user-supplied generation settings.

    Args:
        gen_prompt: English instructions; must contain non-whitespace text.
        max_tokens: Number of tokens to generate, 1 to 256 inclusive.
        temperature: Temperature setting, 0 to 2.5 inclusive.
        seed: Random seed, 0 to 1000 inclusive.

    Returns:
        None when every argument is acceptable.

    Raises:
        ValueError: If the prompt is empty or blank, or a numeric argument
            is outside its range (NaN counts as out of range).
    """
    # A prompt made only of whitespace carries no instructions, so it is
    # rejected the same way as an empty one.
    if not gen_prompt or not str(gen_prompt).strip():
        raise ValueError("English instructions cannot be empty.")
    # Each range test is written as "not (lo <= x <= hi)" so that NaN, for
    # which every comparison is False, is rejected instead of slipping past
    # an "x < lo or x > hi" check.
    if not (0 < max_tokens <= 256):
        raise ValueError("Number of tokens to generate must be between 1 and 256.")
    if not (0 <= temperature <= 2.5):
        raise ValueError("Temperature must be between 0 and 2.5.")
    if not (0 <= seed <= 1000):
        raise ValueError("Random seed must be between 0 and 1000.")
def generate_code(gen_prompt, max_tokens, temperature=0.6, seed=42):
    """Generate Python code from English instructions.

    The arguments are validated, the instructions are wrapped in a docstring
    by ``create_docstring``, and the module-level ``model`` produces a
    continuation. Only the newly generated tokens are decoded and returned;
    the prompt itself is not echoed back.

    Args:
        gen_prompt: English instructions describing the desired code.
        max_tokens: Number of new tokens to generate.
        temperature: Sampling temperature. A value of 0 selects greedy
            (deterministic) decoding instead of sampling.
        seed: Random seed applied before generation so that sampled output
            is reproducible.

    Returns:
        The decoded continuation as a string, with special tokens removed.

    Raises:
        ValueError: Propagated from ``validate_inputs`` for invalid settings.
    """
    validate_inputs(gen_prompt, max_tokens, temperature, seed)

    # The page description says the model was trained on docstrings followed
    # by code, so the instructions are presented in that form using the
    # file's own formatter.
    prompt = create_docstring(gen_prompt)

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask; passing it matters because the pad token is set to the EOS token
    # below, so generate() cannot infer the mask by itself.
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_length = encoded["input_ids"].shape[-1]

    # UI sliders may deliver whole numbers as floats; seeding and the token
    # budget need real integers.
    set_seed(int(seed))

    generation_kwargs = {
        "max_new_tokens": int(max_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "num_return_sequences": 1,
    }
    if temperature > 0:
        # Temperature only takes effect when sampling is switched on; without
        # do_sample=True decoding is greedy and temperature/seed are ignored.
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
    else:
        # Sampling requires a strictly positive temperature, so 0 falls back
        # to greedy decoding.
        generation_kwargs["do_sample"] = False

    output = model.generate(**encoded, **generation_kwargs)

    # Drop the prompt tokens so that only the generated code is returned.
    new_tokens = output[0, prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
def save_to_text_file(output_text):
    """Write *output_text* to ``generated_code.txt`` in the working directory.

    Any existing file of that name is overwritten. The file is written as
    UTF-8 so that non-ASCII characters in the text are stored the same way on
    every platform instead of depending on the locale's default encoding.

    Args:
        output_text: The text to store.
    """
    with open("generated_code.txt", "w", encoding="utf-8") as file:
        file.write(output_text)
# Web UI: one text box for the instructions plus three sliders, in the same
# order as generate_code's parameters (prompt, token budget, temperature,
# seed). The result is shown in a Python-highlighted code panel.
#
# The sliders use the top-level gr.Slider component with `value=` for the
# initial position, matching the top-level gr.Textbox / gr.Code already used
# here; the legacy gr.inputs namespace and its `default=` keyword are
# deprecated.
# NOTE(review): `layout` is not passed; it appears to be a pre-3.x Interface
# option that current Gradio releases no longer honour. Confirm against the
# pinned Gradio version.
iface = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="English instructions",
            placeholder="Enter English instructions...",
        ),
        gr.Slider(
            minimum=8,
            maximum=256,
            step=1,
            value=8,
            label="Number of tokens to generate",
        ),
        gr.Slider(
            minimum=0,
            maximum=2.5,
            step=0.1,
            value=0.6,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0,
            maximum=1000,
            step=1,
            value=42,
            label="Random seed for generation",
        ),
    ],
    outputs=gr.Code(label="Generated Python code", language="python", lines=10),
    examples=example,
    theme="peach",
    description=description,
    title=title,
)
iface.launch()
|