Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,11 @@
|
|
|
|
1 |
from datasets import load_dataset
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
|
3 |
|
4 |
# Load the dataset
|
5 |
dataset = load_dataset("json", data_files="dataset.jsonl")
|
6 |
|
7 |
-
# Load the model and tokenizer
|
8 |
model_name = "Salesforce/codegen-2B-multi"
|
9 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
@@ -21,13 +22,14 @@ training_args = TrainingArguments(
|
|
21 |
overwrite_output_dir=True,
|
22 |
evaluation_strategy="epoch",
|
23 |
learning_rate=5e-5,
|
24 |
-
per_device_train_batch_size=
|
25 |
num_train_epochs=3,
|
26 |
save_strategy="epoch",
|
27 |
logging_dir="./logs",
|
|
|
28 |
)
|
29 |
|
30 |
-
#
|
31 |
trainer = Trainer(
|
32 |
model=model,
|
33 |
args=training_args,
|
@@ -35,7 +37,29 @@ trainer = Trainer(
|
|
35 |
eval_dataset=tokenized_dataset["train"],
|
36 |
)
|
37 |
|
|
|
38 |
trainer.train()
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
from datasets import load_dataset
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
|
4 |
|
5 |
# Load the dataset
|
6 |
dataset = load_dataset("json", data_files="dataset.jsonl")
|
7 |
|
8 |
+
# Load the pre-trained model and tokenizer
|
9 |
model_name = "Salesforce/codegen-2B-multi"
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
22 |
overwrite_output_dir=True,
|
23 |
evaluation_strategy="epoch",
|
24 |
learning_rate=5e-5,
|
25 |
+
per_device_train_batch_size=2,
|
26 |
num_train_epochs=3,
|
27 |
save_strategy="epoch",
|
28 |
logging_dir="./logs",
|
29 |
+
logging_strategy="epoch",
|
30 |
)
|
31 |
|
32 |
+
# Trainer setup
|
33 |
trainer = Trainer(
|
34 |
model=model,
|
35 |
args=training_args,
|
|
|
37 |
eval_dataset=tokenized_dataset["train"],
|
38 |
)
|
39 |
|
40 |
+
# Train the model
|
41 |
trainer.train()
|
42 |
+
|
43 |
+
# Directory that receives the fine-tuned checkpoint (weights + tokenizer files).
_fine_tuned_dir = "./fine_tuned_model"

# Write the trained weights and the tokenizer into the same directory.
trainer.save_model(_fine_tuned_dir)
tokenizer.save_pretrained(_fine_tuned_dir)

# Read both back from disk; the reloaded objects are the ones used for inference.
fine_tuned_model, fine_tuned_tokenizer = (
    AutoModelForCausalLM.from_pretrained(_fine_tuned_dir),
    AutoTokenizer.from_pretrained(_fine_tuned_dir),
)
|
50 |
+
|
51 |
+
# Define the text-generation function that the Gradio interface calls
|
52 |
+
def generate_cypress_code(prompt):
    """Generate Cypress test code from a natural-language prompt.

    Args:
        prompt: Text describing the test to generate.

    Returns:
        The decoded first generated sequence (special tokens removed), or an
        empty string when ``prompt`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to condition on: skip the model call instead of handing
    # generate() an input that may contain no tokens at all.
    if not prompt or not prompt.strip():
        return ""

    inputs = fine_tuned_tokenizer(prompt, return_tensors="pt")

    # Prefer the tokenizer's own pad token; fall back to EOS when none is set
    # so generate() receives an explicit padding id.
    pad_id = fine_tuned_tokenizer.pad_token_id
    if pad_id is None:
        pad_id = fine_tuned_tokenizer.eos_token_id

    outputs = fine_tuned_model.generate(
        # Forward everything the tokenizer produced (input_ids together with
        # attention_mask), not just the token ids.
        **inputs,
        # Budget applies to newly generated tokens only; max_length also
        # counted the prompt, so a long prompt left no room for output.
        max_new_tokens=150,
        num_return_sequences=1,
        pad_token_id=pad_id,
    )
    return fine_tuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
56 |
+
|
57 |
+
# Build a text-in / text-out Gradio UI around generate_cypress_code, then launch it.
_ui_labels = {
    "title": "Cypress Test Generator",
    "description": "Enter a description of the test you want to generate Cypress code for.",
}
interface = gr.Interface(
    fn=generate_cypress_code,
    inputs="text",
    outputs="text",
    **_ui_labels,
)
interface.launch()
|