Ankitajadhav committed on
Commit
8324d73
1 Parent(s): ac63cbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -2
app.py CHANGED
@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
7
  import chromadb
8
  from datasets import load_dataset
9
  import gradio as gr
10
- from transformers import GPT2Tokenizer, GPT2Model
11
 
12
  model_name = "Amitesh007/text_generation-finetuned-gpt2"
13
 
@@ -15,7 +15,7 @@ model_name = "Amitesh007/text_generation-finetuned-gpt2"
15
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
16
 
17
  # Load the model with from_tf=True
18
- model = GPT2Model.from_pretrained(model_name, from_tf=True)
19
 
20
  # Function to clear the cache
21
  def clear_cache(model_name):
@@ -80,6 +80,41 @@ class VectorStore:
80
  vector_store = VectorStore("embedding_vector")
81
  vector_store.populate_vectors(dataset=None)
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  # Define the chatbot response function
84
  conversation_history = []
85
 
 
7
  import chromadb
8
  from datasets import load_dataset
9
  import gradio as gr
10
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
11
 
12
  model_name = "Amitesh007/text_generation-finetuned-gpt2"
13
 
 
15
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
16
 
17
  # Load the model with from_tf=True
18
+ model = GPT2LMHeadModel.from_pretrained(model_name, from_tf=True)
19
 
20
  # Function to clear the cache
21
  def clear_cache(model_name):
 
80
  vector_store = VectorStore("embedding_vector")
81
  vector_store.populate_vectors(dataset=None)
82
 
83
+ # Fine-tuning function
84
def fine_tune_model():
    """Fine-tune the module-level GPT-2 ``model`` on a slice of the recipe dataset.

    Loads the first 1500 training rows of
    ``Thefoodprocessor/recipe_new_with_features_full``, joins each row's
    ``title_cleaned`` and ``recipe_new`` fields into one text, tokenizes it
    with the module-level ``tokenizer`` and trains ``model`` in place with a
    ``Trainer`` for three epochs. Checkpoints go to ``./results``.

    Returns:
        None. The module-level ``model`` is updated as a side effect.
    """
    # Load your dataset
    dataset = load_dataset(
        'Thefoodprocessor/recipe_new_with_features_full',
        split='train[:1500]',
        streaming=False,
    )

    # padding="max_length" requires a pad token. GPT-2 tokenizers commonly
    # ship without one, so fall back to EOS rather than failing in tokenize.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Prepare the data for training
    def tokenize_function(examples):
        # With batched=True each column arrives as a list of values, so the
        # texts must be joined row by row (list + str would raise TypeError).
        texts = [
            f"{title or ''} {recipe or ''}"
            for title, recipe in zip(
                examples['title_cleaned'], examples['recipe_new']
            )
        ]
        encoded = tokenizer(texts, padding="max_length", truncation=True)
        # A causal LM only produces a loss when labels are supplied. Use the
        # input ids as labels and mask padding positions with -100 so they
        # are ignored by the loss.
        encoded["labels"] = [
            [token if keep else -100 for token, keep in zip(ids, mask)]
            for ids, mask in zip(
                encoded["input_ids"], encoded["attention_mask"]
            )
        ]
        return encoded

    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Define training arguments
    # No evaluation strategy is configured: there is no eval_dataset, and
    # Trainer refuses to start when evaluation is requested without one.
    training_args = TrainingArguments(
        output_dir="./results",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
    )

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
    )

    # Train the model
    trainer.train()
114
+
115
+ # Fine-tune the model
116
+ fine_tune_model()
117
+
118
  # Define the chatbot response function
119
  conversation_history = []
120