Commit 210df67 (verified) by Manasa1 · Parent: a6df729

Update app.py

Files changed (1):
  1. app.py +27 -30
app.py CHANGED
@@ -42,51 +42,48 @@ if uploaded_file is not None:
 
     # Step 4: Fine-tune a model on the extracted tweets
     def fine_tune_model(tweets):
-
         # Convert tweets to a DataFrame and Dataset
         df = pd.DataFrame(tweets, columns=["text"])
         tweet_dataset = Dataset.from_pandas(df)
 
-        # Load model and tokenizer
+        # Load model and tokenizer
         model_name = "gpt2"  # Replace with a suitable model if needed
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(model_name)
 
-        # Set the pad token
-        tokenizer.pad_token = tokenizer.eos_token  # Use eos_token as pad_token
+        tokenizer.pad_token = tokenizer.eos_token
 
-        # Tokenize the dataset
-        def tokenize_function(examples):
-            return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
+        # Tokenize the dataset
+        def tokenize_function(examples):
+            return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
 
-        tokenized_tweets = tweet_dataset.map(tokenize_function, batched=True)
+        tokenized_tweets = tweet_dataset.map(tokenize_function, batched=True)
 
-        # Training arguments
-        training_args = TrainingArguments(
-            output_dir="./fine_tuned_tweet_model",
-            per_device_train_batch_size=4,
-            num_train_epochs=3,
-            save_steps=10_000,
-            save_total_limit=1,
-            logging_dir='./logs',
-        )
+        # Training arguments
+        training_args = TrainingArguments(
+            output_dir="./fine_tuned_tweet_model",
+            per_device_train_batch_size=4,
+            num_train_epochs=3,
+            save_steps=10_000,
+            save_total_limit=1,
+            logging_dir='./logs',
+        )
 
-        # Initialize the Trainer
-        trainer = Trainer(
-            model=model,
-            args=training_args,
-            train_dataset=tokenized_tweets,
-        )
+        # Initialize the Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=tokenized_tweets,
+        )
 
-        # Fine-tune the model
-        trainer.train()
+        # Fine-tune the model
+        trainer.train()
 
-        # Save the fine-tuned model
-        model.save_pretrained("fine_tuned_tweet_model")
-        tokenizer.save_pretrained("fine_tuned_tweet_model")
-
-        return model, tokenizer
+        # Save the fine-tuned model
+        model.save_pretrained("fine_tuned_tweet_model")
+        tokenizer.save_pretrained("fine_tuned_tweet_model")
 
+        return model, tokenizer
 
     # Trigger fine-tuning and notify user
     with st.spinner("Fine-tuning model..."):
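
Two notes on the code as it stands after this commit. Setting tokenizer.pad_token = tokenizer.eos_token is needed because GPT-2 ships without a pad token, and padding="max_length" would otherwise raise an error. Less obvious: tokenize_function emits only input_ids and attention_mask, so the dataset handed to Trainer carries no labels column and trainer.train() cannot compute a causal-LM loss. A minimal sketch of the usual fix, assuming the same tokenizer, training_args, and tokenized_tweets defined in the diff:

    # Sketch only, not part of the commit: supply labels via a collator.
    from transformers import DataCollatorForLanguageModeling

    # With mlm=False the collator copies input_ids into labels and masks
    # pad positions with -100 so they are ignored by the loss.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_tweets,
        data_collator=data_collator,
    )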
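
The hunk ends just inside the st.spinner block, so the call site itself is not shown. A hypothetical continuation, purely for illustration (the actual lines fall outside this diff):

    # Hypothetical call site, not from the commit.
    with st.spinner("Fine-tuning model..."):
        model, tokenizer = fine_tune_model(tweets)
    st.success("Fine-tuning complete!")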