akazmi committed on
Commit
a14d5bb
·
verified ·
1 Parent(s): 715b086

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Libraries
2
+ import torch
3
+ from torch.utils.data import DataLoader
4
+ from transformers import BertTokenizer, BertForSequenceClassification, AdamW, pipeline
5
+ from transformers import get_scheduler
6
+ from datasets import load_dataset
7
+ from sklearn.metrics import accuracy_score, classification_report
8
+ import gradio as gr
9
+ import numpy as np
10
+ import random
11
+
12
# Seed the Python, NumPy and PyTorch RNGs with the same fixed value.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Load the IMDb dataset by name.
dataset = load_dataset("imdb")

# Tokenizer belonging to the same checkpoint the model is loaded from.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
22
+
23
+ # Tokenization Function
24
def tokenize_function(batch):
    """Tokenize the ``'text'`` entry of a batch with the module-level tokenizer.

    Sequences are padded to, and truncated at, a fixed length of 128.

    Args:
        batch: Mapping whose ``'text'`` entry holds the raw review strings.

    Returns:
        The result of calling ``tokenizer`` on those strings.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **encode_options)
26
+
27
# Tokenize in batches, drop the raw text column once it has been encoded, and
# rename 'label' to 'labels' so the batch keys can be passed straight to the
# model as keyword arguments.
tokenized_datasets = (
    dataset.map(tokenize_function, batched=True)
    .remove_columns(["text"])
    .rename_column("label", "labels")
)

# Have the datasets return PyTorch tensors.
tokenized_datasets.set_format("torch")

# Pick out the train and test splits.
train_dataset, test_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["test"],
)

# Batches of 16; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)
46
+
47
# Load pretrained BERT configured for 2-class sequence classification.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Move the model to the GPU if one is available. This is done before the
# optimizer is created so the optimizer is built over the on-device parameters.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer. `transformers.AdamW` is deprecated in favour of the PyTorch
# implementation, so use `torch.optim.AdamW` directly.
# NOTE(review): eps and weight_decay are pinned to what I believe were the
# defaults of the deprecated class (1e-6 and 0.0) so the hyperparameters do not
# silently change -- confirm against the installed transformers version.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=5e-5,
    eps=1e-6,
    weight_decay=0.0,
)

# Linear learning-rate schedule without warmup, sized for the whole run.
# The epoch count (3) must stay in sync with the loop in train_model().
num_training_steps = len(train_loader) * 3
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)
60
+
61
+ # Training Loop
62
def train_model():
    """Fine-tune the module-level ``model`` on ``train_loader`` for 3 epochs.

    Relies on the module-level ``optimizer``, ``lr_scheduler`` and ``device``.
    For every epoch the epoch number is printed, followed by the mean training
    loss over that epoch's batches (or a placeholder if the loader produced no
    batches).
    """
    model.train()
    # The epoch count must match the one used to size the LR scheduler.
    for epoch in range(3):
        print(f"Epoch {epoch+1}")
        running_loss = 0.0
        num_batches = 0

        for batch in train_loader:
            # Move every tensor of the batch to the model's device.
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            # Backpropagate, update weights, advance the schedule, then clear
            # the gradients for the next step.
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

            running_loss += loss.item()
            num_batches += 1

        # Report the epoch mean instead of whichever batch happened to be
        # last; also avoids touching an unbound `loss` when there was no batch.
        if num_batches:
            print(f"Loss: {running_loss / num_batches}")
        else:
            print("Loss: n/a (no training batches)")
79
+
80
+ # Evaluation Function
81
def evaluate_model():
    """Evaluate the module-level ``model`` on ``test_loader``.

    Runs inference without gradient tracking, takes the arg-max class for each
    example, and prints the accuracy and scikit-learn's classification report.

    Returns:
        The accuracy computed by ``accuracy_score`` (the same value that is
        printed), so callers can use it programmatically.
    """
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in test_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            # Highest-scoring class per example, as plain Python ints.
            preds.extend(torch.argmax(logits, dim=-1).tolist())
            labels.extend(batch["labels"].tolist())

    accuracy = accuracy_score(labels, preds)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(labels, preds))
    return accuracy
95
+
96
# Fine-tune first, then report metrics on the test split.
train_model()
evaluate_model()

# Write the tokenizer and the model into the same directory so it can be
# loaded from a single path.
tokenizer.save_pretrained("sentiment_model")
model.save_pretrained("sentiment_model")

# Build the inference pipeline from the directory that was just written.
sentiment_pipeline = pipeline(task="sentiment-analysis", model="sentiment_model")
106
+
107
+ # Gradio Inference Function
108
def analyze_sentiment(review):
    """Classify a movie review and return a human-readable sentiment.

    Args:
        review: Review text entered in the Gradio textbox.

    Returns:
        str: ``"Positive"`` or ``"Negative"`` when the pipeline reports the
        generic ``LABEL_1`` / ``LABEL_0`` names; the pipeline's raw label for
        any other name; or a short prompt when no text was entered.
    """
    # Nothing to classify: do not run the model on empty/whitespace input.
    if not review or not review.strip():
        return "Please enter a movie review."

    # This script configures no id2label names on the model, so the pipeline
    # reports generic LABEL_<id> strings. IMDb uses 0 = negative, 1 = positive.
    readable = {"LABEL_0": "Negative", "LABEL_1": "Positive"}

    # truncation=True cuts reviews longer than the model's maximum sequence
    # length instead of letting them fail inside the model.
    result = sentiment_pipeline(review, truncation=True)
    raw_label = result[0]['label']
    return readable.get(raw_label, raw_label)
111
+
112
# Gradio UI: a five-line textbox for the review, plain text for the result.
iface = gr.Interface(
    title="IMDb Sentiment Analysis",
    fn=analyze_sentiment,
    outputs="text",
    inputs=gr.Textbox(placeholder="Enter a movie review...", lines=5),
)

# Start serving the interface.
iface.launch()