GoodML committed
Commit
06b8918
1 Parent(s): 3d05a9d

Update API.py

Files changed (1):
  1. API.py +87 -0
API.py CHANGED
@@ -0,0 +1,87 @@
+ # Sentiment analysis API: fetch the comments for a YouTube video ID
+ # and classify them with a multilingual BERT sentiment model.
+ import os
+
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+ import pandas as pd
+ import googleapiclient.discovery
+ from googleapiclient.errors import HttpError
+
+ app = FastAPI()
+
+ api_service_name = "youtube"
+ api_version = "v3"
+ # Read the API key from the environment rather than hardcoding it in source.
+ DEVELOPER_KEY = os.environ["YOUTUBE_API_KEY"]  # Set this to your actual YouTube API key
+
+ youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)
+
+ class SingleInput(BaseModel):
+     video_id: str
+
+ # Load the BERT tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
+ model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
+
+ def scrape_comments(video_id):
+     """Fetch up to 100 top-level comments for a video into a DataFrame."""
+     request = youtube.commentThreads().list(
+         part="snippet",
+         videoId=video_id,
+         maxResults=100  # You can adjust the maximum number of comments to fetch
+     )
+     response = request.execute()
+
+     comments = []
+
+     for item in response['items']:
+         comment = item['snippet']['topLevelComment']['snippet']
+         comments.append(comment['textDisplay'])
+
+     comments_df = pd.DataFrame(comments, columns=['comment'])
+     return comments_df
+
+ # A minimal client call for this endpoint is sketched after the diff.
+ @app.post("/analyze_sentiment/")
+ def analyze_sentiment_endpoint(data: SingleInput):
+     video_id = data.video_id
+
+     try:
+         comments_df = scrape_comments(video_id)
+     except HttpError as exc:
+         # An invalid video ID (or a video with comments disabled) surfaces as an API error.
+         raise HTTPException(status_code=404, detail=f"Could not fetch comments: {exc}")
+
+     if comments_df.empty:
+         raise HTTPException(status_code=400, detail="No comments found for the provided video ID")
+
+     tokenized_comments = tokenizer(list(comments_df['comment']), padding=True, truncation=True, return_tensors="pt")
+
+     # Perform sentiment analysis
+     with torch.no_grad():
+         outputs = model(**tokenized_comments)
+         logits = outputs.logits
+
+     # Determine sentiment for each comment. The model outputs five classes
+     # (ids 0-4, i.e. 1-5 stars): 1-2 stars negative, 3 stars neutral,
+     # 4-5 stars positive. (An offline check of this is sketched after the diff.)
+     sentiment_ids = torch.argmax(logits, dim=1).tolist()
+     sentiment_labels = []
+     for sentiment_id in sentiment_ids:
+         if sentiment_id >= 3:
+             sentiment_labels.append("Positive")
+         elif sentiment_id <= 1:
+             sentiment_labels.append("Negative")
+         else:
+             sentiment_labels.append("Neutral")
+
+     sentiment_counts = {
+         "positive": sentiment_labels.count("Positive"),
+         "negative": sentiment_labels.count("Negative"),
+         "neutral": sentiment_labels.count("Neutral")
+     }
+
+     return {"sentiment_counts": sentiment_counts, "comments_count": len(comments_df)}
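
For reference, a minimal client call against the endpoint above might look like the sketch below. This is not part of the commit: it assumes the app is served locally with uvicorn API:app --port 8000, that the YOUTUBE_API_KEY environment variable is set for that process, and the video ID shown is a placeholder.

import requests

resp = requests.post(
    "http://127.0.0.1:8000/analyze_sentiment/",
    json={"video_id": "dQw4w9WgXcQ"},  # placeholder video ID
)
resp.raise_for_status()
print(resp.json())
# Expected shape:
# {"sentiment_counts": {"positive": ..., "negative": ..., "neutral": ...}, "comments_count": ...}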
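
And a quick offline check of the star-to-label mapping used above (again a sketch; it only needs the model download, not a YouTube API key):

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

batch = tokenizer(["I loved this video!", "Terrible content."], padding=True, return_tensors="pt")
with torch.no_grad():
    ids = model(**batch).logits.argmax(dim=1).tolist()
print(ids)  # class ids 0-4 map to 1-5 stars; expect a high id first, a low id second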