Spaces:
Runtime error
Runtime error
Update API.py
Browse files
API.py
CHANGED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Now, trying using the youtube video id
|
2 |
+
from fastapi import FastAPI, HTTPException
|
3 |
+
from pydantic import BaseModel
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
+
import torch
|
6 |
+
import pandas as pd
|
7 |
+
import googleapiclient.discovery
|
8 |
+
|
9 |
+
import os

app = FastAPI()

# YouTube Data API v3 client configuration.
api_service_name = "youtube"
api_version = "v3"

# SECURITY: the original file committed a live developer key in plain text.
# Prefer the YOUTUBE_API_KEY environment variable; the literal is kept only
# as a backward-compatible fallback and should be rotated/revoked.
DEVELOPER_KEY = os.environ.get(
    "YOUTUBE_API_KEY",
    "AIzaSyC4Vx8G6nm3Ow9xq7NluTuCCJ1d_5w4YPE",
)

# Build the client once at import time and reuse it for every request.
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY
)
17 |
+
class SingleInput(BaseModel):
    """Request body for ``POST /analyze_sentiment/``.

    Carries the identifier of the YouTube video whose comments will be
    fetched and scored.
    """

    # YouTube video id as it appears in the watch URL — not validated here;
    # a bad id surfaces as an API error / empty comment set downstream.
    video_id: str
|
20 |
+
# Load the BERT tokenizer and sentiment model once at startup.
# Single constant so tokenizer and model are guaranteed to stay in sync.
_SENTIMENT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(_SENTIMENT_MODEL_NAME)
24 |
+
def scrape_comments(video_id):
    """Fetch top-level comments for a YouTube video.

    Args:
        video_id: YouTube video identifier.

    Returns:
        A single-column DataFrame ('comment') of comment display texts.
        Empty when the video has no retrievable comments.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100  # API maximum per page; pagination not implemented.
    )
    response = request.execute()

    # BUG FIX: 'items' is absent in the response for videos with no comments,
    # so indexing it directly raised KeyError before the caller's
    # empty-DataFrame check could run. Default to an empty list instead.
    comments = [
        item['snippet']['topLevelComment']['snippet']['textDisplay']
        for item in response.get('items', [])
    ]

    return pd.DataFrame(comments, columns=['comment'])
+
|
41 |
+
@app.post("/analyze_sentiment/")
def analyze_sentiment_endpoint(data: SingleInput):
    """Scrape a video's comments and return aggregate sentiment counts.

    Args:
        data: Request body carrying the YouTube ``video_id``.

    Returns:
        ``{"sentiment_counts": {"positive": int, "negative": int,
        "neutral": int}, "comments_count": int}``.

    Raises:
        HTTPException: 400 when no comments are found for the video id.
    """
    video_id = data.video_id

    comments_df = scrape_comments(video_id)

    if comments_df.empty:
        raise HTTPException(status_code=400, detail="No comments found for the provided video ID")

    # Tokenize all comments as one batch so the model runs a single forward pass.
    tokenized_comments = tokenizer(
        list(comments_df['comment']), padding=True, truncation=True, return_tensors="pt"
    )

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**tokenized_comments)
        logits = outputs.logits

    # The nlptown model predicts star ratings: class ids 0..4 map to 1..5 stars.
    # BUG FIX: the original mapping labeled id 2 (3 stars) "Positive" and
    # ids 3-4 (4-5 stars) "Neutral". Correct mapping: 1-2 stars -> Negative,
    # 3 stars -> Neutral, 4-5 stars -> Positive.
    sentiment_ids = torch.argmax(logits, dim=1).tolist()
    sentiment_labels = [_star_id_to_label(sid) for sid in sentiment_ids]

    sentiment_counts = {
        "positive": sentiment_labels.count("Positive"),
        "negative": sentiment_labels.count("Negative"),
        "neutral": sentiment_labels.count("Neutral")
    }

    return {"sentiment_counts": sentiment_counts, "comments_count": len(comments_df)}


def _star_id_to_label(sentiment_id):
    """Map a 0-based star-rating class id (0..4 == 1..5 stars) to a label."""
    if sentiment_id <= 1:
        return "Negative"
    if sentiment_id == 2:
        return "Neutral"
    return "Positive"