# FastAPI service: fetch comments for a YouTube video by ID and run
# BERT-based sentiment analysis over them.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import googleapiclient.discovery
import googleapiclient.errors

app = FastAPI()

api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "YOUR_YOUTUBE_API_KEY"  # Replace with your actual YouTube Data API v3 key

youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

class SingleInput(BaseModel):
    video_id: str

# Load the tokenizer and model. This checkpoint predicts a 1-5 star rating,
# so the logits have five classes (index 0 = 1 star, index 4 = 5 stars).
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model.eval()  # inference only; disables dropout

def scrape_comments(video_id):
    # Fetch up to 100 top-level comments (the API caps maxResults at 100
    # per page; paginate with nextPageToken if you need more).
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=100,
            textFormat="plainText"  # return comment text without HTML markup
        )
        response = request.execute()
    except googleapiclient.errors.HttpError as exc:
        # Covers invalid video IDs, disabled comments, quota errors, etc.
        raise HTTPException(status_code=502, detail=f"YouTube API error: {exc}") from exc

    comments = [
        item['snippet']['topLevelComment']['snippet']['textDisplay']
        for item in response['items']
    ]

    return pd.DataFrame(comments, columns=['comment'])

@app.post("/analyze_sentiment/")
def analyze_sentiment_endpoint(data: SingleInput):
    video_id = data.video_id

    comments_df = scrape_comments(video_id)

    if comments_df.empty:
        raise HTTPException(status_code=400, detail="No comments found for the provided video ID")

    # Batch-tokenize all comments: padding=True pads to the longest comment in
    # the batch; truncation=True caps each at the model's 512-token limit.
    tokenized_comments = tokenizer(list(comments_df['comment']), padding=True, truncation=True, return_tensors="pt")

    # Perform sentiment analysis
    with torch.no_grad():
        outputs = model(**tokenized_comments)
        logits = outputs.logits

    # Map predicted star ratings to sentiment buckets:
    # 1-2 stars -> Negative, 3 stars -> Neutral, 4-5 stars -> Positive.
    sentiment_ids = torch.argmax(logits, dim=1).tolist()
    sentiment_labels = []
    for sentiment_id in sentiment_ids:
        if sentiment_id >= 3:
            sentiment_labels.append("Positive")
        elif sentiment_id <= 1:
            sentiment_labels.append("Negative")
        else:
            sentiment_labels.append("Neutral")

    sentiment_counts = {
        "positive": sentiment_labels.count("Positive"),
        "negative": sentiment_labels.count("Negative"),
        "neutral": sentiment_labels.count("Neutral")
    }

    return {"sentiment_counts": sentiment_counts, "comments_count": len(comments_df)}
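
# Convenience entry point (a sketch: the uvicorn dependency, the module name
# "app", and the port are assumptions, not part of the original service).
# Run with `python app.py`, or directly with `uvicorn app:app --reload`, then:
#   curl -X POST http://127.0.0.1:8000/analyze_sentiment/ \
#        -H "Content-Type: application/json" -d '{"video_id": "<VIDEO_ID>"}'
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)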