# Now, trying it with the YouTube video ID
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import googleapiclient.discovery

app = FastAPI()

# Build the YouTube Data API client
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyC4Vx8G6nm3Ow9xq7NluTuCCJ1d_5w4YPE"  # Replace with your actual YouTube API key
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)


class SingleInput(BaseModel):
    video_id: str


# Load the BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")


def scrape_comments(video_id):
    """Fetch top-level comments for a video and return them as a DataFrame."""
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100  # You can adjust the maximum number of comments to fetch
    )
    response = request.execute()

    comments = []
    for item in response['items']:
        comment = item['snippet']['topLevelComment']['snippet']
        comments.append(comment['textDisplay'])

    comments_df = pd.DataFrame(comments, columns=['comment'])
    return comments_df


@app.post("/analyze_sentiment/")
def analyze_sentiment_endpoint(data: SingleInput):
    video_id = data.video_id
    comments_df = scrape_comments(video_id)

    if comments_df.empty:
        raise HTTPException(status_code=400, detail="No comments found for the provided video ID")

    tokenized_comments = tokenizer(list(comments_df['comment']), padding=True, truncation=True, return_tensors="pt")

    # Perform sentiment analysis
    with torch.no_grad():
        outputs = model(**tokenized_comments)
        logits = outputs.logits

    # Determine sentiment for each comment.
    # The nlptown model predicts a 1-5 star rating, so argmax yields an index
    # from 0 (1 star) to 4 (5 stars): map 1-2 stars to Negative, 3 stars to
    # Neutral, and 4-5 stars to Positive.
    sentiment_ids = torch.argmax(logits, dim=1).tolist()
    sentiment_labels = []
    for sentiment_id in sentiment_ids:
        if sentiment_id >= 3:
            sentiment_labels.append("Positive")
        elif sentiment_id <= 1:
            sentiment_labels.append("Negative")
        else:
            sentiment_labels.append("Neutral")

    sentiment_counts = {
        "positive": sentiment_labels.count("Positive"),
        "negative": sentiment_labels.count("Negative"),
        "neutral": sentiment_labels.count("Neutral")
    }

    return {"sentiment_counts": sentiment_counts, "comments_count": len(comments_df)}
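

# --- Usage sketch (not part of the original script) ---
# A minimal way to run and exercise the endpoint, assuming this file is saved
# as main.py and uvicorn is installed; "VIDEO_ID_HERE" is a placeholder, not a
# value from the original code.
#
#   uvicorn main:app --reload
#
#   curl -X POST http://127.0.0.1:8000/analyze_sentiment/ \
#        -H "Content-Type: application/json" \
#        -d '{"video_id": "VIDEO_ID_HERE"}'
#
# Alternatively, start the development server directly with `python main.py`:
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)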