# Sentiment-analysis pipeline for YouTube comments.
# (Web-viewer residue — commit hashes and line-number gutter — removed.)
import pandas as pd
import requests
import urllib.parse as urlparse
def get_video_id(url_video):
    """Extract the YouTube video id from a URL.

    Supports short links (``youtu.be/<id>``), watch URLs
    (``youtube.com/watch?v=<id>``), and ``/embed/<id>`` or ``/v/<id>`` paths.

    Args:
        url_video: A YouTube URL as a string.

    Returns:
        The video id string, or None when the URL is not recognized
        (or when a /watch URL carries no "v" query parameter).
    """
    parsed = urlparse.urlparse(url_video)
    if parsed.hostname == 'youtu.be':
        # Short link: the id is the path minus the leading slash.
        return parsed.path[1:]
    if parsed.hostname in ('www.youtube.com', 'youtube.com'):
        if parsed.path == '/watch':
            # Fix: indexing ["v"][0] raised KeyError for /watch URLs with
            # no "v" parameter; fall back to None instead.
            return urlparse.parse_qs(parsed.query).get("v", [None])[0]
        if parsed.path.startswith(('/embed/', '/v/')):
            # Path looks like /embed/<id> or /v/<id>; the id is segment 2.
            return parsed.path.split('/')[2]
    return None
def get_comments(api_key, video_id):
    """Fetch up to 100 top-level comment threads for a YouTube video.

    Calls the YouTube Data API v3 ``commentThreads`` endpoint.

    Args:
        api_key: YouTube Data API key.
        video_id: The video id (as returned by ``get_video_id``).

    Returns:
        A dict mapping a sequential index to
        ``{"text_comment": <single-line text>, "publish_data": <publishedAt>}``.
        Returns an empty dict when the response contains no "items"
        (e.g. comments disabled, bad key, or quota exceeded).
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    params = {
        "part": "snippet",
        "videoId": video_id,
        "maxResults": 100,
        "key": api_key,
    }
    # Fix: added a timeout so a stalled connection cannot hang the pipeline.
    response = requests.get(endpoint, params=params, timeout=30)
    res = response.json()
    # Fix: the original implicitly returned None when "items" was missing,
    # which made pd.DataFrame(None) crash downstream; return {} instead.
    if "items" not in res:
        return {}
    comments = {}
    for num, item in enumerate(res["items"]):
        snippet = item["snippet"]["topLevelComment"]["snippet"]
        comments[num] = {
            # Collapse multi-line comments to one line for tabular display.
            "text_comment": " ".join(snippet["textOriginal"].splitlines()),
            "publish_data": snippet["publishedAt"],
        }
    return comments
def get_sentim(data, headers, url):
    """Score one text via an HTTP sentiment-analysis endpoint.

    Posts *data* as the JSON body and expects a response shaped like
    ``[[{"label": ..., "score": ...}, ...]]`` — presumably a Hugging Face
    inference endpoint; confirm against the deployed model.

    Args:
        data: The payload to send (a comment text in this pipeline).
        headers: HTTP headers, e.g. the API authorization token.
        url: Endpoint URL.

    Returns:
        A ``(label, score)`` tuple for the top-ranked candidate.
    """
    # Fix: added a timeout so a stalled connection cannot hang the pipeline.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    top = response.json()[0][0]
    return top['label'], top['score']
def pipeline_sentiment(url_video, api_key, headers, url):
    """Run the full sentiment pipeline for one YouTube video.

    Resolves the video id, downloads its comments, scores every comment
    through the sentiment endpoint, and returns a DataFrame with columns
    ``text_comment``, ``publish_data``, ``sentiment``, and ``score``.

    Args:
        url_video: YouTube video URL.
        api_key: YouTube Data API key.
        headers: HTTP headers for the sentiment endpoint.
        url: Sentiment endpoint URL.

    Returns:
        A pandas DataFrame, one row per comment.
    """
    video_id = get_video_id(url_video)
    frame = pd.DataFrame(get_comments(api_key, video_id)).T
    scored = [get_sentim(text, headers, url) for text in frame["text_comment"]]
    frame[["sentiment", "score"]] = pd.DataFrame(list(scored))
    return frame
def pipeline_stats(data):
    """Return the percentage share of each sentiment label.

    Args:
        data: A DataFrame with a ``sentiment`` column.

    Returns:
        A Series mapping each label to its share in percent,
        rounded to two decimal places.
    """
    shares = data["sentiment"].value_counts(normalize=True)
    return (shares * 100).round(2)
def pipeline_summarize(data, headers, url, length=2000, max_length=35):
    """Summarize a collection of texts via an HTTP summarization endpoint.

    Joins all texts into one string, posts it in chunks of *length*
    characters, and concatenates the per-chunk summaries.

    NOTE(review): fixed-width slicing can split words/sentences across
    chunk boundaries — presumably acceptable for rough summaries; confirm.

    Args:
        data: Iterable of text fragments (e.g. the comment column).
        headers: HTTP headers, e.g. the API authorization token.
        url: Summarization endpoint URL.
        length: Characters per request chunk.
        max_length: ``max_length`` generation parameter forwarded to the API.

    Returns:
        The concatenated summary string ("" when *data* is empty).
    """
    text = " ".join(data)
    summaries = []
    for start in range(0, len(text), length):
        payload = {
            "inputs": text[start:start + length],
            "parameters": {"max_length": max_length},
        }
        # Fix: added a timeout so one stalled request cannot hang the loop.
        res = requests.post(url, headers=headers, json=payload, timeout=60)
        summaries.append(res.json()[0]["generated_text"])
    return " ".join(summaries)