File size: 1,522 Bytes
00320ff 1d197a9 00320ff 1d197a9 00320ff 1d197a9 00320ff 1d197a9 00320ff 1d197a9 00320ff 9f1606d 00320ff 320952b 00320ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import praw
import praw.exceptions as redditexception
import pandas as pd
import boto3
def getComments(url):
    """Fetch the comments of a Reddit submission into a DataFrame.

    Reddit API credentials (``client_id``, ``client_secret`` and
    ``user_agent``) are read, with decryption, from AWS SSM Parameter Store.

    Args:
        url: Full URL of the Reddit submission; it must contain the
            submission id.

    Returns:
        A ``pandas.DataFrame`` with one row per non-stickied comment and the
        columns ``text``, ``score``, ``id``, ``parent_id``,
        ``submission_title``, ``submission_score`` and ``submission_id``.
        If ``url`` is rejected by PRAW as invalid, a message is printed and
        an empty DataFrame with the same columns is returned.
    """
    ssm = boto3.client('ssm')
    cid = ssm.get_parameter(Name='client_id', WithDecryption=True)['Parameter']['Value']
    csecret = ssm.get_parameter(Name='client_secret', WithDecryption=True)['Parameter']['Value']
    user_agent = ssm.get_parameter(Name='user_agent', WithDecryption=True)['Parameter']['Value']

    cols = [
        "text",
        "score",
        "id",
        "parent_id",
        "submission_title",
        "submission_score",
        "submission_id",
    ]

    reddit = praw.Reddit(
        client_id=cid, client_secret=csecret, user_agent=user_agent
    )

    try:
        submission = reddit.submission(url=url)
    except redditexception.InvalidURL:
        print("The URL is invalid. Make sure that you have included the submission id")
        # Without a submission there is nothing to read; bail out here rather
        # than falling through and failing on the unbound `submission` name.
        return pd.DataFrame(columns=cols)

    # limit=0 strips the "load more comments" placeholders instead of
    # resolving them, so only Comment objects remain in the listing.
    submission.comments.replace_more(limit=0)

    rows = [
        [
            comment.body,
            comment.score,
            comment.id,
            comment.parent_id,
            submission.title,
            submission.score,
            submission.id,
        ]
        for comment in submission.comments.list()
        if not comment.stickied
    ]

    df = pd.DataFrame(data=rows, columns=cols)

    # save for testing to avoid sending tons of requests to reddit
    # df.to_csv(f'{submission.id}_comments.csv', index=False)

    return df
# Script entry point: executing this file directly intentionally does nothing.
if __name__ == "__main__":
    pass
|