File size: 1,522 Bytes
00320ff
1d197a9
00320ff
1d197a9
00320ff
 
 
 
1d197a9
 
 
 
 
00320ff
 
 
 
 
 
 
 
 
 
 
1d197a9
00320ff
 
1d197a9
 
 
 
 
00320ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f1606d
 
 
00320ff
 
 
320952b
00320ff
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import praw
import praw.exceptions as redditexception
import pandas as pd
import boto3


def getComments(url):
    """Fetch the comments of a Reddit submission into a DataFrame.

    Reddit API credentials (``client_id``, ``client_secret``, ``user_agent``)
    are read from AWS SSM Parameter Store with decryption enabled, so the
    caller needs AWS credentials that can read those three parameters.

    Args:
        url: URL of the Reddit submission; it must contain the submission id.

    Returns:
        A ``pandas.DataFrame`` with one row per non-stickied comment and the
        columns ``text``, ``score``, ``id``, ``parent_id``,
        ``submission_title``, ``submission_score`` and ``submission_id``.
        The three ``submission_*`` columns repeat the same submission-level
        values on every row.

    Raises:
        praw.exceptions.InvalidURL: If ``url`` is not a valid submission URL.
            The hint is still printed, then the exception is re-raised
            (previously execution fell through and failed with an
            ``UnboundLocalError`` on the undefined ``submission``).
    """
    ssm = boto3.client('ssm')

    def _ssm_value(name):
        # All three parameters are fetched the same way, with decryption on.
        return ssm.get_parameter(Name=name, WithDecryption=True)['Parameter']['Value']

    cid = _ssm_value('client_id')
    csecret = _ssm_value('client_secret')
    user_agent = _ssm_value('user_agent')

    cols = [
        "text",
        "score",
        "id",
        "parent_id",
        "submission_title",
        "submission_score",
        "submission_id",
    ]

    reddit = praw.Reddit(
        client_id=cid, client_secret=csecret, user_agent=user_agent
    )

    try:
        submission = reddit.submission(url=url)
    except redditexception.InvalidURL:
        print("The URL is invalid. Make sure that you have included the submission id")
        # Without a submission there is nothing to collect; propagate the
        # real error instead of crashing later on an unbound local.
        raise

    # limit=0: per the PRAW docs this drops the "load more comments"
    # placeholders instead of issuing extra requests to expand them.
    submission.comments.replace_more(limit=0)

    # Stickied comments are skipped; everything else in the flattened
    # comment list becomes one row, in the same order as `cols`.
    rows = [
        [
            comment.body,
            comment.score,
            comment.id,
            comment.parent_id,
            submission.title,
            submission.score,
            submission.id,
        ]
        for comment in submission.comments.list()
        if not comment.stickied
    ]

    df = pd.DataFrame(data=rows, columns=cols)

    # save for testing to avoid sending tons of requests to reddit
    # df.to_csv(f'{submission.id}_comments.csv', index=False)

    return df


if __name__ == "__main__":
    # Running this module directly is a no-op: the guard body is only `pass`.
    pass