File size: 4,806 Bytes
9b744c5
12ae336
 
 
 
 
 
 
 
 
 
9b744c5
 
 
 
b42fea9
9b744c5
b42fea9
 
9b744c5
7d5704e
9b744c5
b42fea9
 
9b744c5
b42fea9
9b744c5
 
18ec458
12ae336
 
9b744c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b42fea9
9b744c5
 
 
 
 
 
 
c1fc690
9b744c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1fc690
 
 
9b744c5
 
 
 
 
 
 
 
c1fc690
9b744c5
 
 
c1fc690
 
 
 
 
 
 
 
9b744c5
 
c1fc690
9b744c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18ec458
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Module which updates any of the issues to reflect changes in the issue state e.g. new comments

The module can be run from the command line using the following arguments:
            --input_filename: The name of the input file containing the issues
            --output_filename: The name of the output file to save the updated issues
            --github_api_version: The version of the GitHub API to use
            --owner: The owner of the repo
            --repo: The name of the repo
            --token: The GitHub token to use
            --n_pages: The number of pages to fetch. Useful for testing
"""
import argparse
import json
import logging
import os

import numpy as np
import requests

from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

JSON_FILE = "issues.json"


def update_issues(
    input_filename=ISSUE_JSON_FILE,
    output_filename=ISSUE_JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """
    Fetch issues updated since the most recent known update and merge them
    into the existing issues file.

    Args:
        input_filename: JSONL file of existing issues (one JSON object per line).
        output_filename: Destination JSONL file; must already exist.
        github_api_version: Value for the ``X-GitHub-Api-Version`` header.
        owner: Owner of the GitHub repository.
        repo: Name of the GitHub repository.
        token: GitHub token placed in the ``Authorization`` header.
        n_pages: Maximum number of pages to fetch; non-positive means no limit.
            Useful for testing.

    Returns:
        The output filename.

    Raises:
        ValueError: If ``output_filename`` does not exist, or a request to the
            GitHub API returns a non-200 status code.
    """
    # NOTE(review): the "most recent update" timestamp is read from a
    # hard-coded file rather than input_filename — presumably produced by a
    # companion script; confirm before changing.
    with open("issues_dict.json", "r") as f:
        issues = json.load(f)

    # ISO-8601 timestamps compare correctly as strings, so max() yields the
    # most recent update time; it becomes the `since` filter below.
    updated_at = [issue["updated_at"] for issue in issues.values()]
    most_recent = max(updated_at)

    # This routine merges updates into an existing file; refuse to run if it
    # is missing rather than silently creating a partial one.
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"{token}",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    per_page = 100
    page = 1
    query_params = {
        "state": "all",
        "since": most_recent,
        "sort": "created",
        "direction": "asc",
        # BUGFIX: per_page was previously not sent, so GitHub returned its
        # default of 30 items/page and the `< per_page` check below ended
        # pagination after one page, silently dropping updates.
        "per_page": per_page,
        "page": page,
    }

    new_lines = []

    # Non-positive n_pages disables the limit.
    page_limit = (n_pages + page) if n_pages > 0 else float("inf")
    while True:
        if page >= page_limit:
            break

        # Send the GET request
        response = requests.get(url, headers=headers, params=query_params)

        if not response.status_code == 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        json_response = response.json()
        logger.info(f"Page: {page}, number of issues: {len(json_response)}")

        # If we get an empty response, we've reached the end of the issues
        if len(json_response) == 0:
            break

        new_lines.extend(json_response)

        # If we get less than the number of issues per page, we've reached the end of the issues
        if len(json_response) < per_page:
            break

        page += 1
        query_params["page"] = page

    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    updated_issues = []

    # BUGFIX: the tmp file is opened in append mode below; a leftover file
    # from a crashed previous run would get duplicate records merged in.
    tmp_filename = "tmp_" + output_filename
    if os.path.exists(tmp_filename):
        os.remove(tmp_filename)

    # Rewrite existing issues line by line, substituting any that were
    # re-fetched above.
    with open(input_filename, "r") as f:
        with open(tmp_filename, "a") as g:
            for line in f:
                issue = json.loads(line)
                number = issue["number"]
                if number in issue_lines_map:
                    g.write(json.dumps(issue_lines_map[number]))
                    g.write("\n")
                    updated_issues.append(number)
                else:
                    g.write(line)

    # Append any new issues
    new_issues = [issue for issue in new_lines if issue["number"] not in updated_issues]
    with open(tmp_filename, "a") as g:
        for issue in new_issues:
            g.write(json.dumps(issue))
            g.write("\n")

    # Atomically replace the old file with the merged one.
    os.rename(tmp_filename, output_filename)

    # Save a record of the updated issues for the embedding update
    with open("updated_issues.json", "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename


if __name__ == "__main__":
    # CLI entry point: each option mirrors a keyword argument of update_issues.
    cli = argparse.ArgumentParser()
    string_options = {
        "--input_filename": JSON_FILE,
        "--output_filename": JSON_FILE,
        "--github_api_version": GITHUB_API_VERSION,
        "--owner": OWNER,
        "--repo": REPO,
        "--token": TOKEN,
    }
    for flag, fallback in string_options.items():
        cli.add_argument(flag, type=str, default=fallback)
    cli.add_argument("--n_pages", type=int, default=-1)
    parsed = cli.parse_args()
    update_issues(**vars(parsed))