""" Module which updates any of the issues to reflect changes in the issue state e.g. new comments The module can be run from the command line using the following arguments: --input_filename: The name of the input file containing the issues --output_filename: The name of the output file to save the updated issues --github_api_version: The version of the GitHub API to use --owner: The owner of the repo --repo: The name of the repo --token: The GitHub token to use --n_pages: The number of pages to fetch. Useful for testing """ import argparse import json import logging import os import numpy as np import requests from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) JSON_FILE = "issues.json" def update_issues( input_filename=ISSUE_JSON_FILE, output_filename=ISSUE_JSON_FILE, github_api_version=GITHUB_API_VERSION, owner=OWNER, repo=REPO, token=TOKEN, n_pages=-1, ): """ Function to get the issues from the transformers repo and save them to a json file """ with open("issues_dict.json", "r") as f: issues = json.load(f) # Get most recent updated at information updated_at = [issue["updated_at"] for issue in issues.values()] most_recent = max(updated_at) # If file exists and we want to overwrite it, delete it if not os.path.exists(output_filename): raise ValueError(f"File {output_filename} does not exist") # Define the URL and headers url = f"https://api.github.com/repos/{owner}/{repo}/issues" headers = { "Accept": "application/vnd.github+json", "Authorization": f"{token}", "X-GitHub-Api-Version": f"{github_api_version}", "User-Agent": "amyeroberts", } per_page = 100 page = 1 query_params = { "state": "all", "since": most_recent, "sort": "created", "direction": "asc", "page": page, } new_lines = [] page_limit = (n_pages + page) if n_pages > 0 else np.inf while True: if page >= page_limit: break # Send the GET request response = requests.get(url, headers=headers, params=query_params) if not response.status_code == 200: raise ValueError( f"Request failed with status code {response.status_code} and message {response.text}" ) json_response = response.json() logger.info(f"Page: {page}, number of issues: {len(json_response)}") # If we get an empty response, we've reached the end of the issues if len(json_response) == 0: break new_lines.extend(json_response) # If we get less than the number of issues per page, we've reached the end of the issues if len(json_response) < per_page: break page += 1 query_params["page"] = page issue_lines_map = {issue["number"]: issue for issue in new_lines} updated_issues = [] # Update any issues that already exist with open(input_filename, "r") as f: with open("tmp_" + output_filename, "a") as g: for line in f: issue = json.loads(line) number = issue["number"] if number in issue_lines_map: g.write(json.dumps(issue_lines_map[number])) g.write("\n") updated_issues.append(number) else: g.write(line) # Append any new issues new_issues = [issue for issue in new_lines if issue["number"] not in updated_issues] with open("tmp_" + output_filename, "a") as g: for issue in new_issues: g.write(json.dumps(issue)) g.write("\n") # Overwrite the old file with the new file os.rename("tmp_" + output_filename, output_filename) # Save a record of the updated issues for the embedding update with open("updated_issues.json", "w") as f: json.dump(issue_lines_map, f, indent=4, sort_keys=True) return output_filename if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--input_filename", type=str, default=JSON_FILE) parser.add_argument("--output_filename", type=str, default=JSON_FILE) parser.add_argument("--github_api_version", type=str, default=GITHUB_API_VERSION) parser.add_argument("--owner", type=str, default=OWNER) parser.add_argument("--repo", type=str, default=REPO) parser.add_argument("--token", type=str, default=TOKEN) parser.add_argument("--n_pages", type=int, default=-1) args = parser.parse_args() update_issues(**vars(args))