efeno committed on
Commit
c560014
·
1 Parent(s): 37ec043

reduce upload time

Browse files
Files changed (2) hide show
  1. api/external_services.py +5 -3
  2. api/main.py +4 -2
api/external_services.py CHANGED
@@ -51,21 +51,23 @@ class InitiazlizeGithubService:
51
 
52
  return False
53
 
54
- def load_repo_data(self, owner, repo):
55
  if self.validate_owner_repo(owner, repo):
56
  loader = GithubRepositoryReader(
57
  self.github_client,
58
  owner=self.owner,
59
  repo=self.repo,
60
  filter_file_extensions=(
61
- [".py", ".js", ".ts", ".md", "ipynb"],
62
  GithubRepositoryReader.FilterType.INCLUDE,
63
  ),
64
  verbose=False,
65
  concurrent_requests=25,
66
  )
67
 
68
- print(f"Loading {self.repo} repository by {self.owner}")
 
 
69
 
70
  docs = loader.load_data(branch="main")
71
  print("Documents uploaded:")
 
51
 
52
  return False
53
 
54
+ def load_repo_data(self, owner, repo, file_type):
55
  if self.validate_owner_repo(owner, repo):
56
  loader = GithubRepositoryReader(
57
  self.github_client,
58
  owner=self.owner,
59
  repo=self.repo,
60
  filter_file_extensions=(
61
+ [file_type],
62
  GithubRepositoryReader.FilterType.INCLUDE,
63
  ),
64
  verbose=False,
65
  concurrent_requests=25,
66
  )
67
 
68
+ print(
69
+ f"Loading {self.repo} repository by {self.owner}, file type: {file_type}"
70
+ )
71
 
72
  docs = loader.load_data(branch="main")
73
  print("Documents uploaded:")
api/main.py CHANGED
@@ -31,8 +31,10 @@ async def scrape_and_upload_to_activeloop(repo_request: GitHubRepoRequest):
31
  print(f"repo from user: {repo_request.githubRepoUrl}")
32
 
33
  owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
34
- docs = github_service.load_repo_data(owner, repo)
35
- activeloop_service.upload_to_activeloop(docs)
 
 
36
 
37
  return {"status": "success", "message": "Repo processed successfully"}
38
 
 
31
  print(f"repo from user: {repo_request.githubRepoUrl}")
32
 
33
  owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
34
+ file_types = [".py", ".js", ".ts", ".md", "ipynb"]
35
+ for file_type in file_types:
36
+ docs = github_service.load_repo_data(owner, repo, file_type)
37
+ activeloop_service.upload_to_activeloop(docs)
38
 
39
  return {"status": "success", "message": "Repo processed successfully"}
40