cyyeh committed on
Commit
231b6fa
•
1 Parent(s): 2323639
.env.example ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ USER=
2
+ PERSONAL_ACCESS_TOKEN=
.gitignore CHANGED
@@ -1,2 +1,5 @@
1
  .DS_Store
2
- .coverage
 
 
 
 
1
  .DS_Store
2
+ .coverage
3
+ *.txt
4
+ *.html
5
+ .env
Pipfile CHANGED
@@ -6,6 +6,9 @@ name = "pypi"
6
  [packages]
7
  streamlit = "*"
8
  requests = "*"
 
 
 
9
 
10
  [dev-packages]
11
  black = "*"
 
6
  [packages]
7
  streamlit = "*"
8
  requests = "*"
9
+ networkx = "*"
10
+ pyvis = "*"
11
+ orjson = "*"
12
 
13
  [dev-packages]
14
  black = "*"
Pipfile.lock CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_meta": {
3
  "hash": {
4
- "sha256": "5f6e3997d5cf785edec4ab60f34ebef181b10ce1362ec421a4f64eb1562132bf"
5
  },
6
  "pipfile-spec": 6,
7
  "requires": {
@@ -375,6 +375,14 @@
375
  "markers": "python_version >= '3.7'",
376
  "version": "==3.1.2"
377
  },
 
 
 
 
 
 
 
 
378
  "jsonschema": {
379
  "hashes": [
380
  "sha256:1c92d2db1900b668201f1797887d66453ab1fbfea51df8e4b46236689c427baf",
@@ -508,6 +516,14 @@
508
  "markers": "python_version >= '3.5'",
509
  "version": "==1.5.5"
510
  },
 
 
 
 
 
 
 
 
511
  "notebook": {
512
  "hashes": [
513
  "sha256:709b1856a564fe53054796c80e17a67262071c86bfbdfa6b96aaa346113c555a",
@@ -544,6 +560,44 @@
544
  "markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
545
  "version": "==1.22.4"
546
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  "packaging": {
548
  "hashes": [
549
  "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -878,6 +932,13 @@
878
  "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
879
  "version": "==0.1.0.post0"
880
  },
 
 
 
 
 
 
 
881
  "pyzmq": {
882
  "hashes": [
883
  "sha256:057176dd3f5ccf5aad4abd662d76b6a39bbf799baaf2f39cd4fdaf2eab326e43",
@@ -954,7 +1015,7 @@
954
  "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2",
955
  "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"
956
  ],
957
- "markers": "python_version < '4' and python_full_version >= '3.6.3'",
958
  "version": "==12.4.4"
959
  },
960
  "semver": {
@@ -1127,7 +1188,7 @@
1127
  "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
1128
  "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
1129
  ],
1130
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
1131
  "version": "==1.26.9"
1132
  },
1133
  "validators": {
 
1
  {
2
  "_meta": {
3
  "hash": {
4
+ "sha256": "0006fdd10642d6c40ca9dc5aaaa8f3d586465b1f5c1a306b70eb71b3559d707f"
5
  },
6
  "pipfile-spec": 6,
7
  "requires": {
 
375
  "markers": "python_version >= '3.7'",
376
  "version": "==3.1.2"
377
  },
378
+ "jsonpickle": {
379
+ "hashes": [
380
+ "sha256:7b272918b0554182e53dc340ddd62d9b7f902fec7e7b05620c04f3ccef479a0e",
381
+ "sha256:de7f2613818aa4f234138ca11243d6359ff83ae528b2185efdd474f62bcf9ae1"
382
+ ],
383
+ "markers": "python_version >= '2.7'",
384
+ "version": "==2.2.0"
385
+ },
386
  "jsonschema": {
387
  "hashes": [
388
  "sha256:1c92d2db1900b668201f1797887d66453ab1fbfea51df8e4b46236689c427baf",
 
516
  "markers": "python_version >= '3.5'",
517
  "version": "==1.5.5"
518
  },
519
+ "networkx": {
520
+ "hashes": [
521
+ "sha256:51d6ae63c24dcd33901357688a2ad20d6bcd38f9a4c5307720048d3a8081059c",
522
+ "sha256:ae99c9b0d35e5b4a62cf1cfea01e5b3633d8d02f4a0ead69685b6e7de5b85eab"
523
+ ],
524
+ "index": "pypi",
525
+ "version": "==2.8.2"
526
+ },
527
  "notebook": {
528
  "hashes": [
529
  "sha256:709b1856a564fe53054796c80e17a67262071c86bfbdfa6b96aaa346113c555a",
 
560
  "markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
561
  "version": "==1.22.4"
562
  },
563
+ "orjson": {
564
+ "hashes": [
565
+ "sha256:033dd5f91a8a967a007d3d05cbabec67040d6ff3e159ea17d3f681c3114a0d78",
566
+ "sha256:26c64da280c9e097081d12047f13b4adba776b19885da5e488c093d4a1461056",
567
+ "sha256:31367b5d8389373aff1742b66608c4bff318a9015d94981a8c1919e82fe983ca",
568
+ "sha256:32f384ff9dd555ee21508887b12316d8bd04921b396c876b4d4c87a30a3c8a13",
569
+ "sha256:349514d69ce089b0e39014345907318ddba8ceca32a187635601c391c36ecdd6",
570
+ "sha256:37b41c8869347388b1794a1c92c5e981ab764638f62e026252026a650b1b266c",
571
+ "sha256:3a1e2dfa7ba8adb7511f3560e968ff2a66e7d0cd2f454219a0ab778c3c2e1a5c",
572
+ "sha256:3ae89fd45bc9c72dcc0a489aa2411f139ee8a32468c387188be21d25f20f83d3",
573
+ "sha256:4175929ca77338e6a57ff232c0e80443411ac0b489bfff755988ae70e3f62a97",
574
+ "sha256:42376b0330cbbe5864b480de16a48f4c82aae95dca9cdcf81490e7ca87cc131a",
575
+ "sha256:43b9a44b42c67adbc02fc86efacf27a374b09971cd58e0cd9739b8a748d19be5",
576
+ "sha256:454a4d8c81882cbff19eaed90d0a4e42602970c686c8cd34071c8097e3dfdb5c",
577
+ "sha256:4f657a16f81b0497e5c67b3c151d9eb8c99d2d3a7ab996500e9ae453e8b0e0fe",
578
+ "sha256:5730e44fc20891cadea7d163a2dad723f95cf81199e1a02dac339a11437a999d",
579
+ "sha256:6165914b1a209458201bf0a99dcf5f44f58477ba23ac71d7b5e4ca197e174f30",
580
+ "sha256:696661c6b6e58361aba0b14a2c5977c049f481bc7fe41759a55e86b13b361905",
581
+ "sha256:6fdef8939f528dd9386c4941e88227a3ccf124c8278ebc7e98533294ae446ef1",
582
+ "sha256:71c285488c5f767e102a389f9efb11e93e6345247d60043efeffa616a3056945",
583
+ "sha256:7b03a3f32cd5fcdd8460de690579d1dfa1965bd333b89bd3d202907c7b49ada4",
584
+ "sha256:7e309db07b13d84bc5eb6ffdfd46f00b2301ce78871809e177f599db3f31fe24",
585
+ "sha256:8d877467096dc117500a5ed38238085a81518252db3991793a8468ca97445e6c",
586
+ "sha256:906a33c2fe834cb47daafd09d77405260caa5fa1354219bec5df9ac2b4e909fa",
587
+ "sha256:a23292d6093748eee3f7ed85dbe6c9abd24d4d399d7044bc323ff39834966d6c",
588
+ "sha256:aa0919735afbdeada9687347fa7963ca9732c60b1005832ac9d4853a9cb48be9",
589
+ "sha256:ad0faec8ee89cd50a486804b7d9a97016a1d2074298ceadbc75efc6691030b36",
590
+ "sha256:b71915140261916e50dbf62dec1b448c159a789e23bc89b3ad1e6516f677036f",
591
+ "sha256:c4d9d1edd7c92d0b35082ae3a230705206d6beb3c07a8c72ebdd980820598f9c",
592
+ "sha256:d4a380c164f8c40660ce8f96791695aa74d32a2a45d4038a2a4826d9ec2c61f6",
593
+ "sha256:f65b7d87534c567136d73f9bcdff46ad5ded2aabe8e89be7ba97395129d48e72",
594
+ "sha256:f6e691fcf0e03575bef7efd24b331282ad3c1df75855be368e4c3c2cfba6967f",
595
+ "sha256:f7b628f4dcfc0b726ede7d2024cf79107d54851451e15edeae2f4ee55554f3c6",
596
+ "sha256:fc7ecc7b38ebf6ac072efa209c5b8d02eb7af393a8a2c812fc01dcb037a86c48"
597
+ ],
598
+ "index": "pypi",
599
+ "version": "==3.7.1"
600
+ },
601
  "packaging": {
602
  "hashes": [
603
  "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
 
932
  "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
933
  "version": "==0.1.0.post0"
934
  },
935
+ "pyvis": {
936
+ "hashes": [
937
+ "sha256:29b94d0a10a01ac790d262318680369baa9e5eb8524c355758f1c3799f0c3965"
938
+ ],
939
+ "index": "pypi",
940
+ "version": "==0.2.1"
941
+ },
942
  "pyzmq": {
943
  "hashes": [
944
  "sha256:057176dd3f5ccf5aad4abd662d76b6a39bbf799baaf2f39cd4fdaf2eab326e43",
 
1015
  "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2",
1016
  "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"
1017
  ],
1018
+ "markers": "python_full_version >= '3.6.3' and python_full_version < '4.0.0'",
1019
  "version": "==12.4.4"
1020
  },
1021
  "semver": {
 
1188
  "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
1189
  "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
1190
  ],
1191
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_full_version < '4.0.0'",
1192
  "version": "==1.26.9"
1193
  },
1194
  "validators": {
README.md CHANGED
@@ -5,16 +5,35 @@
5
  The main purpose of the app is to allow Python developers to navigate a Python code base more easily by showing dependencies
6
  among files included in the directory with better visualization.
7
 
 
 
 
 
 
 
 
 
8
  ## Thoughts on solving the problem
9
 
10
  1. Build a prototype without UI interface to show dependencies among files included in the directory(output) given user's
11
  input to one GitHub public repo's URL(input)
12
- 2. Build a streamlit app to show results using some network visualization tools
13
 
14
  ## Used technologies
15
 
16
- - Streamlit
 
 
 
 
17
 
 
 
 
 
 
 
 
18
  ## References
19
 
20
  - [How to set up a perfect Python project](https://sourcery.ai/blog/python-best-practices/)
 
5
  The main purpose of the app is to allow Python developers to navigate a Python code base more easily by showing dependencies
6
  among files included in the directory with better visualization.
7
 
8
+ ## Setup
9
+
10
+ - Python version: 3.8
11
+ - `pipenv install`
12
+ - `cp .env.example .env`: fill in your GitHub username and personal access token if you need a higher [GitHub API rate limit](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting)
13
+ - `make run`
14
+ - Also refer to References to understand settings behind the project
15
+
16
  ## Thoughts on solving the problem
17
 
18
  1. Build a prototype without UI interface to show dependencies among files included in the directory(output) given user's
19
  input to one GitHub public repo's URL(input)
20
+ 2. Build a `streamlit` app to show results using some network visualization tools
21
 
22
  ## Used technologies
23
 
24
+ - `networkx`: network analysis
25
+ - `pyvis`: network visualization
26
+ - `streamlit`: web app
27
+
28
+ ## TODOs
29
 
30
+ - [ ] Build a prototype
31
+ - [ ] Finish `generate_imports_graph` implementation
32
+ - [x] Fetch python files given public GitHub repo url(owner, repo, path, ref)
33
+ - [x] Use `ast` to parse imports among given python files
34
+ - [x] Generate a basic `networkx` graph given python imports
35
+ - [x] Visualize a basic `networkx` graph using `pyvis`
36
+ - [ ] Build a `streamlit` app
37
  ## References
38
 
39
  - [How to set up a perfect Python project](https://sourcery.ai/blog/python-best-practices/)
app.py CHANGED
@@ -1,6 +1,9 @@
1
- from py_code_analyzer.code_analyzer import CodeAnalyzer
2
- from py_code_analyzer.code_fetcher import get_repository_python_files
 
3
 
4
- python_files = get_repository_python_files("cyyeh", "gradio")
5
-
6
- CodeAnalyzer(python_files).analyze_imports().report()
 
 
 
1
+ from py_code_analyzer.code_fetcher import CodeFetcher
2
+ from py_code_analyzer.code_imports_analyzer import CodeImportsAnalyzer
3
+ from py_code_analyzer.imports_graph_visualizer import ImportsGraphVisualizer
4
 
5
+ python_files = CodeFetcher().get_python_files("cyyeh", "gradio", "gradio")
6
+ code_imports_analyzer = (
7
+ CodeImportsAnalyzer(python_files).analyze_imports().generate_imports_graph()
8
+ )
9
+ ImportsGraphVisualizer().visualize(code_imports_analyzer.imports_graph)
py_code_analyzer/code_fetcher.py CHANGED
@@ -1,19 +1,48 @@
1
- """This file deals with every detail of how to get all python files in the given directory
 
2
  """
 
 
3
  import requests
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- def get_repository_python_files(owner: str, repo: str, path: str = "", ref: str = ""):
7
- """https://docs.github.com/en/rest/repos/contents#get-repository-content"""
8
- api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
9
- if ref:
10
- api_url += f"?ref={ref}"
 
 
11
 
12
- python_files = []
13
- api_results = requests.get(api_url).json()
14
 
15
- for result in api_results:
16
- if result["type"] == "file" and result["name"].endswith(".py"):
17
- python_files.append(result)
 
 
 
 
 
 
 
 
18
 
19
- return python_files
 
1
+ """CodeFetcher deals with every detail of
2
+ how to get all python files in the given directory
3
  """
4
+ import os
5
+
6
  import requests
7
 
8
+ # to increase api rate limiting
9
+ # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting
10
+ USER = os.environ.get("PERSONAL_ACCESS_TOKEN", "")
11
+ PERSONAL_ACCESS_TOKEN = os.environ.get("PERSONAL_ACCESS_TOKEN", "")
12
+
13
+
14
+ class CodeFetcher:
15
+ def get_python_files(
16
+ self,
17
+ owner: str,
18
+ repo: str,
19
+ path: str = "",
20
+ ref: str = "",
21
+ recursive: bool = True,
22
+ ):
23
+ """https://docs.github.com/en/rest/repos/contents#get-repository-content"""
24
 
25
+ api_url = (
26
+ f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
27
+ if not USER or not PERSONAL_ACCESS_TOKEN
28
+ else f"https://{USER}:{PERSONAL_ACCESS_TOKEN}@api.github.com/repos/{owner}/{repo}/contents/{path}"
29
+ )
30
+ if ref:
31
+ api_url += f"?ref={ref}"
32
 
33
+ python_files = []
34
+ api_results = requests.get(api_url).json()
35
 
36
+ for result in api_results:
37
+ if result["type"] == "file" and result["name"].endswith(".py"):
38
+ python_files.append(result)
39
+ elif (
40
+ recursive
41
+ and result["type"] == "dir"
42
+ and not result["name"].startswith(".")
43
+ ):
44
+ python_files += self.get_python_files(
45
+ owner, repo, path=result["path"], recursive=recursive
46
+ )
47
 
48
+ return python_files
py_code_analyzer/{code_analyzer.py → code_imports_analyzer.py} RENAMED
@@ -1,11 +1,15 @@
 
 
 
1
  import ast
2
  from pprint import pprint
3
 
 
4
  import requests
5
 
6
 
7
- class CodeAnalyzer:
8
- class NodeVisitor(ast.NodeVisitor):
9
  def __init__(self, imports):
10
  self.imports = imports
11
 
@@ -25,16 +29,35 @@ class CodeAnalyzer:
25
 
26
  def __init__(self, python_files):
27
  self.imports = []
 
28
  self.python_files = python_files
29
- self._node_visitor = CodeAnalyzer.NodeVisitor(self.imports)
30
 
31
  def analyze_imports(self):
32
  for python_file in self.python_files:
33
  program = requests.get(python_file["download_url"]).text
34
  tree = ast.parse(program)
35
- self.imports += [{"file_name": python_file["name"], "imports": []}]
 
 
 
 
 
 
36
  self._node_visitor.visit(tree)
37
  return self
38
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def report(self):
40
  pprint(self.imports)
 
1
+ """CodeImportsAnalyzer uses the ast module from Python's standard library
2
+ to get what modules are imported in given python files, then uses networkx to generate imports graph
3
+ """
4
  import ast
5
  from pprint import pprint
6
 
7
+ import networkx as nx
8
  import requests
9
 
10
 
11
+ class CodeImportsAnalyzer:
12
+ class _NodeVisitor(ast.NodeVisitor):
13
  def __init__(self, imports):
14
  self.imports = imports
15
 
 
29
 
30
  def __init__(self, python_files):
31
  self.imports = []
32
+ self.imports_graph = nx.DiGraph() # imports_graph is a directed graph
33
  self.python_files = python_files
34
+ self._node_visitor = CodeImportsAnalyzer._NodeVisitor(self.imports)
35
 
36
  def analyze_imports(self):
37
  for python_file in self.python_files:
38
  program = requests.get(python_file["download_url"]).text
39
  tree = ast.parse(program)
40
+ self.imports += [
41
+ {
42
+ "file_name": python_file["name"],
43
+ "file_path": python_file["path"],
44
+ "imports": [],
45
+ }
46
+ ]
47
  self._node_visitor.visit(tree)
48
  return self
49
 
50
+ def generate_imports_graph(self):
51
+ for _import in self.imports:
52
+ _nodes = _import["file_path"].split("/")
53
+ if len(_nodes):
54
+ if len(_nodes) > 1:
55
+ for first_node, second_node in zip(_nodes, _nodes[1:]):
56
+ self.imports_graph.add_edge(first_node, second_node)
57
+ else:
58
+ self.imports_graph.add_node(_nodes[0])
59
+
60
+ return self
61
+
62
  def report(self):
63
  pprint(self.imports)
py_code_analyzer/imports_graph_visualizer.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import networkx as nx
2
+ from pyvis.network import Network
3
+
4
+
5
+ class ImportsGraphVisualizer:
6
+ @classmethod
7
+ def visualize(
8
+ cls,
9
+ imports_graph: nx.Graph,
10
+ width: int = 100,
11
+ height: int = 100,
12
+ display_html_name: str = "nx.html",
13
+ ):
14
+ _pyvis_network = Network(f"{width}%", f"{height}%")
15
+ _pyvis_network.from_nx(imports_graph)
16
+ _pyvis_network.show(display_html_name)