efeno committed on
Commit
8f0c52a
·
1 Parent(s): b557058

conf files

Browse files
Files changed (5) hide show
  1. Dockerfile +14 -0
  2. api/external_services.py +111 -0
  3. api/main.py +59 -0
  4. requirements.txt +5 -0
  5. resources.yaml +2 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# You will also find guides there on how best to write your Dockerfile.

FROM python:3.9

# All following relative paths (and the CMD below) resolve against /code.
WORKDIR /code

# Copy only the dependency manifest first so the install layer below can be
# reused from the build cache when just the application code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the remaining build context into /code.
COPY . .

# Serve the `app` object from api/main.py on all interfaces.
# NOTE(review): 7860 is presumably the port the hosting Space expects — confirm.
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
api/external_services.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ from fastapi import HTTPException
4
+ from dotenv import load_dotenv
5
+ from llama_index import download_loader
6
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
7
+ from llama_index import VectorStoreIndex
8
+ from llama_index.vector_stores import DeepLakeVectorStore
9
+ from llama_index.storage.storage_context import StorageContext
10
+ import yaml
11
+
12
# Load variables from a .env file into the process environment before the
# key lookups below.
load_dotenv()

# Fetch the OpenAI API key from the environment.
openai_api_key = os.getenv("OPENAI_API_KEY")


# Fail fast at import time when the OpenAI API key is missing or empty.
if not openai_api_key:
    raise EnvironmentError("OpenAI API key not found in environment variables")
21
+
22
+
23
def get_validate_token(token_name):
    """Return the value of the environment variable named *token_name*.

    Args:
        token_name: Name of the environment variable to read.

    Returns:
        The variable's non-empty value.

    Raises:
        EnvironmentError: If the variable is unset or empty.
    """
    value = os.environ.get(token_name)
    if value:
        return value
    raise EnvironmentError(f"{token_name} not found in environment variables")
28
+
29
+
30
class InitiazlizeGithubService:
    """Load files from a GitHub repository as llama-index documents.

    The service reads ``GITHUB_TOKEN`` from the environment, builds a
    ``GithubClient`` with it, and exposes helpers to parse a repository URL
    and to fetch the repository's ``.py``/``.js``/``.ts``/``.md`` files.
    """

    # Matches http(s)://[www.]github.com/<owner>/<repo>, tolerating a trailing
    # ".git", extra path segments, a query string or a fragment, none of which
    # are part of the repository name.
    _GITHUB_URL_PATTERN = re.compile(
        r"^https?://(?:www\.)?github\.com/"
        r"([^/\s?#]+)/([^/\s?#]+?)(?:\.git)?(?:[/?#].*)?$"
    )

    def __init__(self):
        """Validate the GitHub token and create the API client.

        Raises:
            EnvironmentError: If ``GITHUB_TOKEN`` is unset or empty.
        """
        self.owner = None
        self.repo = None
        self.github_token = get_validate_token("GITHUB_TOKEN")
        self.github_client = self.initialize_github_client(self.github_token)
        # NOTE(review): the return value is discarded and the reader class is
        # already imported at module level, so this call looks redundant —
        # confirm before removing.
        download_loader("GithubRepositoryReader")

    def initialize_github_client(self, github_token):
        """Return a ``GithubClient`` authenticated with *github_token*."""
        return GithubClient(github_token)

    def parse_github_url(self, url):
        """Extract ``(owner, repo)`` from a GitHub repository URL.

        Surrounding whitespace, a trailing ``.git``, deeper paths such as
        ``/tree/main``, query strings and fragments are ignored.

        Args:
            url: Repository URL, e.g. ``https://github.com/owner/repo``.

        Returns:
            A ``(owner, repo)`` tuple, or ``(None, None)`` when *url* is not
            a string or is not a GitHub repository URL.
        """
        if not isinstance(url, str):
            return (None, None)
        match = self._GITHUB_URL_PATTERN.match(url.strip())
        return match.groups() if match else (None, None)

    def validate_owner_repo(self, owner, repo):
        """Store *owner* and *repo* on the instance when both are non-empty.

        Returns:
            ``True`` if both values were truthy and stored, else ``False``.
        """
        if owner and repo:
            self.owner = owner
            self.repo = repo
            return True

        return False

    def load_repo_data(self, owner, repo, branch="main"):
        """Download the repository's files and return them as documents.

        Args:
            owner: Repository owner (user or organisation).
            repo: Repository name.
            branch: Branch to read; defaults to ``"main"`` so existing
                callers keep their behaviour.

        Returns:
            The documents produced by ``GithubRepositoryReader.load_data``.

        Raises:
            HTTPException: With status 400 when *owner* or *repo* is empty.
        """
        if not self.validate_owner_repo(owner, repo):
            raise HTTPException(
                status_code=400,
                detail="Invalid GitHub URL. Please enter a valid GitHub URL",
            )

        loader = GithubRepositoryReader(
            self.github_client,
            owner=self.owner,
            repo=self.repo,
            filter_file_extensions=(
                [".py", ".js", ".ts", ".md"],
                GithubRepositoryReader.FilterType.INCLUDE,
            ),
            verbose=False,
            concurrent_requests=5,
        )

        print(f"Loading {self.repo} repository by {self.owner}")

        docs = loader.load_data(branch=branch)
        print("Documents uploaded:")
        for doc in docs:
            print(doc.metadata)

        return docs
82
+
83
+
84
class InitiazlizeActiveloopService:
    """Store documents in, and query them from, a Deep Lake vector store.

    The dataset is opened lazily and without overwriting, so building a
    service only to query it does not erase previously uploaded data. The
    dataset is recreated (``overwrite=True``) only by
    ``upload_to_activeloop``.
    """

    def __init__(self):
        """Validate the Activeloop token and read the dataset path.

        Raises:
            EnvironmentError: If ``ACTIVELOOP_TOKEN`` is unset or empty.
        """
        self.active_loop_token = get_validate_token(
            "ACTIVELOOP_TOKEN"
        )  # Check for Activeloop Token
        self.dataset_path = self.get_user_info("dataset_path")
        self.index = None
        # Backing fields for the lazy properties below.
        self._vector_store = None
        self._storage_context = None
        self._query_engine = None

    def _open_vector_store(self, overwrite):
        """Open the Deep Lake store; *overwrite* controls whether it is reset."""
        return DeepLakeVectorStore(
            dataset_path=f"hub://{self.dataset_path}",
            overwrite=overwrite,
            runtime={"tensor_db": True},
        )

    @property
    def vector_store(self):
        """Vector store handle, opened without overwriting on first access."""
        if self._vector_store is None:
            self._vector_store = self._open_vector_store(overwrite=False)
        return self._vector_store

    @vector_store.setter
    def vector_store(self, store):
        self._vector_store = store
        # Objects derived from the previous store are no longer valid.
        self._storage_context = None
        self._query_engine = None
        self.index = None

    @property
    def storage_context(self):
        """Storage context wrapping ``vector_store``, built on first access."""
        if self._storage_context is None:
            self._storage_context = StorageContext.from_defaults(
                vector_store=self.vector_store
            )
        return self._storage_context

    @storage_context.setter
    def storage_context(self, context):
        self._storage_context = context

    @property
    def query_engine(self):
        """Query engine over the index.

        When nothing was uploaded through this instance, the index is built
        from the data already present in the vector store.
        """
        if self._query_engine is None:
            if self.index is None:
                self.index = VectorStoreIndex.from_vector_store(self.vector_store)
            self._query_engine = self.index.as_query_engine()
        return self._query_engine

    @query_engine.setter
    def query_engine(self, engine):
        self._query_engine = engine

    def upload_to_activeloop(self, docs):
        """Replace the dataset's contents with an index built from *docs*.

        Args:
            docs: Documents to embed and store.
        """
        # Recreate the dataset only now, when there is new data to write.
        self.vector_store = self._open_vector_store(overwrite=True)
        self.index = VectorStoreIndex.from_documents(
            docs, storage_context=self.storage_context
        )
        self._query_engine = self.index.as_query_engine()

    def get_user_info(self, user_info):
        """Return ``info[user_info]`` from ``resources.yaml``.

        The file is resolved relative to the current working directory.

        Raises:
            KeyError: If the ``info`` section or the requested key is missing.
        """
        with open("resources.yaml", "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)

        return yaml_data["info"][user_info]
api/main.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import textwrap
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from dotenv import load_dotenv
5
+
6
try:
    # Package-qualified import: required when the app is started from the
    # project root as `uvicorn api.main:app`.
    from api.external_services import (
        InitiazlizeActiveloopService,
        InitiazlizeGithubService,
    )
except ModuleNotFoundError as exc:
    # Fall back only when the `api` package itself cannot be found (e.g. the
    # app is started from inside the api/ directory); any other missing
    # module is a real error and must surface unchanged.
    if exc.name not in ("api", "api.external_services"):
        raise
    from external_services import (
        InitiazlizeActiveloopService,
        InitiazlizeGithubService,
    )

# Load environment variables
load_dotenv()


app = FastAPI()
13
+
14
+
15
class GitHubRepoRequest(BaseModel):
    """Request body carrying the GitHub repository URL to process."""

    # Repository URL as submitted by the client.
    githubRepoUrl: str
17
+
18
+
19
class UserCodeRequest(BaseModel):
    """Request body carrying the user's code or question text."""

    # Text submitted by the client.
    userCode: str
21
+
22
+
23
@app.post("/upload")
async def scrape_and_upload_to_activeloop(repo_request: GitHubRepoRequest):
    """Load a GitHub repository and index its files in Activeloop.

    Args:
        repo_request: Request body holding the repository URL.

    Returns:
        A dict with ``status`` and ``message`` keys on success.

    Raises:
        HTTPException: Propagated from ``load_repo_data`` (status 400) when
            the URL is not a valid GitHub repository URL.
    """
    github_service = InitiazlizeGithubService()

    print(f"repo from user: {repo_request.githubRepoUrl}")

    owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
    docs = github_service.load_repo_data(owner, repo)

    # Touch the vector store only after the repository has been loaded, so an
    # invalid URL or a failed download cannot disturb previously indexed data.
    activeloop_service = InitiazlizeActiveloopService()
    activeloop_service.upload_to_activeloop(docs)

    return {"status": "success", "message": "Repo processed successfully"}
38
+
39
+
40
@app.post("/retrieve")
async def find_similar_code_and_explain(code_request: UserCodeRequest):
    """Query the indexed repository with the user's text.

    Args:
        code_request: Request body holding the code or question to look up.

    Returns:
        A dict whose ``answer`` key holds the query engine's response.

    Raises:
        HTTPException: With status 503 when no query engine is available.
    """
    # Local import keeps this handler self-contained; the module imports only
    # `FastAPI` from fastapi at the top level.
    from fastapi import HTTPException

    activeloop_service = InitiazlizeActiveloopService()

    print(f"code from user: {code_request.userCode}")

    question = code_request.userCode
    print(f"Test question: {question}")
    print("=" * 50)

    # Guard against a service instance that has no query engine yet instead
    # of failing with an AttributeError (surfaced as an opaque 500).
    query_engine = getattr(activeloop_service, "query_engine", None)
    if query_engine is None:
        raise HTTPException(
            status_code=503,
            detail="No indexed repository is available. Call /upload first.",
        )

    answer = query_engine.query(question)
    print(f"Answer: {textwrap.fill(str(answer), 100)} \n")

    return {
        "answer": answer,
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
llama-index==0.9.38
deeplake==3.8.17
openai==1.10.0
python-dotenv==1.0.1
llama-hub==0.0.76
# Imported by the api/ modules and launched by the Dockerfile CMD, so they
# must be declared explicitly.
fastapi==0.109.0
uvicorn==0.27.0
PyYAML==6.0.1
resources.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Settings looked up by key under `info` via get_user_info().
info:
  # Deep Lake dataset location; the service prefixes it with "hub://".
  dataset_path: manufe_test/code_retriever