Spaces:
Runtime error
Runtime error
conf files
Browse files- Dockerfile +14 -0
- api/external_services.py +111 -0
- api/main.py +59 -0
- requirements.txt +5 -0
- resources.yaml +2 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for the Hugging Face Docker Space that serves the FastAPI app.
# Reference: https://huggingface.co/docs/hub/spaces-sdks-docker
# (the same page links to guides on how best to write a Dockerfile).

FROM python:3.9

WORKDIR /code

# api/main.py imports `external_services` as a top-level module, while uvicorn
# loads the app as `api.main` from /code. Putting /code/api on the module
# search path makes that import resolvable without changing the working
# directory (resources.yaml is opened relative to /code).
ENV PYTHONPATH=/code/api

# Install dependencies first so this layer is reused when only sources change.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
api/external_services.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import os
|
3 |
+
from fastapi import HTTPException
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from llama_index import download_loader
|
6 |
+
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
|
7 |
+
from llama_index import VectorStoreIndex
|
8 |
+
from llama_index.vector_stores import DeepLakeVectorStore
|
9 |
+
from llama_index.storage.storage_context import StorageContext
|
10 |
+
import yaml
|
11 |
+
|
12 |
+
# Load environment variables through python-dotenv before any key is read.
load_dotenv()

# The OpenAI key is looked up once, at import time.
openai_api_key = os.getenv("OPENAI_API_KEY")

# An unset or empty key aborts the import of this module.
if openai_api_key is None or openai_api_key == "":
    raise EnvironmentError("OpenAI API key not found in environment variables")
|
21 |
+
|
22 |
+
|
23 |
+
def get_validate_token(token_name):
    """Return the value of the environment variable ``token_name``.

    Surrounding whitespace is stripped, so a value that is empty or consists
    only of whitespace is treated as missing instead of being handed on as a
    credential.

    Args:
        token_name: Name of the environment variable to read.

    Returns:
        The stripped, non-empty value of the variable.

    Raises:
        EnvironmentError: If the variable is unset, empty or whitespace-only.
    """
    token = (os.getenv(token_name) or "").strip()
    if not token:
        raise EnvironmentError(f"{token_name} not found in environment variables")
    return token
|
28 |
+
|
29 |
+
|
30 |
+
class InitiazlizeGithubService:
    """Load the files of a GitHub repository as documents.

    The service reads ``GITHUB_TOKEN`` from the environment, builds a
    ``GithubClient`` with it and uses ``GithubRepositoryReader`` to fetch the
    repository contents.
    """

    # Accepts http/https and an optional "www." prefix. The owner and
    # repository segments stop at "/", "?", "#" or whitespace so that deep
    # links and URLs with a query string or fragment still resolve to
    # owner/repo.
    _REPO_URL_PATTERN = re.compile(
        r"https?://(?:www\.)?github\.com/([^/\s?#]+)/([^/\s?#]+)"
    )

    def __init__(self):
        # Both are filled in by validate_owner_repo().
        self.owner = None
        self.repo = None
        self.github_token = get_validate_token("GITHUB_TOKEN")  # Check for GitHub Token
        self.github_client = self.initialize_github_client(self.github_token)
        # GithubRepositoryReader is imported from llama_hub at module level, so
        # the former download_loader("GithubRepositoryReader") call, whose
        # return value was discarded, is not needed to use the reader.

    def initialize_github_client(self, github_token):
        """Return a ``GithubClient`` built from ``github_token``."""
        return GithubClient(github_token)

    def parse_github_url(self, url):
        """Extract ``(owner, repo)`` from a GitHub repository URL.

        A trailing ``.git`` on the repository name is removed, and anything
        after the repository segment (sub-paths, query string, fragment) is
        ignored.

        Args:
            url: The URL to parse.

        Returns:
            A ``(owner, repo)`` tuple, or ``(None, None)`` when ``url`` is not
            a string or does not look like a GitHub repository URL.
        """
        if not isinstance(url, str):
            return (None, None)

        match = self._REPO_URL_PATTERN.match(url.strip())
        if match is None:
            return (None, None)

        owner, repo = match.groups()
        # Clone URLs end in ".git", which is not part of the repository name.
        if repo.endswith(".git"):
            repo = repo[: -len(".git")]
        if not repo:
            return (None, None)
        return (owner, repo)

    def validate_owner_repo(self, owner, repo):
        """Store ``owner`` and ``repo`` on the instance when both are truthy.

        Returns:
            ``True`` if both values were accepted and stored, else ``False``.
        """
        if bool(owner) and bool(repo):
            self.owner = owner
            self.repo = repo
            return True

        return False

    def load_repo_data(self, owner, repo, branch="main"):
        """Fetch the repository's files as documents.

        Only files with the extensions ``.py``, ``.js``, ``.ts`` and ``.md``
        are included.

        Args:
            owner: Repository owner, as returned by ``parse_github_url``.
            repo: Repository name, as returned by ``parse_github_url``.
            branch: Branch to read. Defaults to ``"main"``.

        Returns:
            The documents returned by ``GithubRepositoryReader.load_data``.

        Raises:
            HTTPException: With status 400 when ``owner`` or ``repo`` is
                missing.
        """
        if not self.validate_owner_repo(owner, repo):
            raise HTTPException(
                status_code=400,
                detail="Invalid GitHub URL. Please enter a valid GitHub URL",
            )

        loader = GithubRepositoryReader(
            self.github_client,
            owner=self.owner,
            repo=self.repo,
            filter_file_extensions=(
                [".py", ".js", ".ts", ".md"],
                GithubRepositoryReader.FilterType.INCLUDE,
            ),
            verbose=False,
            concurrent_requests=5,
        )

        print(f"Loading {self.repo} repository by {self.owner}")

        docs = loader.load_data(branch=branch)
        print("Documents uploaded:")
        for doc in docs:
            print(doc.metadata)

        return docs
|
82 |
+
|
83 |
+
|
84 |
+
class InitiazlizeActiveloopService:
    """Index documents into a Deep Lake vector store and expose a query engine.

    The dataset path is read from ``resources.yaml`` (``info.dataset_path``)
    and opened as ``hub://<dataset_path>``.
    """

    def __init__(self, *, overwrite=True):
        """Open the vector store and build its storage context.

        Args:
            overwrite: Forwarded to ``DeepLakeVectorStore``. Defaults to
                ``True``, the value that was previously hard-coded, so
                existing callers behave as before. Pass ``False`` to open the
                dataset without requesting an overwrite.

        Raises:
            EnvironmentError: If ``ACTIVELOOP_TOKEN`` is not set.
        """
        self.active_loop_token = get_validate_token(
            "ACTIVELOOP_TOKEN"
        )  # Check for Activeloop Token
        self.dataset_path = self.get_user_info("dataset_path")
        self.vector_store = DeepLakeVectorStore(
            dataset_path=f"hub://{self.dataset_path}",
            overwrite=overwrite,
            runtime={"tensor_db": True},
        )

        self.storage_context = StorageContext.from_defaults(
            vector_store=self.vector_store
        )

        # Both are populated by upload_to_activeloop(). They are defined here
        # so that a fresh instance exposes None rather than a missing
        # attribute.
        self.index = None
        self.query_engine = None

    def upload_to_activeloop(self, docs):
        """Build the index from ``docs`` and create its query engine.

        Args:
            docs: Documents to index, e.g. the result of
                ``InitiazlizeGithubService.load_repo_data``.
        """
        self.index = VectorStoreIndex.from_documents(
            docs, storage_context=self.storage_context
        )
        self.query_engine = self.index.as_query_engine()

    def get_user_info(self, user_info):
        """Return the entry ``user_info`` from the ``info`` section of resources.yaml.

        The file is opened relative to the current working directory.

        Args:
            user_info: Key to look up under ``info``.

        Returns:
            The value stored under ``info[user_info]``.

        Raises:
            KeyError: If the file has no ``info`` mapping or the key is absent.
        """
        with open("resources.yaml", "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)

        # An empty file is loaded as None, so check the shape before indexing
        # and report every "not configured" case as the same KeyError.
        info = yaml_data.get("info") if isinstance(yaml_data, dict) else None
        if not isinstance(info, dict) or user_info not in info:
            raise KeyError(
                f"'{user_info}' not found under 'info' in resources.yaml"
            )

        return info[user_info]
|
api/main.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import textwrap
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from pydantic import BaseModel
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
from external_services import InitiazlizeGithubService, InitiazlizeActiveloopService
|
7 |
+
|
8 |
+
# Read environment variables via python-dotenv before the app is created.
load_dotenv()


# Application object that the route decorators below register on.
app = FastAPI()
|
13 |
+
|
14 |
+
|
15 |
+
class GitHubRepoRequest(BaseModel):
    """Request body of the ``/upload`` endpoint."""

    # URL of the GitHub repository to process.
    githubRepoUrl: str
|
17 |
+
|
18 |
+
|
19 |
+
class UserCodeRequest(BaseModel):
    """Request body of the ``/retrieve`` endpoint."""

    # Text submitted by the user; it is passed to the query engine as the query.
    userCode: str
|
21 |
+
|
22 |
+
|
23 |
+
@app.post("/upload")
async def scrape_and_upload_to_activeloop(repo_request: GitHubRepoRequest):
    """Load a GitHub repository and index its files in Activeloop.

    Args:
        repo_request: Request body carrying the repository URL.

    Returns:
        A dict with ``status`` and ``message`` keys on success.

    Raises:
        HTTPException: Raised by ``load_repo_data`` with status 400 when the
            URL cannot be parsed into an owner and a repository.
    """
    print(f"repo from user: {repo_request.githubRepoUrl}")

    github_service = InitiazlizeGithubService()
    owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
    docs = github_service.load_repo_data(owner, repo)

    # The Activeloop service is created only after the repository has been
    # loaded. Its constructor opens the dataset with overwrite=True, so
    # building it first would touch the dataset even for a request that is
    # then rejected.
    activeloop_service = InitiazlizeActiveloopService()
    activeloop_service.upload_to_activeloop(docs)

    return {"status": "success", "message": "Repo processed successfully"}
|
38 |
+
|
39 |
+
|
40 |
+
@app.post("/retrieve")
async def find_similar_code_and_explain(code_request: UserCodeRequest):
    """Run the user's text as a query against the indexed repository.

    Args:
        code_request: Request body carrying the text to query with.

    Returns:
        A dict whose ``answer`` key holds the query result as text.

    Raises:
        HTTPException: With status 409 when the service has no query engine.
    """
    # Imported locally because HTTPException is not among this module's
    # top-level imports.
    from fastapi import HTTPException

    activeloop_service = InitiazlizeActiveloopService()

    # NOTE(review): query_engine is only assigned by upload_to_activeloop(),
    # and the service constructor opens the dataset with overwrite=True, so a
    # service created here cannot see what /upload indexed. Confirm how this
    # endpoint is meant to reach that index (e.g. a shared service instance).
    query_engine = getattr(activeloop_service, "query_engine", None)
    if query_engine is None:
        raise HTTPException(
            status_code=409,
            detail="No repository has been indexed yet. Call /upload first.",
        )

    print(f"code from user: {code_request.userCode}")

    intro_question = code_request.userCode
    print(f"Test question: {intro_question}")
    print("=" * 50)

    answer = query_engine.query(intro_question)
    # Return the text form (the same one that is logged) so the response body
    # is a plain string rather than the library's result object.
    answer_text = str(answer)
    print(f"Answer: {textwrap.fill(answer_text, 100)} \n")

    return {
        "answer": answer_text,
    }
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
llama-index==0.9.38
deeplake==3.8.17
openai==1.10.0
python-dotenv==1.0.1
llama-hub==0.0.76
# Imported directly by api/main.py and api/external_services.py.
fastapi==0.109.0
PyYAML==6.0.1
# Executed by the Dockerfile's CMD.
uvicorn==0.27.0
|
resources.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
info:
|
2 |
+
dataset_path: manufe_test/code_retriever
|