sebastianalgharaballi
committed on
Upload main.py
Browse files
main.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main.py
|
2 |
+
|
3 |
+
import os
from datetime import datetime
from typing import List, Optional, Dict, Any

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from two_phase_search import TwoPhaseSearchSystem
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from embeddings import JobPosting, Skill, EmbeddingManager, IndependentJobSeekerAssessmentRDS, JobseekerInfoRDS
from encoder import create_encoders
|
13 |
+
|
14 |
+
app = FastAPI()

# Initialize systems
chroma_matcher = ChromaMatchingSystem(collection_name="job_seekers")
opensearch_client = OpenSearchClient()
# Base URL handed to the two-phase search system. The default is the
# original hard-coded value (it looks like a personal ngrok development
# tunnel); set SEARCH_BASE_URL to point a deployment somewhere else without
# editing this file.
BASE_URL = os.environ.get("SEARCH_BASE_URL", "https://dev-abhinav.ngrok.io")
search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client, BASE_URL)

# Initialize encoders and embedding manager
# create_encoders returns a (job encoder, seeker encoder) pair for the named
# model; both are shared by every request through embedding_manager.
job_encoder, seeker_encoder = create_encoders('all-mpnet-base-v2')
embedding_manager = EmbeddingManager(job_encoder, seeker_encoder)
|
25 |
+
|
26 |
+
# New schemas
|
27 |
+
class JobPostingEmbeddingRequest(BaseModel):
    # Request body for POST /embed/jobposting.
    # Free-text sections of the posting:
    title: str
    role_description: str
    company_description: str
    # Skills arrive as plain skill-name strings; the endpoint wraps each one
    # in a Skill object before embedding.
    primary_skills: List[str]
    secondary_skills: List[str]
|
33 |
+
|
34 |
+
class JobPostingEmbeddingResponse(BaseModel):
    # Response body for POST /embed/jobposting: one embedding vector,
    # serialised as a list of floats, per section of the request.
    title_embedding: List[float]
    role_description_embedding: List[float]
    company_description_embedding: List[float]
    primary_skills_embedding: List[float]
    secondary_skills_embedding: List[float]
|
40 |
+
|
41 |
+
class Title(BaseModel):
    # Job title of an experience entry; only its name is carried.
    name: str
|
43 |
+
|
44 |
+
class ExperienceBaseWithoutCompanyCore(BaseModel):
    # Fields of a work-experience entry that do not involve the company.
    # Both list fields are optional and default to an empty list.
    location_names: List[str] = []
    title: Title
    is_primary: bool
    experience_summaries: List[str] = []
|
49 |
+
|
50 |
+
class EmbeddingExperience(ExperienceBaseWithoutCompanyCore):
    # Experience entry as accepted by the embedding endpoint: the base
    # fields plus the company, given by name only.
    company_name: str
|
52 |
+
|
53 |
+
class EducationCore(BaseModel):
    # One education entry. Every field is optional and defaults to None;
    # the jobseeker endpoint substitutes "" for missing values.
    institution: Optional[str] = None
    degree: Optional[str] = None
    field_of_study: Optional[str] = None
|
57 |
+
|
58 |
+
class CertificationCore(BaseModel):
    # One certification entry: a required name and an optional issuing
    # organization.
    organization: Optional[str] = None
    name: str
    # start_date and end_date are not part of the request; the jobseeker
    # endpoint maps them to '' when building the downstream payload.
|
62 |
+
|
63 |
+
class JobseekerEmbeddingRequest(BaseModel):
    # Request body for POST /embed/jobseeker.
    summary: str
    # A single flat list of skill names; the endpoint forwards all of them
    # as primary skills.
    skills: List[str]
    educations: List[EducationCore]
    certifications: List[CertificationCore]
    experiences: List[EmbeddingExperience]
|
69 |
+
|
70 |
+
class JobseekerEmbeddingResponse(BaseModel):
    # Response body for POST /embed/jobseeker: one embedding vector,
    # serialised as a list of floats, per section of the request.
    summary_embedding: List[float]
    skills_embedding: List[float]
    educations_embedding: List[float]
    certifications_embedding: List[float]
    experiences_embedding: List[float]
|
76 |
+
|
77 |
+
@app.post("/embed/jobposting", response_model=JobPostingEmbeddingResponse)
async def create_jobposting_embedding(req: JobPostingEmbeddingRequest):
    """Embed each section of a job posting.

    Builds a JobPosting from the request (skill names are wrapped in Skill
    objects), runs it through the embedding manager, and returns one
    embedding per section as a list of floats.

    Any failure is reported as HTTP 500 with the error text as the detail.
    """
    # Section names as keyed in the embedding manager's result; each one is
    # returned under "<section>_embedding".
    sections = (
        "title",
        "role_description",
        "company_description",
        "primary_skills",
        "secondary_skills",
    )
    try:
        job = JobPosting(
            title=req.title,
            role_description=req.role_description,
            company_description=req.company_description,
            primary_skills=[Skill(skill_name=name) for name in req.primary_skills],
            secondary_skills=[Skill(skill_name=name) for name in req.secondary_skills],
        )

        embeddings = embedding_manager.embed_jobposting(job)

        # .tolist() turns each embedding into a JSON-serialisable list.
        return {
            f"{section}_embedding": embeddings[section].tolist()
            for section in sections
        }
    except Exception as exc:
        # Chain the original error so server-side tracebacks show the real
        # cause instead of only the HTTPException.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
99 |
+
|
100 |
+
@app.post("/embed/jobseeker", response_model=JobseekerEmbeddingResponse)
async def create_jobseeker_embedding(req: JobseekerEmbeddingRequest):
    """Embed each section of a jobseeker profile.

    Reshapes the request into the processed/unprocessed objects the
    embedding manager takes, runs the embedding, and returns one embedding
    per section as a list of floats.

    Any failure is reported as HTTP 500 with the error text as the detail.
    """
    # Response field -> key in the embedding manager's result. Per the
    # original author's note the result is keyed by: summary, experience,
    # primary_skills, secondary_skills, certifications, education.
    # All request skills are sent as primary skills, so secondary_skills is
    # ignored here.
    response_sources = {
        "summary_embedding": "summary",
        "skills_embedding": "primary_skills",
        "educations_embedding": "education",
        "certifications_embedding": "certifications",
        "experiences_embedding": "experience",
    }
    try:
        experiences = [
            {
                "experience_summaries": exp.experience_summaries,
                "title": exp.title.name,
                "is_primary": exp.is_primary,
                "location_names": exp.location_names,
                # Map company_name to a company dict; an empty name becomes None.
                "company": {"name": exp.company_name} if exp.company_name else None,
                # Dates are not part of the request, so they are sent empty.
                "start_date": "",
                "end_date": "",
            }
            for exp in req.experiences
        ]
        # Missing (None) or empty education/certification values become "".
        educations = [
            {
                "degree": edu.degree or "",
                "field": edu.field_of_study or "",
                "institution": edu.institution or "",
            }
            for edu in req.educations
        ]
        certifications = [
            {
                "name": cert.name,
                "organization": cert.organization or "",
                "start_date": "",
                "end_date": "",
            }
            for cert in req.certifications
        ]

        # Map all skills into primary_skills, leave secondary_skills empty.
        processed_obj = IndependentJobSeekerAssessmentRDS(
            primary_skills=req.skills,
            secondary_skills=[],
            experiences=experiences,
            educations=educations,
            certifications=certifications,
        )
        unprocessed_obj = JobseekerInfoRDS(summary=req.summary)

        embeddings = embedding_manager.embed_jobseeker(processed_obj, unprocessed_obj)

        # .tolist() turns each embedding into a JSON-serialisable list.
        return {
            field: embeddings[key].tolist()
            for field, key in response_sources.items()
        }
    except Exception as exc:
        # Chain the original error so server-side tracebacks show the real
        # cause instead of only the HTTPException.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
157 |
+
|
158 |
+
if __name__ == "__main__":
    # uvicorn is imported here because it is only needed when this file is
    # executed directly as a script.
    import uvicorn

    # Serve the app on every network interface, port 8001.
    uvicorn.run(app, host="0.0.0.0", port=8001)
|