sebastianalgharaballi committed on
Commit
550d2ef
·
verified ·
1 Parent(s): 29e2c27

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +160 -0
main.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
import logging
import os
from datetime import datetime
from typing import List, Optional, Dict, Any

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from chroma_storage import ChromaMatchingSystem
from embeddings import JobPosting, Skill, EmbeddingManager, IndependentJobSeekerAssessmentRDS, JobseekerInfoRDS
from encoder import create_encoders
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem
13
+
14
app = FastAPI()

# Initialize systems
chroma_matcher = ChromaMatchingSystem(collection_name="job_seekers")
opensearch_client = OpenSearchClient()
# Base URL passed to the two-phase search system. The historical default is a
# developer tunnel, so deployments can override it through the
# SEARCH_BASE_URL environment variable; an unset or empty variable keeps the
# original value.
BASE_URL = os.environ.get("SEARCH_BASE_URL") or "https://dev-abhinav.ngrok.io"
search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client, BASE_URL)

# Initialize encoders and embedding manager
# create_encoders returns a (job encoder, seeker encoder) pair for the named model.
job_encoder, seeker_encoder = create_encoders('all-mpnet-base-v2')
embedding_manager = EmbeddingManager(job_encoder, seeker_encoder)
25
+
26
+ # New schemas
27
# Request body for POST /embed/jobposting.
# Carries the three free-text sections of a posting plus two lists of skill
# names; the endpoint returns one embedding per field.
class JobPostingEmbeddingRequest(BaseModel):
    title: str
    role_description: str
    company_description: str
    # Plain skill names; the endpoint wraps each one in a Skill object.
    primary_skills: List[str]
    secondary_skills: List[str]
33
+
34
# Response body for POST /embed/jobposting.
# One vector (as a plain list of floats) for each field of the request.
class JobPostingEmbeddingResponse(BaseModel):
    title_embedding: List[float]
    role_description_embedding: List[float]
    company_description_embedding: List[float]
    primary_skills_embedding: List[float]
    secondary_skills_embedding: List[float]
40
+
41
# Job title wrapper used inside an experience entry; the jobseeker endpoint
# reads only its `name`.
class Title(BaseModel):
    name: str
43
+
44
# Shared fields of a work-experience entry, excluding any company information.
class ExperienceBaseWithoutCompanyCore(BaseModel):
    # Optional in the payload; defaults to an empty list when omitted.
    location_names: List[str] = []
    title: Title
    is_primary: bool
    # Optional in the payload; defaults to an empty list when omitted.
    experience_summaries: List[str] = []
49
+
50
# Experience entry as accepted by the jobseeker embedding endpoint: the shared
# experience fields plus a flat company name.
class EmbeddingExperience(ExperienceBaseWithoutCompanyCore):
    # The endpoint maps this to {"name": company_name}, or None when empty.
    company_name: str
52
+
53
# One education entry. Every field is optional; the jobseeker endpoint
# substitutes "" for any value that is missing.
class EducationCore(BaseModel):
    institution: Optional[str] = None
    degree: Optional[str] = None
    # Forwarded by the endpoint under the key "field".
    field_of_study: Optional[str] = None
57
+
58
# One certification entry. Only `name` is mandatory.
class CertificationCore(BaseModel):
    # Issuing organization; the jobseeker endpoint substitutes "" when missing.
    organization: Optional[str] = None
    name: str
    # start_date and end_date are not part of the request; the endpoint
    # fills both with '' when building the downstream object.
62
+
63
# Request body for POST /embed/jobseeker: a free-text summary plus the
# structured sections of a job seeker's profile.
class JobseekerEmbeddingRequest(BaseModel):
    summary: str
    # All skills are treated as primary skills by the endpoint.
    skills: List[str]
    educations: List[EducationCore]
    certifications: List[CertificationCore]
    experiences: List[EmbeddingExperience]
69
+
70
# Response body for POST /embed/jobseeker.
# One vector (as a plain list of floats) for each section of the request.
class JobseekerEmbeddingResponse(BaseModel):
    summary_embedding: List[float]
    skills_embedding: List[float]
    educations_embedding: List[float]
    certifications_embedding: List[float]
    experiences_embedding: List[float]
76
+
77
@app.post("/embed/jobposting", response_model=JobPostingEmbeddingResponse)
async def create_jobposting_embedding(req: JobPostingEmbeddingRequest):
    """Embed every section of a job posting.

    Builds a ``JobPosting`` from the request (each skill name is wrapped in a
    ``Skill``), hands it to ``embedding_manager.embed_jobposting`` and converts
    each returned section vector to a plain list via ``.tolist()``.

    Args:
        req: Validated request body with the posting's text sections and
            skill names.

    Returns:
        A dict with one ``<section>_embedding`` list per section, matching
        ``JobPostingEmbeddingResponse``.

    Raises:
        HTTPException: With status 500 and the underlying error message as
            detail when building or embedding the posting fails.
    """
    # Section keys read from the embedding result; each is returned under
    # "<section>_embedding".
    sections = (
        "title",
        "role_description",
        "company_description",
        "primary_skills",
        "secondary_skills",
    )
    try:
        job = JobPosting(
            title=req.title,
            role_description=req.role_description,
            company_description=req.company_description,
            primary_skills=[Skill(skill_name=s) for s in req.primary_skills],
            secondary_skills=[Skill(skill_name=s) for s in req.secondary_skills],
        )

        embeddings = embedding_manager.embed_jobposting(job)

        return {
            f"{section}_embedding": embeddings[section].tolist()
            for section in sections
        }
    except HTTPException:
        # Already a deliberate HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        # Keep the traceback in the server log; the client only gets str(e).
        logging.getLogger(__name__).exception("Job posting embedding failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
99
+
100
def _build_processed_jobseeker(req: JobseekerEmbeddingRequest) -> IndependentJobSeekerAssessmentRDS:
    """Map the request's structured sections onto the assessment object."""
    experiences = [
        {
            "experience_summaries": exp.experience_summaries,
            "title": exp.title.name,
            "is_primary": exp.is_primary,
            "location_names": exp.location_names,
            # Map company_name to a company dict; empty name means no company.
            "company": {"name": exp.company_name} if exp.company_name else None,
            "start_date": "",  # Not used directly, but can be empty
            "end_date": "",  # Not used directly, but can be empty
        }
        for exp in req.experiences
    ]
    educations = [
        {
            "degree": edu.degree or "",
            "field": edu.field_of_study or "",
            "institution": edu.institution or "",
        }
        for edu in req.educations
    ]
    certifications = [
        {
            "name": cert.name,
            "organization": cert.organization or "",
            # Dates are not part of the request, so they are sent empty.
            "start_date": "",
            "end_date": "",
        }
        for cert in req.certifications
    ]
    # All skills go into primary_skills; secondary_skills is left empty.
    return IndependentJobSeekerAssessmentRDS(
        primary_skills=req.skills,
        secondary_skills=[],
        experiences=experiences,
        educations=educations,
        certifications=certifications,
    )


@app.post("/embed/jobseeker", response_model=JobseekerEmbeddingResponse)
async def create_jobseeker_embedding(req: JobseekerEmbeddingRequest):
    """Embed every section of a job seeker profile.

    Converts the request into an ``IndependentJobSeekerAssessmentRDS`` (the
    structured sections) and a ``JobseekerInfoRDS`` (the summary), passes both
    to ``embedding_manager.embed_jobseeker`` and converts the returned vectors
    to plain lists via ``.tolist()``.

    Args:
        req: Validated request body with summary, skills, educations,
            certifications and experiences.

    Returns:
        A dict with ``summary_embedding``, ``skills_embedding``,
        ``educations_embedding``, ``certifications_embedding`` and
        ``experiences_embedding``, matching ``JobseekerEmbeddingResponse``.

    Raises:
        HTTPException: With status 500 and the underlying error message as
            detail when building or embedding the profile fails.
    """
    try:
        processed_obj = _build_processed_jobseeker(req)
        unprocessed_obj = JobseekerInfoRDS(summary=req.summary)

        embeddings = embedding_manager.embed_jobseeker(processed_obj, unprocessed_obj)

        # Response field -> key in the embedding result. Skills were all sent
        # as primary_skills, so the 'secondary_skills' result is ignored;
        # 'education' and 'experience' are singular in the result.
        return {
            "summary_embedding": embeddings['summary'].tolist(),
            "skills_embedding": embeddings['primary_skills'].tolist(),
            "educations_embedding": embeddings['education'].tolist(),
            "certifications_embedding": embeddings['certifications'].tolist(),
            "experiences_embedding": embeddings['experience'].tolist(),
        }
    except HTTPException:
        # Already a deliberate HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        # Keep the traceback in the server log; the client only gets str(e).
        logging.getLogger(__name__).exception("Job seeker embedding failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
157
+
158
if __name__ == "__main__":
    # uvicorn is only needed when this module is executed as a script.
    import uvicorn

    # Listen on every interface, port 8001.
    server_options = {"host": "0.0.0.0", "port": 8001}
    uvicorn.run(app, **server_options)