sebastianalgharaballi's picture
Upload main.py
550d2ef verified
raw
history blame
6.27 kB
# main.py
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from chroma_storage import ChromaMatchingSystem
from embeddings import (
    EmbeddingManager,
    IndependentJobSeekerAssessmentRDS,
    JobPosting,
    JobseekerInfoRDS,
    Skill,
)
from encoder import create_encoders
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem
app = FastAPI()

# Initialize systems.
# NOTE: everything below runs at import time, so importing this module
# constructs the matcher, the OpenSearch client, the search system and the
# encoders as a side effect.
chroma_matcher = ChromaMatchingSystem(collection_name="job_seekers")
opensearch_client = OpenSearchClient()

# Base URL handed to the two-phase search system. The development tunnel
# remains the default so existing behaviour is unchanged; set the
# SEARCH_BASE_URL environment variable to point at another deployment
# without editing code. An empty value falls back to the default.
BASE_URL = os.environ.get("SEARCH_BASE_URL") or "https://dev-abhinav.ngrok.io"
search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client, BASE_URL)

# Initialize encoders and embedding manager.
job_encoder, seeker_encoder = create_encoders('all-mpnet-base-v2')
embedding_manager = EmbeddingManager(job_encoder, seeker_encoder)
# New schemas
class JobPostingEmbeddingRequest(BaseModel):
    """Request body accepted by ``POST /embed/jobposting``."""

    # Free-text sections of the posting.
    title: str
    role_description: str
    company_description: str

    # Skill names as plain strings; the endpoint wraps each one in a Skill.
    primary_skills: List[str]
    secondary_skills: List[str]
class JobPostingEmbeddingResponse(BaseModel):
    """Response body of ``POST /embed/jobposting``.

    One embedding vector, as a flat list of floats, per section of the
    request.
    """

    title_embedding: List[float]
    role_description_embedding: List[float]
    company_description_embedding: List[float]
    primary_skills_embedding: List[float]
    secondary_skills_embedding: List[float]
class Title(BaseModel):
    """Job title, carried as an object with a single ``name`` field."""

    name: str
class ExperienceBaseWithoutCompanyCore(BaseModel):
    """Work-experience entry that carries no company information."""

    # Optional; defaults to an empty list when omitted.
    location_names: List[str] = []
    title: Title
    is_primary: bool
    # Optional; defaults to an empty list when omitted.
    experience_summaries: List[str] = []
class EmbeddingExperience(ExperienceBaseWithoutCompanyCore):
    """Work-experience entry extended with the employer's name."""

    company_name: str
class EducationCore(BaseModel):
    """Education entry; every field is optional and defaults to ``None``."""

    institution: Optional[str] = None
    degree: Optional[str] = None
    field_of_study: Optional[str] = None
class CertificationCore(BaseModel):
    """Certification entry: a required name and an optional issuer."""

    organization: Optional[str] = None
    name: str
    # There are deliberately no start_date / end_date fields: they are not
    # required here and are mapped to '' when not provided.
class JobseekerEmbeddingRequest(BaseModel):
    """Request body accepted by ``POST /embed/jobseeker``."""

    summary: str
    # A single flat list of skill names (no primary/secondary split).
    skills: List[str]
    educations: List[EducationCore]
    certifications: List[CertificationCore]
    experiences: List[EmbeddingExperience]
class JobseekerEmbeddingResponse(BaseModel):
    """Response body of ``POST /embed/jobseeker``.

    One embedding vector, as a flat list of floats, per section of the
    request.
    """

    summary_embedding: List[float]
    skills_embedding: List[float]
    educations_embedding: List[float]
    certifications_embedding: List[float]
    experiences_embedding: List[float]
@app.post("/embed/jobposting", response_model=JobPostingEmbeddingResponse)
async def create_jobposting_embedding(req: JobPostingEmbeddingRequest):
    """Embed each section of a job posting and return the vectors as lists.

    Args:
        req: Validated request body with the posting's title, its two
            descriptions and its primary/secondary skill names.

    Returns:
        A dict with one ``<section>_embedding`` key per section; each value
        is ``.tolist()`` of the matching entry returned by
        ``embedding_manager.embed_jobposting``.

    Raises:
        HTTPException: Status 500 with ``str(error)`` as detail when any
            step fails. An ``HTTPException`` raised while handling the
            request is propagated unchanged instead of being rewrapped.
    """
    # Sections of the embedding result, in the order they are reported.
    sections = (
        "title",
        "role_description",
        "company_description",
        "primary_skills",
        "secondary_skills",
    )
    try:
        job = JobPosting(
            title=req.title,
            role_description=req.role_description,
            company_description=req.company_description,
            # Skill names arrive as plain strings and are wrapped in Skill.
            primary_skills=[Skill(skill_name=s) for s in req.primary_skills],
            secondary_skills=[Skill(skill_name=s) for s in req.secondary_skills],
        )
        # NOTE(review): this call is not awaited, so it runs synchronously
        # inside an async handler - confirm it is fast enough not to stall
        # other requests.
        embeddings = embedding_manager.embed_jobposting(job)
        return {
            f"{section}_embedding": embeddings[section].tolist()
            for section in sections
        }
    except HTTPException:
        # Already a well-formed HTTP error; do not downgrade it to a 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
def _experience_payload(exp: "EmbeddingExperience") -> Dict[str, Any]:
    """Convert one request experience into the dict given to the assessment object."""
    return {
        "experience_summaries": exp.experience_summaries,
        "title": exp.title.name,
        "is_primary": exp.is_primary,
        "location_names": exp.location_names,
        # The request carries a flat company_name; the payload nests it
        # under "company", or sends None when the name is empty.
        "company": {"name": exp.company_name} if exp.company_name else None,
        # Dates are not part of the request; send empty strings.
        "start_date": "",
        "end_date": "",
    }


def _education_payload(edu: "EducationCore") -> Dict[str, str]:
    """Convert one request education into a dict, replacing missing values with ''."""
    return {
        "degree": edu.degree or "",
        # The request field is called field_of_study; the payload key is "field".
        "field": edu.field_of_study or "",
        "institution": edu.institution or "",
    }


def _certification_payload(cert: "CertificationCore") -> Dict[str, str]:
    """Convert one request certification into a dict with empty date fields."""
    return {
        "name": cert.name,
        "organization": cert.organization or "",
        # Dates are not part of the request; send empty strings.
        "start_date": "",
        "end_date": "",
    }


@app.post("/embed/jobseeker", response_model=JobseekerEmbeddingResponse)
async def create_jobseeker_embedding(req: JobseekerEmbeddingRequest):
    """Embed each section of a jobseeker profile and return the vectors as lists.

    Args:
        req: Validated request body with the jobseeker's summary, skills,
            educations, certifications and experiences.

    Returns:
        A dict keyed like ``JobseekerEmbeddingResponse``; each value is
        ``.tolist()`` of the matching entry returned by
        ``embedding_manager.embed_jobseeker``.

    Raises:
        HTTPException: Status 500 with ``str(error)`` as detail when any
            step fails. An ``HTTPException`` raised while handling the
            request is propagated unchanged instead of being rewrapped.
    """
    # Response field -> key in the embedding result.
    # embeddings keys: summary, experience, primary_skills, secondary_skills,
    # certifications, education. All request skills are sent as
    # primary_skills, so the secondary_skills embedding is ignored.
    response_sources = {
        "summary_embedding": "summary",
        "skills_embedding": "primary_skills",
        "educations_embedding": "education",
        "certifications_embedding": "certifications",
        "experiences_embedding": "experience",
    }
    try:
        # Map all skills into primary_skills, leave secondary_skills empty.
        processed_obj = IndependentJobSeekerAssessmentRDS(
            primary_skills=req.skills,
            secondary_skills=[],
            experiences=[_experience_payload(exp) for exp in req.experiences],
            educations=[_education_payload(edu) for edu in req.educations],
            certifications=[
                _certification_payload(cert) for cert in req.certifications
            ],
        )
        unprocessed_obj = JobseekerInfoRDS(summary=req.summary)
        # NOTE(review): this call is not awaited, so it runs synchronously
        # inside an async handler - confirm it is fast enough not to stall
        # other requests.
        embeddings = embedding_manager.embed_jobseeker(processed_obj, unprocessed_obj)
        return {
            field: embeddings[source].tolist()
            for field, source in response_sources.items()
        }
    except HTTPException:
        # Already a well-formed HTTP error; do not downgrade it to a 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # uvicorn is imported here so it is only needed when this file is
    # executed directly as a script.
    import uvicorn

    # Serve the app on all interfaces, port 8001.
    server_options = {"host": "0.0.0.0", "port": 8001}
    uvicorn.run(app, **server_options)