stealth-talent
/

embeddings-sebastian

Model card Files Files and versions Community

sebastianalgharaballi committed 29 days ago

Commit

550d2ef

verified ·

1 Parent(s): 29e2c27

Upload main.py

Browse files

Files changed (1) hide show

main.py +160 -0

main.py ADDED Viewed

	@@ -0,0 +1,160 @@

+# main.py
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional, Dict, Any
+from two_phase_search import TwoPhaseSearchSystem
+from chroma_storage import ChromaMatchingSystem
+from opensearch_client import OpenSearchClient
+from embeddings import JobPosting, Skill, EmbeddingManager, IndependentJobSeekerAssessmentRDS, JobseekerInfoRDS
+from encoder import create_encoders
+import numpy as np
+from datetime import datetime
+# FastAPI application object; the route decorators further down register
+# their handlers on it.
+app = FastAPI()
+# Initialize systems
+# NOTE: everything in this section executes at import time, so importing this
+# module constructs the storage client, the search client and the encoders.
+chroma_matcher = ChromaMatchingSystem(collection_name="job_seekers")
+opensearch_client = OpenSearchClient()
+# NOTE(review): hard-coded ngrok development URL — presumably this should come
+# from configuration; confirm before deploying anywhere else.
+BASE_URL = "https://dev-abhinav.ngrok.io"
+# search_system is built here but is not referenced by the two endpoints
+# defined in the lines shown in this file.
+search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client, BASE_URL)
+# Initialize encoders and embedding manager
+# create_encoders returns a pair, bound here as job_encoder and seeker_encoder;
+# both are handed to the EmbeddingManager that the endpoints call.
+job_encoder, seeker_encoder = create_encoders('all-mpnet-base-v2')
+embedding_manager = EmbeddingManager(job_encoder, seeker_encoder)
+# New schemas
+# Request body for POST /embed/jobposting.
+# All five fields are required; the handler copies them into a JobPosting.
+class JobPostingEmbeddingRequest(BaseModel):
+    title: str
+    role_description: str
+    company_description: str
+    # Plain skill names; the handler wraps each one as Skill(skill_name=...).
+    primary_skills: List[str]
+    secondary_skills: List[str]
+# Response body for POST /embed/jobposting.
+# Each field is the `.tolist()` form of the embedding the handler reads from
+# the matching key ('title', 'role_description', ...) of embed_jobposting's result.
+class JobPostingEmbeddingResponse(BaseModel):
+    title_embedding: List[float]
+    role_description_embedding: List[float]
+    company_description_embedding: List[float]
+    primary_skills_embedding: List[float]
+    secondary_skills_embedding: List[float]
+# Nested object carrying a job title; the jobseeker handler reads only
+# `title.name` from each experience.
+class Title(BaseModel):
+    name: str
+# Base shape of one work-experience entry, without any company information.
+# `title` and `is_primary` are required; the two list fields default to empty.
+class ExperienceBaseWithoutCompanyCore(BaseModel):
+    location_names: List[str] = []
+    title: Title
+    is_primary: bool
+    experience_summaries: List[str] = []
+# Experience entry as accepted by POST /embed/jobseeker: the base fields plus
+# a required company name. The handler turns a non-empty company_name into
+# {"name": company_name} and an empty one into None.
+class EmbeddingExperience(ExperienceBaseWithoutCompanyCore):
+    company_name: str
+# One education entry. Every field is optional; the jobseeker handler
+# substitutes an empty string for any field that is missing or empty.
+class EducationCore(BaseModel):
+    institution: Optional[str] = None
+    degree: Optional[str] = None
+    # Sent to the embedding layer under the key "field".
+    field_of_study: Optional[str] = None
+# One certification entry. Only `name` is required.
+class CertificationCore(BaseModel):
+    # Issuing organization; the handler substitutes "" when it is missing.
+    organization: Optional[str] = None
+    name: str
+    # No start_date / end_date fields are declared here; the jobseeker handler
+    # always passes "" for both when it builds the certification dicts.
+# Request body for POST /embed/jobseeker. All five fields are required
+# (the lists may be empty).
+class JobseekerEmbeddingRequest(BaseModel):
+    # Passed to JobseekerInfoRDS(summary=...).
+    summary: str
+    # The handler puts every skill into primary_skills and leaves
+    # secondary_skills empty.
+    skills: List[str]
+    educations: List[EducationCore]
+    certifications: List[CertificationCore]
+    experiences: List[EmbeddingExperience]
+# Response body for POST /embed/jobseeker.
+# The handler fills these from the embed_jobseeker result keys 'summary',
+# 'primary_skills', 'education', 'certifications' and 'experience' respectively.
+class JobseekerEmbeddingResponse(BaseModel):
+    summary_embedding: List[float]
+    skills_embedding: List[float]
+    educations_embedding: List[float]
+    certifications_embedding: List[float]
+    experiences_embedding: List[float]
+@app.post("/embed/jobposting", response_model=JobPostingEmbeddingResponse)
+async def create_jobposting_embedding(req: JobPostingEmbeddingRequest):
+    try:
+        job = JobPosting(
+            title=req.title,
+            role_description=req.role_description,
+            company_description=req.company_description,
+            primary_skills=[Skill(skill_name=s) for s in req.primary_skills],
+            secondary_skills=[Skill(skill_name=s) for s in req.secondary_skills]
+        )
+        embeddings = embedding_manager.embed_jobposting(job)
+        return {
+            "title_embedding": embeddings['title'].tolist(),
+            "role_description_embedding": embeddings['role_description'].tolist(),
+            "company_description_embedding": embeddings['company_description'].tolist(),
+            "primary_skills_embedding": embeddings['primary_skills'].tolist(),
+            "secondary_skills_embedding": embeddings['secondary_skills'].tolist()
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/embed/jobseeker", response_model=JobseekerEmbeddingResponse)
+async def create_jobseeker_embedding(req: JobseekerEmbeddingRequest):
+    try:
+        # Map all skills into primary_skills, leave secondary_skills empty
+        processed_obj = IndependentJobSeekerAssessmentRDS(
+            primary_skills=req.skills,
+            secondary_skills=[],
+            experiences=[
+                {
+                    "experience_summaries": exp.experience_summaries,
+                    "title": exp.title.name,
+                    "is_primary": exp.is_primary,
+                    "location_names": exp.location_names,
+                    # Map company_name to company dict
+                    "company": {"name": exp.company_name} if exp.company_name else None,
+                    "start_date": "",  # Not used directly, but can be empty
+                    "end_date": ""      # Not used directly, but can be empty
+                } for exp in req.experiences
+            ],
+            educations=[
+                {
+                    "degree": edu.degree if edu.degree else "",
+                    "field": edu.field_of_study if edu.field_of_study else "",
+                    "institution": edu.institution if edu.institution else ""
+                } for edu in req.educations
+            ],
+            certifications=[
+                {
+                    "name": cert.name,
+                    "organization": cert.organization if cert.organization else "",
+                    "start_date": "",  # empty as per instructions
+                    "end_date": ""
+                } for cert in req.certifications
+            ]
+        )
+        unprocessed_obj = JobseekerInfoRDS(
+            summary=req.summary
+        )
+        embeddings = embedding_manager.embed_jobseeker(processed_obj, unprocessed_obj)
+        # embeddings keys: summary, experience, primary_skills, secondary_skills, certifications, education
+        # We need: summary_embedding, skills_embedding, educations_embedding, certifications_embedding, experiences_embedding
+        # We put all skills into primary_skills. Ignore secondary_skills.
+        # experiences map to 'experience'
+        # educations map to 'education'
+        return {
+            "summary_embedding": embeddings['summary'].tolist(),
+            "skills_embedding": embeddings['primary_skills'].tolist(),
+            "educations_embedding": embeddings['education'].tolist(),
+            "certifications_embedding": embeddings['certifications'].tolist(),
+            "experiences_embedding": embeddings['experience'].tolist()
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Script entry point: when this file is executed directly, serve the app with
+# uvicorn. Host "0.0.0.0" listens on all network interfaces, on port 8001.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only needed when running as a script.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8001)