# Cold Email Generator — Streamlit app (deployed as a Hugging Face Space).
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
#from chains import Chain
#from portfolio import Portfolio
#from utils import clean_text
import re
import pandas as pd
import chromadb
import uuid
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.exceptions import OutputParserException
#from dotenv import load_dotenv
#from google.colab import userdata
#load_dotenv()
class Chain:
    """Wraps a Groq-hosted LLM for job extraction and cold-email writing."""

    def __init__(self):
        """Initialize the chat model from the GROQ_API_KEY env variable.

        Raises:
            ValueError: if GROQ_API_KEY is unset, so the failure is explicit
                instead of surfacing later as an opaque auth error.
        """
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("GROQ_API_KEY environment variable is not set.")
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=groq_api_key,
            model_name="llama-3.1-70b-versatile",
        )

    def extract_jobs(self, cleaned_text):
        """Extract job postings from scraped career-page text.

        Args:
            cleaned_text: Plain text of the page (see ``clean_text``).

        Returns:
            A list of job dicts with keys ``role``, ``experience``,
            ``skills`` and ``description`` (always a list, even for one job).

        Raises:
            OutputParserException: if the model output is not valid JSON.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        res = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            json_parser = JsonOutputParser()
            res = json_parser.parse(res.content)
        except OutputParserException as e:
            # Chain the original exception so the parse failure is debuggable.
            raise OutputParserException("Context too big. Unable to parse jobs.") from e
        return res if isinstance(res, list) else [res]

    def write_mail(self, job, links):
        """Write a cold email tailored to one extracted job posting.

        Args:
            job: One job dict produced by ``extract_jobs``.
            links: Portfolio link metadata from ``Portfolio.query_links``.

        Returns:
            The generated email text.
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}
            ### INSTRUCTION:
            You are Anmol R Srivastava, a student pursuing a bachelor's degree in Computer Science Engineering with a specialization in Artificial Intelligence and Machine Learning, graduating in 2025. You have experience in cloud computing, AI, and software development, focusing on building AI-driven systems for various applications. Your task is to write a cold email to a potential client regarding a project that involves creating a predictive analytics tool for supply chain management. Highlight your expertise in AI and machine learning, particularly in predictive models and scalable solutions. Mention your ability to deliver customized and efficient systems tailored to client needs. Also, include your portfolio links to showcase your work:
            GitHub: https://github.com/arssite
            LinkedIn: https://www.linkedin.com/in/anmol-r-srivastava/
            Hugging Face: https://huggingface.co/arssite
            Contact email: arssite2020@gmail.com
            Also add the most relevant ones from the following links to showcase My Resume: {link_list}
            .
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):
            """
        )
        chain_email = prompt_email | self.llm
        res = chain_email.invoke({"job_description": str(job), "link_list": links})
        return res.content
class Portfolio:
    """Maps portfolio tech stacks to links via a persistent Chroma store."""

    def __init__(self, file_path="links.csv"):
        """Load the portfolio CSV and open the vector store.

        Args:
            file_path: CSV with "Techstack" and "Links" columns.
        """
        self.file_path = file_path
        self.data = pd.read_csv(file_path)
        self.chroma_client = chromadb.PersistentClient('vectorstore')
        self.collection = self.chroma_client.get_or_create_collection(name="portfolio")

    def load_portfolio(self):
        """Index every CSV row into the collection, once.

        Skips the work entirely if the persisted collection is non-empty.
        """
        if not self.collection.count():
            for _, row in self.data.iterrows():
                # Pass documents/metadatas/ids all as length-1 lists so the
                # three arguments agree (the original mixed scalars with a
                # one-element ids list).
                self.collection.add(
                    documents=[row["Techstack"]],
                    metadatas=[{"links": row["Links"]}],
                    ids=[str(uuid.uuid4())],
                )

    def query_links(self, skills):
        """Return link metadata for the 2 entries most similar to *skills*.

        Args:
            skills: Skill text (or list of texts) to embed and match.

        Returns:
            The "metadatas" field of the Chroma query result.
        """
        return self.collection.query(query_texts=skills, n_results=2).get('metadatas', [])
def clean_text(text):
    """Normalize scraped page text for LLM consumption.

    Strips HTML tags, URLs, and non-alphanumeric characters, then collapses
    every whitespace run to a single space.

    Args:
        text: Raw page text, possibly containing HTML markup.

    Returns:
        The cleaned, single-spaced, trimmed string.
    """
    # Remove HTML tags.
    text = re.sub(r'<[^>]*?>', '', text)
    # Remove URLs.
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    # Remove special characters (keep letters, digits, and spaces).
    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    # Collapse whitespace runs and trim in one step; this subsumes the
    # original's redundant regex-sub + strip + re-join sequence.
    return ' '.join(text.split())
def create_streamlit_app(llm, portfolio, clean_text):
    """Render the Streamlit UI: take a job-posting URL, emit cold emails.

    Args:
        llm: Chain instance used to extract jobs and write emails.
        portfolio: Portfolio instance used to look up relevant links.
        clean_text: Callable that normalizes scraped page text.
    """
    st.title("π§ Cold eMail Generator")
    # Use placeholder= (not value=) so an untouched field submits an empty
    # string instead of the literal hint text as a URL.
    url_input = st.text_input("Enter a URL:", placeholder="write Website or JD url")
    submit_button = st.button("Submit")
    if submit_button:
        if not url_input.strip():
            st.warning("Please enter a URL first.")
            return
        try:
            loader = WebBaseLoader([url_input])
            data = clean_text(loader.load().pop().page_content)
            portfolio.load_portfolio()
            jobs = llm.extract_jobs(data)
            for job in jobs:
                skills = job.get('skills', [])
                links = portfolio.query_links(skills)
                email = llm.write_mail(job, links)
                st.code(email, language='markdown')
        except Exception as e:
            # Surface any scrape/LLM failure in the UI rather than crashing.
            st.error(f"An Error Occurred: {e}")
if __name__ == "__main__":
    # set_page_config must be the first Streamlit command of the script run,
    # so call it before constructing anything else.
    st.set_page_config(layout="wide", page_title="Cold Email Generator by ARS", page_icon="π§")
    chain = Chain()
    portfolio = Portfolio()
    create_streamlit_app(chain, portfolio, clean_text)