HEHEBOIOG committed on
Commit
37e2bde
·
verified ·
1 Parent(s): 07524fb

Upload 6 files

Browse files
Files changed (6) hide show
  1. app/.env +1 -0
  2. app/chains.py +60 -0
  3. app/main.py +35 -0
  4. app/portfolio.py +21 -0
  5. app/resource/my_portfolio.csv +21 -0
  6. app/utils.py +16 -0
app/.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GROQ_API_KEY=<your-groq-api-key>
app/chains.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_groq import ChatGroq
3
+ from langchain_core.prompts import PromptTemplate
4
+ from langchain_core.output_parsers import JsonOutputParser
5
+ from langchain_core.exceptions import OutputParserException
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
class Chain:
    """Wrap a Groq chat model with two prompts: job extraction and cold-email drafting."""

    def __init__(self, model_name="llama-3.1-70b-versatile"):
        """Create the chat model client.

        Args:
            model_name: Groq model identifier passed to ``ChatGroq``. Defaults
                to the value the project originally hard-coded.

        The API key is read from the ``GROQ_API_KEY`` environment variable.
        """
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name=model_name,
        )

    def extract_jobs(self, cleaned_text):
        """Ask the model to extract job postings from scraped page text.

        Args:
            cleaned_text: Text of a careers page, substituted into the prompt
                as ``page_data``.

        Returns:
            A list of parsed JSON values. A single (non-list) parse result is
            wrapped in a one-element list so callers can always iterate.

        Raises:
            OutputParserException: If the model response cannot be parsed as
                JSON. The original parser error is attached as ``__cause__``.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        response = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            parsed = JsonOutputParser().parse(response.content)
        except OutputParserException as err:
            # Keep the caller-facing message, but chain the cause so the real
            # parse failure is still visible in tracebacks.
            raise OutputParserException("Context too big. Unable to parse jobs.") from err
        return parsed if isinstance(parsed, list) else [parsed]

    def write_mail(self, job, links):
        """Ask the model to draft a cold email for one job posting.

        Args:
            job: The job posting; it is converted with ``str()`` before being
                substituted into the prompt as ``job_description``.
            links: Portfolio links substituted into the prompt as ``link_list``.

        Returns:
            The ``content`` of the model response.
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}

            ### INSTRUCTION:
            You are Mohan, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
            the seamless integration of business processes through automated tools.
            Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability,
            process optimization, cost reduction, and heightened overall efficiency.
            Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ
            in fulfilling their needs.
            Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
            Remember you are Mohan, BDE at AtliQ.
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):

            """
        )
        chain_email = prompt_email | self.llm
        response = chain_email.invoke({"job_description": str(job), "link_list": links})
        return response.content
58
+
59
if __name__ == "__main__":
    # Smoke check for the environment: report only whether the key is
    # configured. Never echo the secret itself to stdout or logs.
    print("GROQ_API_KEY is set" if os.getenv("GROQ_API_KEY") else "GROQ_API_KEY is not set")
app/main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import WebBaseLoader
3
+
4
+ from chains import Chain
5
+ from portfolio import Portfolio
6
+ from utils import clean_text
7
+
8
+
9
def create_streamlit_app(llm, portfolio, clean_text):
    """Render the cold-mail generator page and react to the Submit button.

    Args:
        llm: Object providing ``extract_jobs(text)`` and ``write_mail(job, links)``.
        portfolio: Object providing ``load_portfolio()`` and ``query_links(skills)``.
        clean_text: Callable applied to the loaded page content before extraction.

    Any exception raised while loading the page or generating emails is shown
    to the user through ``st.error`` instead of propagating.
    """
    st.title("📧 Cold Mail Generator")
    url_input = st.text_input("Enter a URL:", value="https://jobs.nike.com/job/R-33460")
    submit_button = st.button("Submit")

    # Nothing to do until the user actually submits.
    if not submit_button:
        return

    try:
        documents = WebBaseLoader([url_input]).load()
        page_text = clean_text(documents.pop().page_content)
        portfolio.load_portfolio()
        for posting in llm.extract_jobs(page_text):
            required_skills = posting.get('skills', [])
            matching_links = portfolio.query_links(required_skills)
            draft = llm.write_mail(posting, matching_links)
            st.code(draft, language='markdown')
    except Exception as e:
        st.error(f"An Error Occurred: {e}")
27
+
28
+
29
if __name__ == "__main__":
    # Build the collaborators, configure the page, then hand off to the UI.
    llm_chain = Chain()
    portfolio_store = Portfolio()
    st.set_page_config(page_title="Cold Email Generator", page_icon="📧", layout="wide")
    create_streamlit_app(llm=llm_chain, portfolio=portfolio_store, clean_text=clean_text)
34
+
35
+
app/portfolio.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import chromadb
3
+ import uuid
4
+
5
+
6
class Portfolio:
    """Portfolio rows from a CSV file, stored in a Chroma collection for lookup by skill."""

    def __init__(self, file_path="app/resource/my_portfolio.csv"):
        """Read the portfolio CSV and open (or create) the ``portfolio`` collection.

        Args:
            file_path: Path to a CSV with ``Techstack`` and ``Links`` columns.
        """
        self.file_path = file_path
        self.data = pd.read_csv(file_path)
        self.chroma_client = chromadb.PersistentClient('vectorstore')
        self.collection = self.chroma_client.get_or_create_collection(name="portfolio")

    def load_portfolio(self):
        """Add every CSV row to the collection, unless it already has entries."""
        if self.collection.count():
            # Already populated; skip so rows are not added twice.
            return
        for _, row in self.data.iterrows():
            self.collection.add(documents=row["Techstack"],
                                metadatas={"links": row["Links"]},
                                ids=[str(uuid.uuid4())])

    def query_links(self, skills):
        """Return metadata of the portfolio entries closest to ``skills``.

        Args:
            skills: Query text(s) passed to the collection as ``query_texts``.

        Returns:
            The ``metadatas`` entry of the query result (two results are
            requested per query text), or an empty list when ``skills`` is
            empty or ``None``, in which case the collection is not queried.
        """
        if not skills:
            # A posting without skills has nothing to match against; return no
            # links instead of sending an empty query to the vector store.
            return []
        return self.collection.query(query_texts=skills, n_results=2).get('metadatas', [])
app/resource/my_portfolio.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "Techstack","Links"
2
+ "React, Node.js, MongoDB","https://example.com/react-portfolio"
3
+ "Angular,.NET, SQL Server","https://example.com/angular-portfolio"
4
+ "Vue.js, Ruby on Rails, PostgreSQL","https://example.com/vue-portfolio"
5
+ "Python, Django, MySQL","https://example.com/python-portfolio"
6
+ "Java, Spring Boot, Oracle","https://example.com/java-portfolio"
7
+ "Flutter, Firebase, GraphQL","https://example.com/flutter-portfolio"
8
+ "WordPress, PHP, MySQL","https://example.com/wordpress-portfolio"
9
+ "Magento, PHP, MySQL","https://example.com/magento-portfolio"
10
+ "React Native, Node.js, MongoDB","https://example.com/react-native-portfolio"
11
+ "iOS, Swift, Core Data","https://example.com/ios-portfolio"
12
+ "Android, Java, Room Persistence","https://example.com/android-portfolio"
13
+ "Kotlin, Android, Firebase","https://example.com/kotlin-android-portfolio"
14
+ "Android TV, Kotlin, Android NDK","https://example.com/android-tv-portfolio"
15
+ "iOS, Swift, ARKit","https://example.com/ios-ar-portfolio"
16
+ "Cross-platform, Xamarin, Azure","https://example.com/xamarin-portfolio"
17
+ "Backend, Kotlin, Spring Boot","https://example.com/kotlin-backend-portfolio"
18
+ "Frontend, TypeScript, Angular","https://example.com/typescript-frontend-portfolio"
19
+ "Full-stack, JavaScript, Express.js","https://example.com/full-stack-js-portfolio"
20
+ "Machine Learning, Python, TensorFlow","https://example.com/ml-python-portfolio"
21
+ "DevOps, Jenkins, Docker","https://example.com/devops-portfolio"
app/utils.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def clean_text(text):
    """Reduce scraped text to ASCII letters, digits and single spaces.

    Steps, in order: strip HTML tags, strip URLs, drop every character that is
    neither ASCII alphanumeric nor whitespace, then collapse each whitespace
    run to one space and trim both ends.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text; an empty string if nothing remains.
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]*?>', '', text)
    # Remove URLs
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    # Remove special characters but keep all whitespace for now: deleting
    # newlines/tabs here would glue words from adjacent lines together.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Collapse every whitespace run to a single space and trim the ends.
    return ' '.join(text.split())
+ return text