Create linkedin2.py
linkedin2.py  ADDED  +131 -0
@@ -0,0 +1,131 @@
+import os
+import requests
+from dotenv import load_dotenv
+from collections import OrderedDict
+
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain_community.llms import HuggingFaceHub
+
+import pandas as pd
+import numpy as np
+import streamlit as st
+import torch
+# from transformers import LlamaTokenizer, LlamaForCausalLM
+
+load_dotenv()
+
+# Initialise session state so values survive Streamlit reruns.
+if 'list_df' not in st.session_state:
+    st.session_state['list_df'] = ''
+
+if 'e_list' not in st.session_state:
+    st.session_state['e_list'] = ''
+
+if 'profile' not in st.session_state:
+    st.session_state['profile'] = ''
+
+e_list = []
+list_df = []
+
+token = os.environ['HUGGINGFACEHUB_API_TOKEN']
+print(token)  # debug: prints the Hugging Face token to the Space logs
+
+
+def scrape_linkedin_profile(linkedin_profile_url: str):
+    """Scrape information from a LinkedIn profile via the Proxycurl API."""
+    headers = {'Authorization': 'Bearer ' + "QFVFLoRcXcRAsS1He3OikQ"}
+    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'
+
+    response = requests.get(
+        api_endpoint, params={"url": linkedin_profile_url}, headers=headers
+    )
+
+    # Keep only non-empty fields and drop sections that are not summarised.
+    data = response.json()
+    data = {
+        k: v
+        for k, v in data.items()
+        if v not in ([], "", None)
+        and k not in ["people_also_viewed", "certifications"]
+    }
+    if data.get("groups"):
+        for group_dict in data.get("groups"):
+            group_dict.pop("profile_pic_url")
+
+    return data
+
+
+summary_template = """
+Name of the person is {full_name}.
+Given the input information {information} about {full_name}, I want you to:
+Summarize {information} in 2 lines. Do not repeat.
+"""
+
+experience_template = """
+Name of the person is {full_name}.
+Summarize {information} in 2 lines. You have to mention the names of the companies where the person has worked. Do not repeat.
+"""
+
+education_template = """
+Name of the person is {full_name}.
+Given the input information {information} about {full_name}, I want you to:
+Summarize {information} with the institutes where the education was pursued.
+"""
+
+p1 = st.text_input('Enter the LinkedIn profile')
+
+if st.button('Click for summary'):
+    with st.spinner("Generating response.."):
+
+        llm = HuggingFaceHub(
+            repo_id="declare-lab/flan-alpaca-large",
+            huggingfacehub_api_token=token,
+            model_kwargs={"temperature": 0, "max_length": 512},
+        )
+        linkedin_data1 = scrape_linkedin_profile(p1)
+        full_name = linkedin_data1.get('full_name')
+
+        # Profile summary
+        summary_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=summary_template)
+        chain = LLMChain(llm=llm, prompt=summary_prompt_template)
+
+        if linkedin_data1.get('summary'):
+            df1 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('summary')})
+            df1 = df1.get('text')
+        else:
+            df1 = ''
+
+        # Work experience
+        experience_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=experience_template)
+        chain = LLMChain(llm=llm, prompt=experience_prompt_template)
+
+        if linkedin_data1.get('experiences'):
+            df2 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('experiences')})
+            df2 = df2.get('text')
+        else:
+            df2 = ''
+
+        # Education
+        education_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=education_template)
+        chain = LLMChain(llm=llm, prompt=education_prompt_template)
+
+        if linkedin_data1.get('education'):
+            df3 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('education')})
+            df3 = df3.get('text')
+        else:
+            df3 = ''
+
+        # Remove duplicate '.'-separated segments before displaying the result.
+        test_string = df1 + df2 + df3
+        y = ".".join(list(OrderedDict.fromkeys(test_string.split("."))))
+        st.write(y)
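
The last lines of the script de-duplicate the combined model output by using the '.'-separated segments as OrderedDict keys, which keeps insertion order and drops exact repeats. A minimal sketch of that behaviour, with invented sample text; note that segments must match exactly, so a repeated sentence preceded by a space would not be removed:

from collections import OrderedDict

# Hypothetical repeated model output (sample text is made up for illustration).
text = "Works at Acme Corp.Works at Acme Corp.Studied at Example University."
deduped = ".".join(OrderedDict.fromkeys(text.split(".")))
print(deduped)  # -> Works at Acme Corp.Studied at Example University.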