Create linkedin2.py
linkedin2.py  ADDED  +131 -0
@@ -0,0 +1,131 @@
+import os
+import requests
+from dotenv import load_dotenv
+from collections import OrderedDict
+
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain_community.llms import HuggingFaceHub
+
+import pandas as pd
+import numpy as np
+import streamlit as st
+import torch
+# from transformers import LlamaTokenizer, LlamaForCausalLM
+
+load_dotenv()
+
+# Initialise session state so values survive Streamlit reruns.
+if 'list_df' not in st.session_state:
+    st.session_state['list_df'] = ''
+
+if 'e_list' not in st.session_state:
+    st.session_state['e_list'] = ''
+
+if 'profile' not in st.session_state:
+    st.session_state['profile'] = ''
+
+e_list = []
+list_df = []
+
+token = os.environ['HUGGINGFACEHUB_API_TOKEN']
+print(token)  # debug: prints the Hugging Face token to the Space logs
+
+
+def scrape_linkedin_profile(linkedin_profile_url: str):
+    """Scrape information from a LinkedIn profile via the Proxycurl API."""
+    headers = {'Authorization': 'Bearer ' + "QFVFLoRcXcRAsS1He3OikQ"}
+    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'
+
+    response = requests.get(
+        api_endpoint, params={"url": linkedin_profile_url}, headers=headers
+    )
+
+    # Keep only non-empty fields and drop sections that are not summarised.
+    data = response.json()
+    data = {
+        k: v
+        for k, v in data.items()
+        if v not in ([], "", None)
+        and k not in ["people_also_viewed", "certifications"]
+    }
+    if data.get("groups"):
+        for group_dict in data.get("groups"):
+            group_dict.pop("profile_pic_url")
+
+    return data
+
+
+summary_template = """
+Name of the person is {full_name}.
+Given the input information {information} about {full_name}, I want you to:
+Summarize {information} in 2 lines. Do not repeat.
+"""
+
+experience_template = """
+Name of the person is {full_name}.
+Summarize {information} in 2 lines. You have to mention the names of the companies where the person has worked. Do not repeat.
+"""
+
+education_template = """
+Name of the person is {full_name}.
+Given the input information {information} about {full_name}, I want you to:
+Summarize {information} with the institutes where the education was pursued.
+"""
+
+p1 = st.text_input('Enter the LinkedIn profile')
+
+if st.button('Click for summary'):
+    with st.spinner("Generating response.."):
+
+        llm = HuggingFaceHub(
+            repo_id="declare-lab/flan-alpaca-large",
+            huggingfacehub_api_token=token,
+            model_kwargs={"temperature": 0, "max_length": 512},
+        )
+        linkedin_data1 = scrape_linkedin_profile(p1)
+        full_name = linkedin_data1.get('full_name')
+
+        # Profile summary
+        summary_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=summary_template)
+        chain = LLMChain(llm=llm, prompt=summary_prompt_template)
+
+        if linkedin_data1.get('summary'):
+            df1 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('summary')})
+            df1 = df1.get('text')
+        else:
+            df1 = ''
+
+        # Work experience
+        experience_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=experience_template)
+        chain = LLMChain(llm=llm, prompt=experience_prompt_template)
+
+        if linkedin_data1.get('experiences'):
+            df2 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('experiences')})
+            df2 = df2.get('text')
+        else:
+            df2 = ''
+
+        # Education
+        education_prompt_template = PromptTemplate(input_variables=["full_name", "information"], template=education_template)
+        chain = LLMChain(llm=llm, prompt=education_prompt_template)
+
+        if linkedin_data1.get('education'):
+            df3 = chain.invoke({'full_name': full_name, 'information': linkedin_data1.get('education')})
+            df3 = df3.get('text')
+        else:
+            df3 = ''
+
+        # Remove duplicate '.'-separated segments before displaying the result.
+        test_string = df1 + df2 + df3
+        y = ".".join(list(OrderedDict.fromkeys(test_string.split("."))))
+        st.write(y)
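
The last lines of the script de-duplicate the combined model output by using the '.'-separated segments as OrderedDict keys, which keeps insertion order and drops exact repeats. A minimal sketch of that behaviour, with invented sample text; note that segments must match exactly, so a repeated sentence preceded by a space would not be removed:

from collections import OrderedDict

# Hypothetical repeated model output (sample text is made up for illustration).
text = "Works at Acme Corp.Works at Acme Corp.Studied at Example University."
deduped = ".".join(OrderedDict.fromkeys(text.split(".")))
print(deduped)  # -> Works at Acme Corp.Studied at Example University.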