Upload 4 files
- app.py +62 -0
- ice_cold.py +47 -0
- logs.py +50 -0
- web_scrapping_engine.py +17 -0
app.py
ADDED
@@ -0,0 +1,62 @@
+import gradio as gr
+import os
+from functools import partial
+import gdown
+import pandas as pd
+from logs import save_logs
+import time
+from config import logs_folder_id, json_url_id
+from ice_cold import get_refresher
+from web_scrapping_engine import get_linkedin_profile
+
+choices = ["sofwareAG"]
+
+title = """SynthAI Alpha V2 - Strada Partners Demo
+
+
+contact: adamrida.ra@gmail.com or sp.olivier@hotmail.com
+"""
+
+
+def stream(company):
+    linkedin_extracted_info = get_linkedin_profile(company)
+    resp = get_refresher(linkedin_extracted_info)
+    answer = ""
+    for chunk in resp:
+        if chunk.choices[0].delta.content is not None:
+            answer = answer + chunk.choices[0].delta.content
+            yield answer
+    save_logs(company, answer, folder_id=logs_folder_id)
+
+
+download_url = f'https://drive.google.com/uc?id={json_url_id}'
+output = 'secret_google_service_account.json'
+gdown.download(download_url, output, quiet=False)
+
+
+with gr.Blocks(title=title, theme='nota-ai/theme', css="footer {visibility: hidden}") as demo:
+    gr.Markdown(f"## {title}")
+
+    with gr.Row():
+        with gr.Column(scale=6):
+            with gr.Row():
+                with gr.Column(scale=3):
+                    chat_input = gr.Textbox(placeholder="Company Name", lines=1, label="Get instant refresher on any company")
+                    chat_submit_button = gr.Button(value="Submit ▶")
+
+                with gr.Column(scale=6):
+                    chat_output = gr.Markdown("Waiting for company...")
+
+    fn_chat = stream
+
+    chat_submit_button.click(fn=fn_chat, inputs=[chat_input], outputs=[chat_output])
+
+demo.launch(max_threads=40)
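Note: app.py and ice_cold.py import logs_folder_id, json_url_id and openai_api from a config module that is not part of this upload. A minimal sketch of what such a module might look like, assuming the secrets are exposed as environment variables (the variable names below are only illustrative):

# config.py — hypothetical sketch, not part of the upload.
# Assumes OPENAI_API_KEY, LOGS_FOLDER_ID and JSON_URL_ID are set as environment variables
# (e.g. as Space secrets).
import os

openai_api = os.environ["OPENAI_API_KEY"]
logs_folder_id = os.environ["LOGS_FOLDER_ID"]
json_url_id = os.environ["JSON_URL_ID"]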
ice_cold.py
ADDED
@@ -0,0 +1,47 @@
+from openai import OpenAI
+
+from config import openai_api
+
+client = OpenAI(api_key=openai_api)
+
+
+def get_refresher(json_linkedin_info):
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "You will receive a raw JSON file with information on a company pulled from LinkedIn.\nYour job is to determine, based on the recent updates, which key facts a potential private equity investor needs to know to get up to speed before meeting with them. Assume the investor has already talked with them in the past and just needs a few bullet points to get up to speed, plus something personalized and relevant to say to them.\n(current date is 15 January 2024)\n\nUse the following two headings (at most 5 bullet points each):\n\n### Key refresher:\n\n\n### Updates / icebreaker\n"
+                    }
+                ]
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": str(json_linkedin_info)
+                    }
+                ]
+            }
+        ],
+        temperature=0,
+        max_tokens=1228,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        stream=True
+    )
+    return response
+
+
+# def stream(resp):
+#     for chunk in resp:
+#         if chunk.choices[0].delta.content is not None:
+#             print(chunk.choices[0].delta.content, end="")
+#             # yield chunk.choices[0].delta.content
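For reference, app.py consumes the streamed response chunk by chunk; a minimal console sketch along the same lines (sample_info is a made-up payload — in the app it comes from the scraping step, and config must provide openai_api):

from ice_cold import get_refresher

# Hypothetical payload; in the app this comes from get_linkedin_profile().
sample_info = {"name": "Example GmbH", "recent_updates": ["Opened a new Berlin office"]}

answer = ""
for chunk in get_refresher(sample_info):
    delta = chunk.choices[0].delta.content
    if delta is not None:
        answer += delta
        print(delta, end="", flush=True)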
logs.py
ADDED
@@ -0,0 +1,50 @@
+import os
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+from datetime import datetime
+
+
+def save_logs(query, response, folder_id=""):
+    to_save = f"LOG ENTRY\nQUERY\n{query}\n=================================\nRESPONSE\n{response}\n****************************************\n"
+
+    # Get the current date and time
+    now = datetime.now()
+    filename = str(now).replace(":", "").replace(" ", "").replace("-", "").replace(".", "") + ".txt"
+    with open(filename, 'w') as file:
+        file.write(to_save)
+
+    # Path to the service account key file
+    SERVICE_ACCOUNT_FILE = 'secret_google_service_account.json'
+
+    # Define the required scopes
+    SCOPES = ['https://www.googleapis.com/auth/drive.file']
+
+    # Authenticate using the service account key file
+    credentials = service_account.Credentials.from_service_account_file(
+        SERVICE_ACCOUNT_FILE, scopes=SCOPES)
+
+    # Build the Google Drive API client
+    service = build('drive', 'v3', credentials=credentials)
+
+    # Metadata of the file to be uploaded
+    file_metadata = {
+        'name': filename,       # Name of the file to be uploaded
+        'parents': [folder_id]  # Folder ID of the destination Drive folder
+    }
+
+    # Path to the file you want to upload
+    file_path = filename
+
+    # Create a MediaFileUpload object to upload the file
+    media = MediaFileUpload(file_path, mimetype='text/plain')
+
+    # Use the Drive API to upload the file
+    file = service.files().create(
+        body=file_metadata,
+        media_body=media,
+        fields='id'
+    ).execute()
+
+    # Print the file ID of the uploaded file
+    print('Saved in Google Drive - File ID: %s' % file.get('id'))
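Usage note: save_logs expects secret_google_service_account.json to be present in the working directory (app.py downloads it via gdown at startup) and a Drive folder the service account can write to. A minimal sketch with a placeholder folder ID:

from logs import save_logs

# "YOUR_DRIVE_FOLDER_ID" is a placeholder; in the app it comes from config.logs_folder_id.
save_logs("sofwareAG", "### Key refresher:\n- ...", folder_id="YOUR_DRIVE_FOLDER_ID")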
web_scrapping_engine.py
ADDED
@@ -0,0 +1,17 @@
+import os
+from scrappers.linkedin import get_linkedin_profile
+from scrappers.company_website import full_company_website_exploration
+
+
+def run_web_scrapping_engine(company_name="sunday natural", folder_path="data_dumpster", output_in_code=False):
+    path = f"{folder_path}/{company_name.replace(' ', '-')}"
+    os.makedirs(path, exist_ok=True)
+    os.makedirs(f"{path}/pdf/", exist_ok=True)
+    print("Starting Linkedin gathering...")
+    linkedin = get_linkedin_profile(company_name, folder_path=path)
+    print("Linkedin Done!\n========> Starting now company website gathering...")
+    content = full_company_website_exploration(company_name, folder_path=path)
+    content.append(linkedin)
+    if output_in_code:
+        return content
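A note on dependencies: the scrappers package (scrappers.linkedin, scrappers.company_website) is not part of this upload, so the engine cannot run from these four files alone. Assuming it is available, a minimal usage sketch:

from web_scrapping_engine import run_web_scrapping_engine

# Scraped artifacts land under data_dumpster/sunday-natural/; the gathered documents
# are returned only because output_in_code=True (otherwise the function returns None).
content = run_web_scrapping_engine("sunday natural", output_in_code=True)
print(len(content), "documents gathered")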