Adr740 committed on
Commit
3289271
·
verified ·
1 Parent(s): 058ae29

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +62 -0
  2. ice_cold.py +47 -0
  3. logs.py +50 -0
  4. web_scrapping_engine.py +17 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from functools import partial
4
+ import gdown
5
+ import pandas as pd
6
+ from logs import save_logs
7
+ import gdown
8
+ import time
9
+ import pandas as pd
10
+ from config import logs_folder_id, json_url_id
11
+ from ice_cold import get_refresher
12
+ from web_scrapping_engine import get_linkedin_profile
13
# NOTE(review): `choices` is not referenced by any code visible in this file.
choices = ["sofwareAG"]

# Text passed as the page title of the Gradio app and rendered as its heading.
title = (
    "SynthAI Alpha V2 - Strada Partners Demo\n"
    "\n"
    "\n"
    "contact: adamrida.ra@gmail.com or sp.olivier@hotmail.com\n"
)
20
+
21
+
22
def stream(company):
    """Stream a company refresher to the UI while it is being generated.

    Fetches the company's LinkedIn data with ``get_linkedin_profile``, sends
    it to the model through ``get_refresher`` and yields the answer
    accumulated so far every time new text arrives. When the stream is
    exhausted, the query and the final answer are handed to ``save_logs``.

    Args:
        company: Company name entered by the user.

    Yields:
        The answer text accumulated so far, or a single short hint when
        ``company`` is empty or only whitespace.
    """
    # Nothing to look up: skip the scrape and the model call entirely.
    if not company or not company.strip():
        yield "Please enter a company name."
        return

    linkedin_extracted_info = get_linkedin_profile(company)
    resp = get_refresher(linkedin_extracted_info)

    answer = ""
    for chunk in resp:
        # A streamed chunk may carry no choices at all; indexing [0] on it
        # would raise IndexError and abort the stream.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            answer += delta
            yield answer

    # Logging is best-effort: the answer has already been delivered, so a
    # failed upload must not turn into an error for the user.
    try:
        save_logs(company, answer, folder_id=logs_folder_id)
    except Exception as exc:
        print(f"Could not save logs: {exc}")
31
+
32
# Download the Google service-account key at startup. logs.py reads the key
# from this same relative path when it uploads a log entry.
# NOTE(review): the key is fetched through a Drive link built from
# `json_url_id` - confirm that link is not publicly accessible.
output = 'secret_google_service_account.json'
download_url = 'https://drive.google.com/uc?id={}'.format(json_url_id)
gdown.download(download_url, output, quiet=False)
35
+
36
+
37
# Gradio UI: a Blocks app holding a company-name textbox, a submit button and
# a Markdown area that displays the generated refresher.
# NOTE(review): indentation was lost in this diff view, so the exact nesting
# of the Row/Column containers below cannot be confirmed from here.
+ with gr.Blocks(title=title,theme='nota-ai/theme',css="footer {visibility: hidden}") as demo:
38
+ gr.Markdown(f"## {title}")
39
+
40
+ with gr.Row():
41
+ with gr.Column(scale=6):
42
+ with gr.Row():
43
+ with gr.Column(scale=3):
44
+ chat_input = gr.Textbox(placeholder="Company Name", lines=1, label="Get instant refresher on any company")
45
+ chat_submit_button = gr.Button(value="Submit ▶")
46
+
47
+ with gr.Column(scale=6):
48
+ chat_output = gr.Markdown("Waiting for company...")
49
+
50
+
51
# fn_chat is an alias for the stream() generator defined in this file.
+ fn_chat = stream
52
+
53
+
54
# Clicking the button calls fn_chat with the textbox value and sends what it
# yields to chat_output.
+ chat_submit_button.click(fn=fn_chat, inputs=[chat_input], outputs=[chat_output])
55
+
56
+
57
# Start the app, passing max_threads=40 to launch().
+ demo.launch(max_threads=40)
58
+
59
+
60
+
61
+
62
+
ice_cold.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+
3
+
4
+ from config import openai_api
5
+
6
# Module-wide OpenAI client, built once at import time from the configured key.
client = OpenAI(api_key=openai_api)


def _refresher_system_prompt(today):
    """Return the system prompt, stamped with ``today`` as the current date."""
    current_date = f"{today.day} {today.strftime('%B')} {today.year}"
    return (
        "You will recieve a raw json file with information on a company pulled from linkedin.\n"
        "Your job is to determine, based on the recent updates, what are the best key info "
        "a potential private equity investor might need to know to get up to speed before "
        "meeting with them. Assume he already talked with them in the past and he just need "
        "in a few bullet point get up to speed and eventually have something personalized "
        "to say to them that is relevant.\n"
        f"(current date is {current_date})\n\n"
        "Follow the following 2 title format (5 bullet points for each tops):\n\n"
        "### Key refresher:\n\n\n"
        "### Updates / icebreaker\n"
    )


def get_refresher(json_linkedin_info, current_date=None):
    """Request a streamed investor refresher for one company from the model.

    The LinkedIn data is stringified and sent as the user message; the system
    message asks for two short sections ("Key refresher" and
    "Updates / icebreaker").

    Args:
        json_linkedin_info: LinkedIn data for the company; passed through
            ``str()`` before being sent.
        current_date: Optional ``datetime.date`` to present to the model as
            "today". Defaults to the real current date, so recency is no
            longer judged against a hard-coded day.

    Returns:
        The streaming response returned by ``client.chat.completions.create``
        (the request is made with ``stream=True``).
    """
    from datetime import date

    if current_date is None:
        current_date = date.today()

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": _refresher_system_prompt(current_date),
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": str(json_linkedin_info),
                    }
                ],
            },
        ],
        temperature=0,
        max_tokens=1228,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stream=True,
    )
    return response
39
+
40
+
41
+
42
+ # def stream(resp):
43
+ # for chunk in resp:
44
+ # if chunk.choices[0].delta.content is not None:
45
+ # print(chunk.choices[0].delta.content, end="")
46
+ # # yield chunk.choices[0].delta.content
47
+
logs.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from google.oauth2 import service_account
3
+ from googleapiclient.discovery import build
4
+ from googleapiclient.http import MediaFileUpload
5
+ from datetime import datetime
6
+
7
def save_logs(query, response, folder_id=""):
    """Write one query/response log entry to disk and upload it to Google Drive.

    The entry is written to a timestamp-named ``.txt`` file in the current
    working directory and then uploaded with the service-account credentials
    stored in ``secret_google_service_account.json``.

    Args:
        query: The user query to record.
        response: The generated answer to record.
        folder_id: ID of the Drive folder to upload into. When empty, no
            parent is sent, so the Drive API chooses the default location
            instead of receiving an empty parent ID.
    """
    to_save = (
        "LOG ENTRY\n"
        "QUERY\n"
        f"{query}\n"
        "=================================\n"
        "RESPONSE\n"
        f"{response}\n"
        "****************************************\n"
    )

    # Fixed-width timestamp (YYYYMMDDHHMMSSffffff). Unlike str(datetime), it
    # keeps the microsecond digits even when they are all zero.
    filename = datetime.now().strftime("%Y%m%d%H%M%S%f") + ".txt"

    # Explicit UTF-8 so non-ASCII text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    # NOTE(review): the local copy is left on disk after the upload.
    with open(filename, "w", encoding="utf-8") as log_file:
        log_file.write(to_save)

    # Path to the service account key file
    SERVICE_ACCOUNT_FILE = 'secret_google_service_account.json'

    # Define the required scopes
    SCOPES = ['https://www.googleapis.com/auth/drive.file']

    # Authenticate using the service account key file
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)

    # Build the Google Drive API client
    service = build('drive', 'v3', credentials=credentials)

    # Metadata of the file to be uploaded; only name a parent folder when one
    # was actually provided.
    file_metadata = {'name': filename}
    if folder_id:
        file_metadata['parents'] = [folder_id]

    # Create a MediaFileUpload object to upload the file
    media = MediaFileUpload(filename, mimetype='text/plain')

    # Use the Drive API to upload the file
    uploaded = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id'
    ).execute()

    # Print the file ID of the uploaded file
    print('Saved in Google Drive - File ID: %s' % uploaded.get('id'))
web_scrapping_engine.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from scrappers.linkedin import get_linkedin_profile
3
+ from scrappers.company_website import full_company_website_exploration
4
+
5
def run_web_scrapping_engine(company_name="sunday natural", folder_path="data_dumpster", output_in_code=False):
    """Gather LinkedIn and company-website data for one company.

    Creates ``<folder_path>/<company-name>`` (spaces replaced by dashes) and
    a ``pdf/`` directory inside it, runs the LinkedIn scraper followed by the
    website exploration, and appends the LinkedIn result to the website
    results.

    Args:
        company_name: Name of the company to look up.
        folder_path: Base directory under which the company folder is created.
        output_in_code: When true, return the gathered content; otherwise the
            function returns ``None``.

    Returns:
        The website exploration results with the LinkedIn result appended
        when ``output_in_code`` is true, else ``None``.
    """
    company_dir = "{}/{}".format(folder_path, company_name.replace(" ", "-"))
    for directory in (company_dir, company_dir + "/pdf/"):
        os.makedirs(directory, exist_ok=True)

    print("Starting Linkedin gathering...")
    linkedin_data = get_linkedin_profile(company_name, folder_path=company_dir)

    print("Linkedin Done!\n========> Starting now company website gathering...")
    gathered = full_company_website_exploration(company_name, folder_path=company_dir)
    gathered.append(linkedin_data)

    return gathered if output_in_code else None
16
+
17
+