Hyma7 committed on
Commit
5bd3e9f
·
verified ·
1 Parent(s): e614c71

Create industry_research.py

Browse files
Files changed (1) hide show
  1. industry_research.py +37 -0
industry_research.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from transformers import pipeline
4
+ import re
5
+
6
# Build the BART-large-CNN summarization pipeline once, at import time, and
# bind it to a module-level name.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
8
+
9
# Ordered (label, pattern) pairs; the first pattern that matches wins.
_INDUSTRY_PATTERNS = (
    ("Finance", re.compile(r'\b(finance|banking|insurance)\b', re.I)),
    ("Retail", re.compile(r'\b(retail|ecommerce|shopping)\b', re.I)),
    ("Manufacturing", re.compile(r'\b(manufacturing|production|supply chain)\b', re.I)),
)


def _classify_industry(text):
    """Return the first industry label whose keywords occur in *text*, else "Unknown"."""
    for label, pattern in _INDUSTRY_PATTERNS:
        if pattern.search(text):
            return label
    return "Unknown"


def _detect_focus_areas(text):
    """Return the focus-area labels whose keywords occur in *text*, ignoring case."""
    lowered = text.lower()
    areas = []
    if "customer" in lowered:
        areas.append("customer experience")
    if "operation" in lowered or "supply chain" in lowered:
        areas.append("operations")
    return areas


def get_company_info(url):
    """Fetch a company web page and derive a small profile from it.

    The page's ``<div class="mission">`` text is used as the mission, and
    the ``<meta name="description">`` content is summarized and scanned for
    industry and focus-area keywords.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with keys ``"mission"`` (str), ``"industry"`` (str) and
        ``"focus_areas"`` (list of str). If anything fails (network error,
        HTTP error status, parsing or summarization failure) the error is
        printed and ``{"mission": "N/A", "industry": "N/A",
        "focus_areas": []}`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx responses as failures rather than parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        mission_tag = soup.find('div', {'class': 'mission'})
        mission = mission_tag.get_text(strip=True) if mission_tag else "Mission not found"

        # A meta tag may exist without a usable ``content`` attribute.
        meta_tag = soup.find('meta', {'name': 'description'})
        description = (meta_tag.get('content') or "").strip() if meta_tag else ""

        # Only summarize real text: a missing description must not be fed to
        # the model as if it were page content.
        summary = ""
        if description:
            summary = summarizer(description, max_length=30, min_length=10, do_sample=False)[0]['summary_text']

        return {
            "mission": mission,
            "industry": _classify_industry(summary),
            "focus_areas": _detect_focus_areas(summary),
        }

    except Exception as e:
        # Best-effort boundary: callers always get a dict of the same shape.
        print(f"Error fetching company info: {e}")
        return {"mission": "N/A", "industry": "N/A", "focus_areas": []}