Spaces:
Sleeping
Sleeping
Create industry_research.py
Browse files- industry_research.py +37 -0
industry_research.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
from transformers import pipeline
|
4 |
+
import re
|
5 |
+
|
6 |
+
# Shared summarization pipeline: built once when this module is imported and
# reused by every get_company_info() call.
# NOTE(review): constructing the pipeline presumably loads the BART weights
# at import time, which may be slow on first use -- confirm.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
|
8 |
+
|
9 |
+
# Ordered (label, pattern) pairs; the first pattern that matches the summary wins.
_INDUSTRY_PATTERNS = (
    ("Finance", re.compile(r'\b(finance|banking|insurance)\b', re.I)),
    ("Retail", re.compile(r'\b(retail|ecommerce|shopping)\b', re.I)),
    ("Manufacturing", re.compile(r'\b(manufacturing|production|supply chain)\b', re.I)),
)

# (focus-area label, lowercase substrings that signal it), in output order.
_FOCUS_AREA_KEYWORDS = (
    ("customer experience", ("customer",)),
    ("operations", ("operation", "supply chain")),
)


def get_company_info(url):
    """Fetch a company web page and derive its mission, industry and focus areas.

    The page's ``<div class="mission">`` text becomes the mission. The
    ``<meta name="description">`` content is summarized with the module-level
    ``summarizer`` and the summary is matched against keyword lists to pick
    an industry and zero or more focus areas.

    Args:
        url: Address of the page to download (5 second timeout).

    Returns:
        A dict with keys ``"mission"`` (str), ``"industry"`` (str) and
        ``"focus_areas"`` (list of str). ``"mission"`` is
        ``"Mission not found"`` when the page has no mission div, and
        ``"industry"`` is ``"Unknown"`` when no keyword matches or the page
        has no usable description. If anything fails (network error, HTTP
        error status, parsing or summarization error) the fallback
        ``{"mission": "N/A", "industry": "N/A", "focus_areas": []}`` is
        returned instead.

    Raises:
        Nothing: every ``Exception`` is reported on stdout and converted
        into the fallback result.
    """
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx as a failed fetch rather than classifying an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        mission_tag = soup.find('div', {'class': 'mission'})
        mission = mission_tag.get_text(strip=True) if mission_tag else "Mission not found"

        # .get() tolerates a <meta name="description"> without a content
        # attribute, so a malformed tag no longer discards the mission too.
        meta_tag = soup.find('meta', {'name': 'description'})
        description = (meta_tag.get('content') or "").strip() if meta_tag else ""

        # Only run the model on real text; summarizing a placeholder string
        # would classify the placeholder instead of the company.
        summary = ""
        if description:
            summary = summarizer(description, max_length=30, min_length=10, do_sample=False)[0]['summary_text']

        industry = next(
            (label for label, pattern in _INDUSTRY_PATTERNS if pattern.search(summary)),
            "Unknown",
        )

        # Case-insensitive, matching the industry patterns above, so a
        # capitalized "Customer" or "Operations" is still detected.
        lowered = summary.lower()
        focus_areas = [
            label
            for label, keywords in _FOCUS_AREA_KEYWORDS
            if any(keyword in lowered for keyword in keywords)
        ]

        return {"mission": mission, "industry": industry, "focus_areas": focus_areas}

    except Exception as e:
        # Deliberate best-effort boundary: callers always get a dict back.
        print(f"Error fetching company info: {e}")
        return {"mission": "N/A", "industry": "N/A", "focus_areas": []}
|