# SmartCaseAI / industry_research.py
# Hyma7's picture
# Create industry_research.py
# 5bd3e9f verified
# raw
# history blame
# 1.63 kB
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
import re
# Shared summarization pipeline, built once when this module is imported and
# reused by get_company_info() for every call.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Ordered (label, pattern) pairs; the first pattern that matches decides the industry.
_INDUSTRY_PATTERNS = (
    ("Finance", re.compile(r'\b(finance|banking|insurance)\b', re.I)),
    ("Retail", re.compile(r'\b(retail|ecommerce|shopping)\b', re.I)),
    ("Manufacturing", re.compile(r'\b(manufacturing|production|supply chain)\b', re.I)),
)


def _classify_industry(text):
    """Return the first industry label whose keywords occur in *text*, else "Unknown"."""
    for label, pattern in _INDUSTRY_PATTERNS:
        if pattern.search(text):
            return label
    return "Unknown"


def _extract_focus_areas(text):
    """Return the focus-area labels whose keywords occur in *text*, ignoring case."""
    # Lower-case once so matching agrees with the case-insensitive industry
    # patterns (e.g. a summary starting with "Customer ..." still counts).
    lowered = text.lower()
    areas = []
    if "customer" in lowered:
        areas.append("customer experience")
    if "operation" in lowered or "supply chain" in lowered:
        areas.append("operations")
    return areas


def get_company_info(url):
    """Fetch a company web page and derive basic profile information from it.

    The page's ``<div class="mission">`` text is used as the mission, and the
    ``<meta name="description">`` content is summarized with the module-level
    ``summarizer``; the summary is then scanned for industry and focus-area
    keywords.

    Args:
        url: Address of the page to download (5 second timeout).

    Returns:
        A dict with the keys ``"mission"`` (str), ``"industry"`` (str) and
        ``"focus_areas"`` (list of str). When the page has no mission block the
        mission is ``"Mission not found"``; when it has no usable description
        the industry is ``"Unknown"`` and the focus areas are empty. If anything
        fails (invalid URL, network error, HTTP error status, parsing or
        summarization error) the error is printed and
        ``{"mission": "N/A", "industry": "N/A", "focus_areas": []}`` is returned.
    """
    try:
        response = requests.get(url, timeout=5)
        # Do not treat a 4xx/5xx error page as if it were the company page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        mission_tag = soup.find('div', {'class': 'mission'})
        mission = mission_tag.get_text(strip=True) if mission_tag else "Mission not found"

        # .get() tolerates a <meta name="description"> tag without a content
        # attribute, so a malformed tag no longer discards the mission.
        meta_tag = soup.find('meta', {'name': 'description'})
        description = (meta_tag.get('content') or "").strip() if meta_tag else ""

        # Only run the model on real text; summarizing a placeholder string
        # would classify the company from made-up content.
        summary = ""
        if description:
            summary = summarizer(
                description, max_length=30, min_length=10, do_sample=False
            )[0]['summary_text']

        return {
            "mission": mission,
            "industry": _classify_industry(summary),
            "focus_areas": _extract_focus_areas(summary),
        }
    except Exception as e:
        # Best-effort boundary: callers always receive a dict of the same shape.
        print(f"Error fetching company info: {e}")
        return {"mission": "N/A", "industry": "N/A", "focus_areas": []}