Spaces:
Sleeping
Sleeping
import requests | |
from bs4 import BeautifulSoup | |
from transformers import pipeline | |
import re | |
# Shared summarization pipeline, constructed once at import time and reused
# by every call that needs to condense scraped text.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Ordered (label, pattern) pairs; the first pattern that matches wins, which
# mirrors the priority Finance > Retail > Manufacturing.
_INDUSTRY_PATTERNS = (
    ("Finance", re.compile(r'\b(finance|banking|insurance)\b', re.I)),
    ("Retail", re.compile(r'\b(retail|ecommerce|shopping)\b', re.I)),
    ("Manufacturing", re.compile(r'\b(manufacturing|production|supply chain)\b', re.I)),
)


def _classify_industry(text):
    """Return the first industry label whose keywords occur in *text*, else "Unknown"."""
    for label, pattern in _INDUSTRY_PATTERNS:
        if pattern.search(text):
            return label
    return "Unknown"


def _detect_focus_areas(text):
    """Return the focus-area labels whose keywords occur in *text*, ignoring case."""
    lowered = text.lower()
    areas = []
    if "customer" in lowered:
        areas.append("customer experience")
    if "operation" in lowered or "supply chain" in lowered:
        areas.append("operations")
    return areas


def get_company_info(url: str) -> dict:
    """Fetch a company web page and derive a small profile from it.

    The page's ``<div class="mission">`` text is used as the mission, and the
    ``<meta name="description">`` content is summarized with the module-level
    ``summarizer``; the industry and focus areas are keyword-matched against
    that summary.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with the keys ``"mission"`` (str), ``"industry"`` (str) and
        ``"focus_areas"`` (list of str). If anything fails (network error,
        HTTP error status, parsing or summarization error) the error is
        printed and ``{"mission": "N/A", "industry": "N/A", "focus_areas": []}``
        is returned instead; this function does not raise.
    """
    try:
        response = requests.get(url, timeout=5)
        # Do not treat a 4xx/5xx error page as if it were the company's page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        mission_tag = soup.find('div', {'class': 'mission'})
        mission = mission_tag.get_text(strip=True) if mission_tag else "Mission not found"

        # A <meta name="description"> tag without a content attribute is
        # treated the same as a missing tag rather than raising KeyError.
        meta_tag = soup.find('meta', {'name': 'description'})
        description = (meta_tag.get('content') or "").strip() if meta_tag else ""

        # Only run the model when there is real text to summarize; an empty
        # summary simply yields "Unknown" and no focus areas below.
        if description:
            summary = summarizer(
                description, max_length=30, min_length=10, do_sample=False
            )[0]['summary_text']
        else:
            summary = ""

        return {
            "mission": mission,
            "industry": _classify_industry(summary),
            "focus_areas": _detect_focus_areas(summary),
        }
    except Exception as e:
        # Deliberately broad: callers rely on always getting a dict back,
        # whatever goes wrong while fetching, parsing or summarizing.
        print(f"Error fetching company info: {e}")
        return {"mission": "N/A", "industry": "N/A", "focus_areas": []}