import re

import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Initialize NLP model for text summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Ordered (label, pattern) pairs. The first matching pattern wins, which
# preserves the precedence Finance > Retail > Manufacturing.
_INDUSTRY_PATTERNS = (
    ("Finance", re.compile(r'\b(finance|banking|insurance)\b', re.I)),
    ("Retail", re.compile(r'\b(retail|ecommerce|shopping)\b', re.I)),
    ("Manufacturing", re.compile(r'\b(manufacturing|production|supply chain)\b', re.I)),
)


def _classify_industry(text):
    """Return the first industry label whose keywords occur in *text*, else "Unknown"."""
    for label, pattern in _INDUSTRY_PATTERNS:
        if pattern.search(text):
            return label
    return "Unknown"


def _detect_focus_areas(text):
    """Return the focus-area labels whose keywords occur in *text* (case-insensitive)."""
    # Lower-case once so "Customer" / "Operations" match, consistent with the
    # case-insensitive industry patterns.
    lowered = text.lower()
    focus_areas = []
    if "customer" in lowered:
        focus_areas.append("customer experience")
    if "operation" in lowered or "supply chain" in lowered:
        focus_areas.append("operations")
    return focus_areas


def get_company_info(url):
    """Fetch a company web page and derive a coarse profile from it.

    The page at *url* is downloaded (5 second timeout) and parsed as HTML.
    The mission is the text of the first ``<div class="mission">``; the
    ``<meta name="description">`` content is summarized with the module-level
    ``summarizer`` and the summary is keyword-matched to pick an industry and
    a list of focus areas.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with keys:
            ``"mission"``: the mission text, or ``"Mission not found"``.
            ``"industry"``: ``"Finance"``, ``"Retail"``, ``"Manufacturing"``
                or ``"Unknown"``.
            ``"focus_areas"``: a list containing zero or more of
                ``"customer experience"`` and ``"operations"``.
        If anything fails (network error, HTTP error status, parsing or
        summarization failure) the error is printed and
        ``{"mission": "N/A", "industry": "N/A", "focus_areas": []}`` is
        returned instead; no exception propagates to the caller.
    """
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx as a failure rather than profiling an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        mission_div = soup.find('div', {'class': 'mission'})
        if mission_div is not None:
            mission = mission_div.get_text(strip=True)
        else:
            mission = "Mission not found"

        # A <meta name="description"> tag without a content attribute is
        # treated the same as a missing tag instead of raising KeyError.
        meta = soup.find('meta', {'name': 'description'})
        description = (meta.get('content') or "").strip() if meta is not None else ""

        # Only run the model when there is real text to summarize; with no
        # description there is nothing to classify.
        summary = ""
        if description:
            summary = summarizer(
                description,
                max_length=30,
                min_length=10,
                do_sample=False,
                truncation=True,  # guard against over-long descriptions
            )[0]['summary_text']

        return {
            "mission": mission,
            "industry": _classify_industry(summary),
            "focus_areas": _detect_focus_areas(summary),
        }
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a dict back.
        print(f"Error fetching company info: {e}")
        return {"mission": "N/A", "industry": "N/A", "focus_areas": []}