"""FastAPI service that scrapes the Kali Linux tools index and serves it from memory."""

from typing import Dict, List, Optional

import pandas as pd
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException

app = FastAPI()

# Global variable to store the dataset; stays None until the scrape endpoint
# has completed successfully at least once.
kali_tools_df: Optional[pd.DataFrame] = None

# Columns of the scraped dataset. Passed explicitly to the DataFrame
# constructor so that a scrape matching zero items still yields these columns
# (a bare pd.DataFrame([]) has none, which would break the search endpoint).
_TOOL_COLUMNS = ["name", "description", "link"]

_NOT_SCRAPED_MESSAGE = "Dataset not yet scraped. Call /scrape_kali_tools first."


def scrape_kali_tools(
    base_url: str = "https://www.kali.org/tools/",
    timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Scrapes the Kali Linux tools documentation page and returns a structured dataset.

    Parameters:
    - base_url: The URL of the Kali Linux tools documentation.
    - timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
    - Pandas DataFrame with the columns "name", "description" and "link",
      one row per scraped tool. The frame is empty (but keeps its columns)
      when no tool entries are found.

    Raises:
    - requests.RequestException: on connection errors, timeouts, or a
      non-success HTTP status (via raise_for_status).
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would block the calling worker.
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract tool names, descriptions and links
    tools = []
    for item in soup.select(".tools--index__item"):
        title = item.select_one(".tools--index__title")
        anchor = item.find("a", href=True)
        # An entry without a title or a link cannot be represented; skip it
        # rather than aborting the whole scrape on one malformed item.
        if title is None or anchor is None:
            continue
        summary = item.select_one(".tools--index__description")
        tools.append(
            {
                "name": title.get_text(strip=True),
                # A missing description is stored as an empty string.
                "description": summary.get_text(strip=True) if summary is not None else "",
                "link": anchor["href"],
            }
        )

    # Convert to DataFrame
    return pd.DataFrame(tools, columns=_TOOL_COLUMNS)


def _require_dataset() -> pd.DataFrame:
    """Return the in-memory dataset, or raise HTTP 409 if nothing has been scraped yet."""
    if kali_tools_df is None:
        # Raised instead of returning an {"error": ...} dict: the endpoints are
        # annotated as returning a list, and a dict does not fit that shape.
        raise HTTPException(status_code=409, detail=_NOT_SCRAPED_MESSAGE)
    return kali_tools_df


@app.get("/scrape_kali_tools/")
def scrape_kali_tools_endpoint():
    """
    Scrapes the Kali Linux tools documentation and stores it in memory.

    Returns:
    - Message indicating the dataset has been created.

    Raises:
    - HTTPException (502): if the documentation page could not be fetched.
    """
    global kali_tools_df
    try:
        scraped = scrape_kali_tools()
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to fetch the Kali Linux tools page: {err}",
        ) from err
    # Assign only after a successful scrape so that a failed refresh leaves
    # any previously stored dataset in place.
    kali_tools_df = scraped
    return {"message": f"Scraped {len(kali_tools_df)} tools from Kali Linux documentation."}


@app.get("/get_kali_tools/")
def get_kali_tools(start: int = 0, limit: int = 10) -> List[Dict]:
    """
    Fetches a chunk of the Kali tools dataset.

    Parameters:
    - start: Starting index of the tools to fetch.
    - limit: Number of tools to return.

    Returns:
    - A list of tools with their names, descriptions, and links. Rows are
      taken with the positional slice iloc[start:start + limit], so a range
      past the end of the dataset yields a shorter or empty list.

    Raises:
    - HTTPException (409): if the dataset has not been scraped yet.
    """
    dataset = _require_dataset()
    return dataset.iloc[start:start + limit].to_dict(orient="records")


@app.get("/search_kali_tools/")
def search_kali_tools(keyword: str) -> List[Dict]:
    """
    Searches the Kali tools dataset for a specific keyword.

    Parameters:
    - keyword: Keyword to search in tool names or descriptions. Matched
      case-insensitively as a literal substring, not as a regular expression.

    Returns:
    - A list of tools matching the keyword.

    Raises:
    - HTTPException (409): if the dataset has not been scraped yet.
    """
    dataset = _require_dataset()
    # regex=False: the keyword is user input, so characters such as "+" or "("
    # must match literally instead of being compiled as a pattern.
    # na=False: missing cells count as "no match" so the mask stays boolean.
    in_name = dataset["name"].str.contains(keyword, case=False, regex=False, na=False)
    in_description = dataset["description"].str.contains(
        keyword, case=False, regex=False, na=False
    )
    return dataset[in_name | in_description].to_dict(orient="records")