# Spaces: Runtime error
from typing import Dict, List, Optional, Union

import pandas as pd
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI
app = FastAPI()

# In-memory cache of the scraped dataset. It stays None until
# scrape_kali_tools_endpoint() stores a DataFrame here; get_kali_tools() and
# search_kali_tools() check for None to report that no scrape has happened yet.
kali_tools_df: Optional[pd.DataFrame] = None
def scrape_kali_tools(
    base_url: str = "https://www.kali.org/tools/",
    timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Scrapes the Kali Linux tools documentation page and returns a structured dataset.

    Parameters:
    - base_url: The URL of the Kali Linux tools documentation.
    - timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
    - Pandas DataFrame with one row per tool and the columns "name",
      "description" and "link". The columns are present even when no tool
      was found on the page.

    Raises:
    - requests.RequestException: if the request fails, times out, or the
      server answers with an error status.
    """
    columns = ["name", "description", "link"]

    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    tools = []
    for tool in soup.select(".tools--index__item"):
        title = tool.select_one(".tools--index__title")
        anchor = tool.find("a", href=True)
        if title is None or anchor is None:
            # An entry without a title or a link cannot be represented; skip
            # it rather than abort the whole scrape.
            continue

        description = tool.select_one(".tools--index__description")
        tools.append(
            {
                "name": title.get_text(strip=True),
                # A missing description is stored as an empty string so the
                # column stays text-only.
                "description": (
                    description.get_text(strip=True) if description is not None else ""
                ),
                "link": anchor["href"],
            }
        )

    # Passing the columns explicitly keeps the schema stable for an empty
    # result, so column lookups on the returned frame never raise KeyError.
    return pd.DataFrame(tools, columns=columns)
def scrape_kali_tools_endpoint() -> Dict[str, str]:
    """
    Scrapes the Kali Linux tools documentation and stores it in memory.

    The module-level ``kali_tools_df`` is rebound only after
    ``scrape_kali_tools()`` has returned, so an exception raised by the
    scrape leaves the previously stored dataset (or None) untouched.

    Returns:
    - Message indicating the dataset has been created, including the number
      of tools scraped.
    """
    global kali_tools_df
    kali_tools_df = scrape_kali_tools()
    return {"message": f"Scraped {len(kali_tools_df)} tools from Kali Linux documentation."}
def get_kali_tools(
    start: int = 0, limit: int = 10
) -> Union[List[Dict], Dict[str, str]]:
    """
    Fetches a chunk of the Kali tools dataset.

    Parameters:
    - start: Starting index of the tools to fetch. Negative values are
      treated as 0.
    - limit: Number of tools to return. Negative values are treated as 0.

    Returns:
    - A list of tools with their names, descriptions, and links. The list is
      empty when the requested window lies past the end of the dataset.
    - A dict with a single "error" key when the dataset has not been scraped
      yet.
    """
    if kali_tools_df is None:
        return {"error": "Dataset not yet scraped. Call /scrape_kali_tools first."}

    # Negative bounds would otherwise be interpreted as "count from the end"
    # by the positional slice and return rows unrelated to the request.
    start = max(start, 0)
    limit = max(limit, 0)
    return kali_tools_df.iloc[start:start + limit].to_dict(orient="records")
def search_kali_tools(keyword: str) -> Union[List[Dict], Dict[str, str]]:
    """
    Searches the Kali tools dataset for a specific keyword.

    The keyword is matched as a literal, case-insensitive substring; it is
    not interpreted as a regular expression.

    Parameters:
    - keyword: Keyword to search in tool names or descriptions.

    Returns:
    - A list of tools matching the keyword (empty when nothing matches or the
      dataset holds no tools).
    - A dict with a single "error" key when the dataset has not been scraped
      yet.
    """
    if kali_tools_df is None:
        return {"error": "Dataset not yet scraped. Call /scrape_kali_tools first."}

    # An empty dataset may have no columns at all; there is nothing to match.
    if kali_tools_df.empty:
        return []

    def _contains(column: str) -> pd.Series:
        # regex=False: characters such as "+" or "(" in the keyword are
        # matched literally instead of raising re.error.
        # na=False: missing cells count as "no match" so the mask stays boolean.
        return kali_tools_df[column].str.contains(
            keyword, case=False, regex=False, na=False
        )

    results = kali_tools_df[_contains("name") | _contains("description")]
    return results.to_dict(orient="records")