from fastapi import FastAPI, HTTPException, Query from pydantic import BaseModel import hrequests import trafilatura from fastapi.middleware.cors import CORSMiddleware app = FastAPI() class URLRequest(BaseModel): url: str @app.post("/scrape") async def scrape(url_request: URLRequest): try: response = hrequests.get(url_request.url, browser='chrome') return {"content": response.text} except Exception as e: raise e @app.get("/extract-article") def extract_article( url: str, record_id: Optional[str] = Query(None, description="Add an ID to the metadata."), no_fallback: Optional[bool] = Query(False, description="Skip the backup extraction with readability-lxml and justext."), favor_precision: Optional[bool] = Query(False, description="Prefer less text but correct extraction."), favor_recall: Optional[bool] = Query(False, description="When unsure, prefer more text."), include_comments: Optional[bool] = Query(True, description="Extract comments along with the main text."), output_format: Optional[str] = Query('txt', description="Define an output format: 'csv', 'json', 'markdown', 'txt', 'xml', 'xmltei'.", enum=["csv", "json", "markdown", "txt", "xml", "xmltei"]), target_language: Optional[str] = Query(None, description="Define a language to discard invalid documents (ISO 639-1 format)."), include_tables: Optional[bool] = Query(True, description="Take into account information within the HTML