from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
# from pydantic import BaseModel
import spacy
from spacy.matcher import Matcher

app = FastAPI()
nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# Define a Pydantic model for the request body
# class TextInput(BaseModel):
#     text: str

# Define spaCy Matcher patterns for uncertain statements (hedging modals like
# "may", "might", "could"). Register them once at startup so repeated requests
# don't keep appending duplicate patterns to the shared Matcher.
pattern1 = [{"IS_ALPHA": True, "OP": "?"}, {"ORTH": "may"}, {"IS_ALPHA": True, "OP": "?"}]
pattern2 = [{"IS_ALPHA": True, "OP": "?"}, {"ORTH": "might"}, {"IS_ALPHA": True, "OP": "?"}]
pattern3 = [{"IS_ALPHA": True, "OP": "?"}, {"ORTH": "could"}, {"IS_ALPHA": True, "OP": "?"}]
matcher.add("UNCERTAIN_STATEMENT", [pattern1, pattern2, pattern3])

@app.get("/score_text")
def score_text(text_input: str):
    """Endpoint to score text for uncertain statements using spaCy Matcher."""
    # Run the text through spaCy's pipeline (tokenization, tagging, parsing)
    doc = nlp(text_input)

    # Use the Matcher to find matches in the text
    matches = matcher(doc)

    # Extract matched spans and their associated text
    uncertain_statements = [doc[start:end].text for _, start, end in matches]

    # Calculate the score as the ratio of uncertain statements to the total number of sentences
    sentence_count = len(list(doc.sents))
    score = len(uncertain_statements) / sentence_count if sentence_count else 0.0

    # Return the score
    return {"score": score, "uncertain_statements": uncertain_statements}

# Serve the front-end; with html=True the mount already returns static/index.html at "/"
app.mount("/", StaticFiles(directory="static", html=True), name="static")

@app.get("/")
def index() -> FileResponse:
    return FileResponse(path="/app/static/index.html", media_type="text/html")
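
# Example client call, as a rough sketch: it assumes the app is served locally on
# port 7860 (typical for a Hugging Face Space); adjust the host/port for your
# deployment. The text passed in "text_input" is only illustrative.
#
#   import requests
#   resp = requests.get(
#       "http://localhost:7860/score_text",
#       params={"text_input": "The results may vary. This approach could fail."},
#   )
#   print(resp.json())  # {"score": ..., "uncertain_statements": [...]}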