|
from fastapi import FastAPI |
|
from fastapi.staticfiles import StaticFiles |
|
from fastapi.responses import FileResponse |
|
from transformers import AutoTokenizer, AutoModel |
|
import numpy as np |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from pydantic import BaseModel |
|
|
|
|
|
# Tokenizer and encoder are created once at import time; the /similarity
# handler reads these module-level objects on every request.
_SPECTER_CHECKPOINT = 'allenai/specter'

tokenizer = AutoTokenizer.from_pretrained(_SPECTER_CHECKPOINT)
model = AutoModel.from_pretrained(_SPECTER_CHECKPOINT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Input(BaseModel):
    """Request body for the /similarity endpoint."""

    # Paper records to compare. The handler reads a 'title' key and an
    # optional 'abstract' key from each item; the items themselves are not
    # validated by this model (the field is a plain, untyped list).
    papers: list = []
|
|
|
|
|
# Application object that the route decorators and the static mount in this
# module attach to.
app = FastAPI()
|
|
|
|
|
|
|
@app.post('/similarity')
def similarity(input: Input):
    """Return the pairwise cosine-similarity matrix for the submitted papers.

    Each paper is turned into one text: its title, the tokenizer's separator
    token, then its abstract (a missing, ``None`` or empty abstract becomes an
    empty string). The texts are tokenized as one padded batch, truncated to
    512 tokens, and encoded by the model; the hidden state of the first token
    of each sequence is used as that paper's embedding.

    Args:
        input: Request body whose ``papers`` list holds dict-like records
            with a ``'title'`` key and an optional ``'abstract'`` key.

    Returns:
        ``{"output": matrix}`` where ``matrix`` is a nested list and
        ``matrix[i][j]`` is the cosine similarity between paper ``i`` and
        paper ``j``. An empty ``papers`` list yields ``{"output": []}``.

    Raises:
        KeyError: If a paper record has no ``'title'`` key.
    """
    # Function-scope import: this module has no top-level torch import, and
    # the "pt" tensors requested below already require torch to be installed.
    import torch

    papers = input.papers
    if not papers:
        # Nothing to compare. An empty batch has no meaningful similarity
        # matrix, and cosine_similarity rejects input with zero rows.
        return {"output": []}

    title_abs = [
        d['title'] + tokenizer.sep_token + (d.get('abstract') or '')
        for d in papers
    ]

    inputs = tokenizer(
        title_abs,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )

    # Inference only: skip autograd bookkeeping so no graph is built and kept
    # alive for the duration of the request.
    with torch.no_grad():
        result = model(**inputs)

    # Position 0 along the sequence axis is the first token of each input.
    embeddings = result.last_hidden_state[:, 0, :].numpy()
    res = cosine_similarity(embeddings, embeddings).tolist()
    return {"output": res}
|
|
|
|
|
@app.get("/")
def index() -> FileResponse:
    """Serve the front-end entry page for requests to the site root.

    Returns:
        A ``FileResponse`` for ``/app/static/index.html`` with the
        ``text/html`` media type.
    """
    # NOTE(review): this absolute path presumes the app is deployed under
    # /app, while the mount below resolves "static" against the working
    # directory - confirm both point at the same files.
    return FileResponse(path="/app/static/index.html", media_type="text/html")


# Registered last on purpose: a mount at "/" matches every request path and
# routes are tried in registration order, so any route added after this mount
# would never be reached.
app.mount("/", StaticFiles(directory="static", html=True), name="static")