from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from starlette.requests import Request

import numpy as np

from module import config, transformers_utility as tr, utils, metrics, dataio

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
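# NOTE: browsers reject allow_credentials=True combined with the wildcard
# allow_origins=["*"] under the CORS spec; restrict the origin list before
# deploying beyond local development.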

templates = Jinja2Templates(directory="templates")

TOKENIZER_DIR = config.models / "byte-level-bpe-tokenizer"
PRETRAINED_MODEL = config.models / "transformer" / "prediction-model" / "saved_model.pth"
DATA_DIR = config.data


def load_model(args, settings):
    """Load the model config, tokenizer, and pretrained weights via transformers_utility."""
    return tr.load_model(
        args.model_name,
        args.tokenizer_dir,
        pretrained_model=args.pretrained_model,
        log_offset=args.log_offset,
        **settings,
    )


@app.get("/", response_class=HTMLResponse)
def read_root(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile = File(...)):
    # Save the uploaded file into the data directory for later processing.
    # Use the async read so the event loop is not blocked by file I/O.
    file_path = DATA_DIR / file.filename
    contents = await file.read()
    with open(file_path, "wb") as f:
        f.write(contents)
    return {"filename": file.filename}


@app.get("/process/{filename}")
def process_file(filename: str):
    # Run the prediction pipeline on the uploaded file, passing it as both the
    # train and test split so every sequence in it gets scored.
    file_path = DATA_DIR / filename
    preds = main(
        data_dir=DATA_DIR,
        train_data=file_path,
        test_data=file_path,
        pretrained_model=PRETRAINED_MODEL,
        tokenizer_dir=TOKENIZER_DIR,
        model_name="roberta-pred-mean-pool",
    )
    # Pair each predicted value with its tissue label.
    predictions = [
        [{"tissue": tissue, "prediction": value} for tissue, value in zip(config.tissues, row)]
        for row in preds
    ]
    return JSONResponse(content=predictions)
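# Illustrative response shape (tissue names come from config.tissues; the
# values here are made up):
#   [[{"tissue": "root", "prediction": 1.23}, ...], ...]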


def main(
    data_dir: str,
    train_data: str,
    test_data: str,
    pretrained_model: str,
    tokenizer_dir: str,
    model_name: str,
):
    args = utils.get_args(
        data_dir=data_dir,
        train_data=train_data,
        test_data=test_data,
        pretrained_model=pretrained_model,
        tokenizer_dir=tokenizer_dir,
        model_name=model_name,
    )

    settings = utils.get_model_settings(config.settings, args)
    if args.output_mode:
        settings["output_mode"] = args.output_mode
    if args.tissue_subset is not None:
        settings["num_labels"] = len(args.tissue_subset)

    print("Loading model...")
    config_obj, tokenizer, model = load_model(args, settings)

    print("Loading data...")
    datasets = dataio.load_datasets(
        tokenizer,
        args.train_data,
        eval_data=args.eval_data,
        test_data=args.test_data,
        seq_key="text",
        file_type="text",
        filter_empty=args.filter_empty,
        shuffle=False,
    )
    # The uploaded file is passed as both the train and test split, so the
    # "train" split holds the sequences to score.
    dataset_test = datasets["train"]

    print("Getting predictions:")
    # Model outputs are in log-offset space; exp(x) - 1 maps them back to
    # expression values.
    preds = np.exp(np.array(metrics.get_predictions(model, dataset_test))) - 1

    return preds.tolist()
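
# Minimal local entry point, a sketch assuming uvicorn is installed; the
# host/port values are illustrative and not part of the original code.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)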