Canstralian
/

RabbitRedux

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Canstralian committed on Nov 9, 2024

Commit

3e66b84

·

verified ·

1 Parent(s): d84922a

Create app.py

Files changed (1) hide show

app.py +53 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoAdapterModel, AutoTokenizer
+# Create the FastAPI application and configure the root logger to emit INFO-and-above records.
+app = FastAPI()
+logging.basicConfig(level=logging.INFO)
+# Load the model, adapter and tokenizer once, while this module is imported, so request handlers reuse them.
+MODEL_NAME = os.getenv("MODEL_NAME", "bert-base-uncased")  # Base checkpoint name; overridable via the MODEL_NAME env var
+ADAPTER_NAME = os.getenv("ADAPTER_NAME", "Canstralian/RabbitRedux")  # Adapter identifier; overridable via the ADAPTER_NAME env var
+try:
+    logging.info("Loading model and adapter...")
+    model = AutoAdapterModel.from_pretrained(MODEL_NAME)  # NOTE(review): AutoAdapterModel is not part of upstream transformers (it ships with adapter-transformers / adapters) — confirm the installed distribution
+    model.load_adapter(ADAPTER_NAME, set_active=True)  # set_active=True is passed so the adapter is enabled right after loading
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)  # Tokenizer is resolved from the base model name, not the adapter name
+    logging.info("Model and adapter loaded successfully.")
+except Exception as e:
+    logging.error("Error loading model or adapter:", exc_info=True)  # exc_info=True attaches the active traceback to the log record
+    raise RuntimeError("Model or adapter loading failed.") from e  # Abort the import; the original error is preserved as __cause__
+# Request body for the /predict endpoint: a single required string field.
+class PredictionRequest(BaseModel):
+    text: str  # Raw input text that predict() passes to the tokenizer
+class PredictionResponse(BaseModel):  # Response body returned by the /predict endpoint
+    text: str  # Echo of the input text from the request
+    prediction: str  # Decoded model output produced by predict()
+# Endpoint for inference
+@app.post("/predict", response_model=PredictionResponse)
+async def predict(request: PredictionRequest):
+    try:
+        # Tokenize input text
+        inputs = tokenizer(request.text, return_tensors="pt")
+        # Perform inference
+        outputs = model(**inputs)
+        # Generate predicted text or classification (customize as needed)
+        prediction = tokenizer.decode(outputs.logits.argmax(-1)[0], skip_special_tokens=True)
+        return PredictionResponse(text=request.text, prediction=prediction)
+    except Exception as e:
+        logging.error("Error during prediction:", exc_info=True)
+        raise HTTPException(status_code=500, detail="Prediction failed")
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}