Canstralian
/

RabbitRedux

Transformers

English

code

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Canstralian committed on Nov 14, 2024

Commit

ed06dec

verified ·

1 Parent(s): 787a425

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -26

app.py CHANGED Viewed

@@ -1,53 +1,80 @@
-import os
-import logging
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoAdapterModel, AutoTokenizer
-# Initialize the app
-app = FastAPI()
 logging.basicConfig(level=logging.INFO)
-# Load model and tokenizer once on startup
-MODEL_NAME = os.getenv("MODEL_NAME", "bert-base-uncased")  # Set default model
-ADAPTER_NAME = os.getenv("ADAPTER_NAME", "Canstralian/RabbitRedux")  # Adapter name
-try:
-    logging.info("Loading model and adapter...")
-    model = AutoAdapterModel.from_pretrained(MODEL_NAME)
-    model.load_adapter(ADAPTER_NAME, set_active=True)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    logging.info("Model and adapter loaded successfully.")
-except Exception as e:
-    logging.error("Error loading model or adapter:", exc_info=True)
-    raise RuntimeError("Model or adapter loading failed.") from e
-# Define request and response data structures
 class PredictionRequest(BaseModel):
     text: str
 class PredictionResponse(BaseModel):
     text: str
     prediction: str
-# Endpoint for inference
 @app.post("/predict", response_model=PredictionResponse)
 async def predict(request: PredictionRequest):
     try:
-        # Tokenize input text
         inputs = tokenizer(request.text, return_tensors="pt")
-        # Perform inference
         outputs = model(**inputs)
-        # Generate predicted text or classification (customize as needed)
         prediction = tokenizer.decode(outputs.logits.argmax(-1)[0], skip_special_tokens=True)
         return PredictionResponse(text=request.text, prediction=prediction)
     except Exception as e:
-        logging.error("Error during prediction:", exc_info=True)
         raise HTTPException(status_code=500, detail="Prediction failed")
-# Health check endpoint
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}

+import torch
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+import logging
+import json
+import os
+# Set up logging configuration
 logging.basicConfig(level=logging.INFO)
+# Initialize the FastAPI app
+app = FastAPI()
+# Load the trained model (adjust the path to your saved model)
+model = torch.load("path/to/your/model.pth", map_location=torch.device("cpu"))  # Replace with your actual model path
+model.eval()
+# Define the input and output format for prediction requests
 class PredictionRequest(BaseModel):
+    """
+    Data model for the prediction request.
+    Attributes:
+        text (str): Input text for model inference.
+    """
     text: str
 class PredictionResponse(BaseModel):
+    """
+    Data model for the prediction response.
+    Attributes:
+        text (str): The original input text.
+        prediction (str): The predicted result from the model.
+    """
     text: str
     prediction: str
+# Define prediction endpoint
 @app.post("/predict", response_model=PredictionResponse)
 async def predict(request: PredictionRequest):
+    """
+    Endpoint for generating a prediction based on input text.
+    Args:
+        request (PredictionRequest): The request body containing the input text.
+    Returns:
+        PredictionResponse: The response body containing the original text and prediction.
+    Raises:
+        HTTPException: If any error occurs during the prediction process.
+    """
     try:
+        # Tokenize the input text (NOTE: this revision no longer imports or loads a tokenizer, so `tokenizer` must be defined at module level for this call to work)
         inputs = tokenizer(request.text, return_tensors="pt")
+        # Perform inference with the model
         outputs = model(**inputs)
+        # Get the predicted token and decode it back to text
         prediction = tokenizer.decode(outputs.logits.argmax(-1)[0], skip_special_tokens=True)
+        # Return the prediction response
         return PredictionResponse(text=request.text, prediction=prediction)
     except Exception as e:
+        logging.error("Error during prediction", exc_info=True)
         raise HTTPException(status_code=500, detail="Prediction failed")
+# Define health check endpoint
 @app.get("/health")
 async def health_check():
+    """
+    Health check endpoint to verify if the service is up and running.
+    Returns:
+        dict: A dictionary containing the status of the service.
+    """
+    logging.info("Health check requested.")
     return {"status": "healthy"}