acecalisto3 committed on
Commit 18caca1
1 Parent(s): ca75e75

Create handler.py

Files changed (1):
handler.py +88 -0
handler.py ADDED
import json
import logging
import datetime

from transformers import AutoModelForCausalLM, AutoTokenizer

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load configuration settings from a separate file (config.json).
# Example configuration file:
# {
#     "model_name": "acecalisto3/InstructiPhi",
#     "max_length": 50,
#     "logging_level": "INFO"
# }
try:
    with open('config.json') as f:
        config = json.load(f)
except FileNotFoundError:
    logger.error("Configuration file 'config.json' not found. Using default settings.")
    config = {
        "model_name": "acecalisto3/InstructiPhi",  # Default model name
        "max_length": 50,                          # Default max length
        "logging_level": "INFO"                    # Default logging level
    }

# Apply the configured logging level. basicConfig() is a no-op once the root
# logger is configured, so set the level on this module's logger directly.
logger.setLevel(config["logging_level"])

# Load the model and tokenizer once at module scope so they are reused
# across invocations instead of being reloaded on every request.
model_name = config["model_name"]
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def handle_request(event, context):
    """Handles incoming requests to the deployed model.

    Args:
        event: The event data from the deployment platform.
        context: The context data from the deployment platform.

    Returns:
        A dictionary containing the response status code and body.
    """
    input_text = None  # Defined up front so the except block can log it safely.
    try:
        # Extract input text from the event.
        input_text = event.get('body')
        if not input_text:
            return {
                'statusCode': 400,
                'body': json.dumps({'error': 'Missing input text'})
            }

        # Input validation: reject overly long inputs.
        if len(input_text) > 1000:  # A reasonable upper limit
            return {
                'statusCode': 400,
                'body': json.dumps({'error': 'Input text is too long'})
            }

        # Tokenize the input text.
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids

        # Generate the response using the model.
        output = model.generate(input_ids, max_length=config["max_length"])

        # Decode the generated token IDs back into text.
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Return a successful response with structured output.
        return {
            'statusCode': 200,
            'body': json.dumps({
                'response': generated_text,
                'model': model_name,  # Include the model name in the output
                'timestamp': datetime.datetime.now().isoformat()
            })
        }

    except Exception as e:
        # Log the error together with the offending input for easier debugging.
        logger.error(f"Error processing request: {e}, input: {input_text}")
        return {
            'statusCode': 500,
            'body': json.dumps({'error': 'Internal server error'})
        }
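
For a quick check outside the deployment platform, the handler can be invoked directly. A minimal smoke-test sketch, assuming an AWS-Lambda-style event dict whose 'body' key carries the raw input text (the commit itself does not pin down the event shape, so adjust this to whatever your platform actually delivers):

# Hypothetical local smoke test; the event shape is an assumption,
# not something specified by the commit.
if __name__ == "__main__":
    sample_event = {'body': 'Write a one-line greeting.'}
    result = handle_request(sample_event, None)
    print(result['statusCode'])
    print(json.loads(result['body']))

Note that because the model and tokenizer load at module scope rather than inside handle_request, the expensive load happens once per process and warm invocations reuse the weights.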