API-Handler committed on
Commit 4e2263c
1 Parent(s): b95d0b7

Upload 5 files

Files changed (5)
  1. Dockerfile +20 -0
  2. deepinfra_handler.py +65 -0
  3. inference.py +133 -0
  4. main.py +53 -0
  5. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # Use an official Python runtime as the base image
+ FROM python:3.9-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file into the container
+ COPY requirements.txt .
+
+ # Install the required packages
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application code into the container
+ COPY . .
+
+ # Expose the port that FastAPI will run on
+ EXPOSE 7860
+
+ # Command to run the FastAPI application
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
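For local testing, the image can be built and started with standard Docker commands, e.g. docker build -t api-handler . followed by docker run -p 7860:7860 api-handler (the api-handler tag is illustrative); 7860 is the port bound by the CMD above.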
deepinfra_handler.py ADDED
@@ -0,0 +1,65 @@
+ import json
+ import requests
+ from typing import Dict, Any, Generator
+
+ class DeepInfraHandler:
+     API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
+
+     def __init__(self):
+         self.headers = {
+             "Accept": "text/event-stream",
+             "Accept-Encoding": "gzip, deflate, br, zstd",
+             "Content-Type": "application/json",
+             "Connection": "keep-alive",
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
+         }
+
+     def _prepare_payload(self, **kwargs) -> Dict[str, Any]:
+         """Prepare the payload for the API request."""
+         return {
+             "model": kwargs.get("model"),
+             "messages": kwargs.get("messages"),
+             "temperature": kwargs.get("temperature", 0.7),
+             "max_tokens": kwargs.get("max_tokens", 4096),
+             "top_p": kwargs.get("top_p", 1.0),
+             "frequency_penalty": kwargs.get("frequency_penalty", 0.0),
+             "presence_penalty": kwargs.get("presence_penalty", 0.0),
+             "stop": kwargs.get("stop", []),
+             "stream": kwargs.get("stream", False)
+         }
+
+     def generate_completion(self, **kwargs) -> Any:
+         """Generate a completion, streaming or not depending on the payload."""
+         payload = self._prepare_payload(**kwargs)
+
+         response = requests.post(
+             self.API_URL,
+             headers=self.headers,
+             json=payload,
+             stream=payload["stream"]
+         )
+
+         if payload["stream"]:
+             return self._handle_streaming_response(response)
+         return self._handle_regular_response(response)
+
+     def _handle_streaming_response(self, response) -> Generator[str, None, None]:
+         """Yield content deltas from the API's server-sent event stream."""
+         for line in response.iter_lines(decode_unicode=True):
+             if not line.startswith("data:"):
+                 continue
+             data = line[5:].strip()
+             # The stream ends with a literal "[DONE]" sentinel, which is not JSON,
+             # so it must be checked before json.loads().
+             if data == "[DONE]":
+                 continue
+             try:
+                 content = json.loads(data)
+             except json.JSONDecodeError:
+                 continue
+             choices = content.get("choices") or [{}]
+             delta_content = choices[0].get("delta", {}).get("content")
+             if delta_content:
+                 yield delta_content
+
+     def _handle_regular_response(self, response) -> Dict[str, Any]:
+         """Return the parsed JSON body of a non-streaming response."""
+         try:
+             return response.json()
+         except Exception as e:
+             raise Exception(f"Error processing response: {e}") from e
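A minimal usage sketch for the handler, assuming it is imported from this module (the model name is the one used by inference.py below; note the handler sends no Authorization header, so access depends on what the endpoint permits anonymously):

    from deepinfra_handler import DeepInfraHandler

    handler = DeepInfraHandler()
    messages = [{"role": "user", "content": "What is the capital of France?"}]

    # Non-streaming: returns the parsed JSON body as a dict.
    result = handler.generate_completion(
        model="mistralai/Mixtral-8x22B-Instruct-v0.1",
        messages=messages,
        stream=False,
    )
    print(result["choices"][0]["message"]["content"])

    # Streaming: returns a generator of content deltas.
    for delta in handler.generate_completion(
        model="mistralai/Mixtral-8x22B-Instruct-v0.1",
        messages=messages,
        stream=True,
    ):
        print(delta, end="", flush=True)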
inference.py ADDED
@@ -0,0 +1,133 @@
+ import requests
+ import json
+ from typing import Union, Dict
+ import time
+
+ class ChatCompletionTester:
+     def __init__(self, base_url: str = "http://localhost:8000"):
+         self.base_url = base_url
+         self.endpoint = f"{base_url}/chat/completions"
+
+     def create_test_payload(self, stream: bool = False) -> Dict:
+         """Create a sample payload for testing."""
+         return {
+             "model": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+             "messages": [
+                 {"role": "system", "content": "You are a helpful assistant."},
+                 {"role": "user", "content": "What is the capital of France?"}
+             ],
+             "temperature": 0.7,
+             "max_tokens": 4096,
+             "stream": stream
+         }
+
+     def test_non_streaming(self) -> Union[Dict, None]:
+         """Test the non-streaming response path."""
+         print("\n=== Testing Non-Streaming Response ===")
+         try:
+             payload = self.create_test_payload(stream=False)
+             print("Sending request...")
+
+             response = requests.post(
+                 self.endpoint,
+                 json=payload,
+                 headers={"Content-Type": "application/json"}
+             )
+
+             if response.status_code == 200:
+                 result = response.json()
+                 content = result['choices'][0]['message']['content']
+                 print("\nResponse received successfully!")
+                 print(f"Content: {content}")
+                 return result
+             else:
+                 print(f"Error: Status code {response.status_code}")
+                 print(f"Response: {response.text}")
+                 return None
+
+         except Exception as e:
+             print(f"Error during non-streaming test: {e}")
+             return None
+
+     def test_streaming(self) -> Union[str, None]:
+         """Test the streaming response path."""
+         print("\n=== Testing Streaming Response ===")
+         try:
+             payload = self.create_test_payload(stream=True)
+             print("Sending request...")
+
+             response = requests.post(
+                 self.endpoint,
+                 json=payload,
+                 headers={"Content-Type": "application/json"},
+                 stream=True
+             )
+
+             if response.status_code == 200:
+                 print("\nReceiving streaming response:")
+                 full_response = ""
+                 for line in response.iter_lines(decode_unicode=True):
+                     if not line or not line.startswith("data: "):
+                         continue
+                     data = line[6:]
+                     # The final SSE frame is the literal sentinel "[DONE]", not JSON,
+                     # so it must be checked before json.loads().
+                     if data.strip() == "[DONE]":
+                         continue
+                     try:
+                         chunk = json.loads(data)
+                     except json.JSONDecodeError:
+                         continue
+                     choices = chunk.get("choices") or [{}]
+                     content = choices[0].get("delta", {}).get("content", "")
+                     if content:
+                         print(content, end="", flush=True)
+                         full_response += content
+                 print("\n\nStreaming completed!")
+                 return full_response
+             else:
+                 print(f"Error: Status code {response.status_code}")
+                 print(f"Response: {response.text}")
+                 return None
+
+         except Exception as e:
+             print(f"Error during streaming test: {e}")
+             return None
+
+     def run_all_tests(self):
+         """Run both the streaming and non-streaming tests."""
+         print("Starting API endpoint tests...")
+
+         # Check server connectivity before running the tests
+         try:
+             requests.get(self.base_url)
+             print("✓ Server is accessible")
+         except requests.exceptions.ConnectionError:
+             print("✗ Server is not accessible. Please ensure the FastAPI server is running.")
+             return
+
+         # Run both tests with timing
+         start_time = time.time()
+
+         non_streaming_result = self.test_non_streaming()
+         if non_streaming_result:
+             print("✓ Non-streaming test passed")
+         else:
+             print("✗ Non-streaming test failed")
+
+         streaming_result = self.test_streaming()
+         if streaming_result:
+             print("✓ Streaming test passed")
+         else:
+             print("✗ Streaming test failed")
+
+         end_time = time.time()
+         print(f"\nAll tests completed in {end_time - start_time:.2f} seconds")
+
+ def main():
+     tester = ChatCompletionTester()
+     tester.run_all_tests()
+
+ if __name__ == "__main__":
+     main()
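Note that the tester defaults to http://localhost:8000, which matches the __main__ block of main.py below; inside the Docker image, uvicorn listens on port 7860 instead, so pass base_url accordingly in that case:

    ChatCompletionTester(base_url="http://localhost:7860").run_all_tests()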
main.py ADDED
@@ -0,0 +1,53 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.responses import StreamingResponse
+ from pydantic import BaseModel, Field
+ from typing import List, Optional
+ from deepinfra_handler import DeepInfraHandler
+ import json
+
+ app = FastAPI()
+ api_handler = DeepInfraHandler()
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ class ChatCompletionRequest(BaseModel):
+     model: str
+     messages: List[Message]
+     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
+     max_tokens: Optional[int] = Field(default=4096, ge=1)
+     top_p: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
+     frequency_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
+     presence_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
+     stop: Optional[List[str]] = Field(default=[])
+     stream: Optional[bool] = Field(default=False)
+
+ @app.post("/chat/completions")
+ async def chat_completions(request: ChatCompletionRequest):
+     try:
+         # Convert the request model to a plain dictionary
+         # (model_dump() is the Pydantic v2 replacement for the deprecated .dict())
+         params = request.model_dump()
+
+         if request.stream:
+             # Stream the completion back as server-sent events
+             def generate():
+                 for chunk in api_handler.generate_completion(**params):
+                     yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk}}]})}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return StreamingResponse(
+                 generate(),
+                 media_type="text/event-stream"
+             )
+
+         # Handle regular (non-streaming) response
+         response = api_handler.generate_completion(**params)
+         return response
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
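For reference, the generate() wrapper above emits OpenAI-style server-sent events: one JSON frame per content delta, followed by a literal terminator. A client therefore sees frames like the following (the content shown is illustrative):

    data: {"choices": [{"delta": {"content": "Paris"}}]}

    data: [DONE]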
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi>=0.104.1
+ uvicorn>=0.24.0
+ requests>=2.31.0
+ pydantic>=2.5.2