nbroad HF staff committed on
Commit
76a9232
1 Parent(s): 107c802

inference client

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from datetime import datetime
4
+ from typing import List, Dict
5
+
6
+ import requests
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.staticfiles import StaticFiles
9
+ from fastapi.responses import FileResponse
10
+ from pydantic import BaseModel
11
+ import plotly.graph_objs as go
12
+ from apscheduler.schedulers.asyncio import AsyncIOScheduler
13
+
14
+ from huggingface_hub import AsyncInferenceClient
15
+
16
+ app = FastAPI()
17
+
18
+ # Configuration
19
+ models = [
20
+ "meta-llama/Meta-Llama-3.1-8B-Instruct",
21
+ "meta-llama/Meta-Llama-3.1-70B-Instruct",
22
+ "meta-llama/Meta-Llama-3-8B-Instruct",
23
+ "meta-llama/Meta-Llama-3-70B-Instruct",
24
+ "meta-llama/Llama-Guard-3-8B",
25
+ "meta-llama/Llama-2-7b-chat-hf",
26
+ "meta-llama/Llama-2-13b-chat-hf",
27
+ "deepseek-ai/DeepSeek-Coder-V2-Instruct",
28
+ "mistralai/Mistral-7B-Instruct-v0.3",
29
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
30
+ ]
31
+ LOG_FILE = "api_logs.json"
32
+ CHECK_INTERVAL = 60 # 1 minute
33
+
34
+
35
+ client = AsyncInferenceClient(token=os.environ["HF_INFERENCE_API_TOKEN"])
36
+
37
+ # Ensure log file exists
38
+ if not os.path.exists(LOG_FILE):
39
+ with open(LOG_FILE, "w") as f:
40
+ json.dump([], f)
41
+
42
+ class LogEntry(BaseModel):
43
+ model: str
44
+ success: bool
45
+ timestamp: str
46
+ status_code: int
47
+
48
+ async def check_apis():
49
+ results = []
50
+ for model in models:
51
+ try:
52
+ response = await client.chat_completion(
53
+ messages=[{"role": "user", "content": "What is the capital of France?"}],
54
+ max_tokens=10,
55
+ )
56
+ success = response.status_code == 200
57
+ except requests.RequestException:
58
+ success = False
59
+
60
+ results.append(LogEntry(
61
+ model=model,
62
+ success=success,
63
+ timestamp=datetime.now().isoformat(),
64
+ status_code=response.status_code
65
+ ))
66
+
67
+ with open(LOG_FILE, "r+") as f:
68
+ logs = json.load(f)
69
+ logs.extend([result.dict() for result in results])
70
+ f.seek(0)
71
+ json.dump(logs, f)
72
+
73
+ @app.on_event("startup")
74
+ async def start_scheduler():
75
+ scheduler = AsyncIOScheduler()
76
+ scheduler.add_job(check_apis, 'interval', minutes=1)
77
+ scheduler.start()
78
+
79
+ @app.get("/")
80
+ async def index():
81
+ return FileResponse("static/index.html")
82
+
83
+ @app.get("/api/logs", response_model=List[LogEntry])
84
+ async def get_logs():
85
+ with open(LOG_FILE, "r") as f:
86
+ logs = json.load(f)
87
+ return logs
88
+
89
+ @app.get("/api/chart-data", response_model=Dict[str, Dict[str, List]])
90
+ async def get_chart_data():
91
+ with open(LOG_FILE, "r") as f:
92
+ logs = json.load(f)
93
+
94
+ chart_data = {}
95
+ for log in logs:
96
+ model = log['model']
97
+ if model not in chart_data:
98
+ chart_data[model] = {'x': [], 'y': []}
99
+ chart_data[model]['x'].append(log['timestamp'])
100
+ chart_data[model]['y'].append(1 if log['success'] else 0)
101
+
102
+ return chart_data
103
+
104
+ # Mount the static files directory
105
+ app.mount("/static", StaticFiles(directory="static"), name="static")
106
+
107
+ if __name__ == "__main__":
108
+ import uvicorn
109
+ uvicorn.run(app, host="0.0.0.0", port=7860)