derek-thomas HF staff committed on
Commit
944ee1c
·
1 Parent(s): e5be074

Adding a better error message and a countdown.

Browse files
Files changed (1) hide show
  1. backend/query_llm.py +28 -3
backend/query_llm.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  from os import getenv
3
 
@@ -19,7 +20,11 @@ def call_jais(payload):
19
  response.raise_for_status() # This will raise an exception for HTTP error codes
20
  return response.json()
21
  except requests.exceptions.HTTPError as http_err:
22
- raise gr.Error(f"An error occurred while processing the request. {http_err}")
 
 
 
 
23
  except Exception as err:
24
  raise gr.Error(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
25
 
@@ -30,8 +35,14 @@ def generate(prompt: str):
30
  return response
31
 
32
 
 
 
 
 
 
33
  def check_endpoint_status():
34
- # Replace with the actual API URL and headers
 
35
  api_url = os.getenv("ENDPOINT_URL")
36
  headers = {
37
  'accept': 'application/json',
@@ -47,6 +58,20 @@ def check_endpoint_status():
47
  status = data.get('status', {}).get('state', 'No status found')
48
  message = data.get('status', {}).get('message', 'No message found')
49
 
50
- return f"Status: {status}\nMessage: {message}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except requests.exceptions.RequestException as e:
52
  return f"Failed to get status: {str(e)}"
 
1
+ import datetime
2
  import os
3
  from os import getenv
4
 
 
20
  response.raise_for_status() # This will raise an exception for HTTP error codes
21
  return response.json()
22
  except requests.exceptions.HTTPError as http_err:
23
+ # Check if the error is a 5XX server error
24
+ if 500 <= http_err.response.status_code < 600:
25
+ raise gr.Error("The endpoint is loading, it takes about 4 min from the first call.")
26
+ else:
27
+ raise gr.Error(f"An error occurred while processing the request. {http_err}")
28
  except Exception as err:
29
  raise gr.Error(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
30
 
 
35
  return response
36
 
37
 
38
+ # Global variables that store the previous status and the time when it changed
39
+ previous_status = None
40
+ status_change_time = None
41
+
42
+
43
  def check_endpoint_status():
44
+ global previous_status, status_change_time
45
+
46
  api_url = os.getenv("ENDPOINT_URL")
47
  headers = {
48
  'accept': 'application/json',
 
58
  status = data.get('status', {}).get('state', 'No status found')
59
  message = data.get('status', {}).get('message', 'No message found')
60
 
61
+ # Check if the status has changed
62
+ if status != previous_status:
63
+ previous_status = status
64
+ status_change_time = datetime.datetime.now()
65
+
66
+ # If the previous status was 'scaled to zero' and the current one isn't,
67
+ # start the countdown
68
+ countdown_message = ""
69
+ if status_change_time and previous_status == "scaled to zero" and status != "scaled to zero":
70
+ elapsed_time = datetime.datetime.now() - status_change_time
71
+ if elapsed_time < datetime.timedelta(minutes=4):
72
+ remaining_time = datetime.timedelta(minutes=4) - elapsed_time
73
+ countdown_message = f"Countdown: {remaining_time} remaining until fully operational."
74
+
75
+ return f"Status: {status}\nMessage: {message}\n{countdown_message}"
76
  except requests.exceptions.RequestException as e:
77
  return f"Failed to get status: {str(e)}"