Use this endpoint to get the status of a development deployment’s async queue.
import os

import requests

# Set this to your model's ID; it is interpolated into the request hostname below.
model_id = ""
# Read secrets from environment variables
baseten_api_key = os.environ["BASETEN_API_KEY"]

resp = requests.get(
    f"https://model-{model_id}.api.baseten.co/development/async_queue_status",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    # requests applies no timeout by default; bound the wait so the script
    # cannot hang forever on an unresponsive connection.
    timeout=30,
)
# Raise on HTTP errors (e.g. 429 when the rate limit is exceeded) instead of
# printing an error body as if it were the queue status.
resp.raise_for_status()
print(resp.json())
{
"model_id": "<string>",
"deployment_id": "<string>",
"num_queued_requests": 12,
"num_in_progress_requests": 3
}
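Authorization header: your API key, sent using the Api-Key scheme (e.g. {"Authorization": "Api-Key abcd1234.abcd1234"}).
num_queued_requests: the number of requests in the async queue with QUEUED status (i.e. awaiting processing by the model).
num_in_progress_requests: the number of requests in the async queue with IN_PROGRESS status (i.e. currently being processed by the model).
{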
"model_id": "<string>",
"deployment_id": "<string>",
"num_queued_requests": 12,
"num_in_progress_requests": 3
}
Calls to the /async_queue_status endpoint are limited to 20 requests per second. If this limit is exceeded, subsequent requests will receive a 429 status code.
To gracefully handle hitting this rate limit, we advise implementing a backpressure mechanism, such as calling /async_queue_status with exponential backoff in response to 429 errors.
import os

import requests

# Set this to your model's ID; it is interpolated into the request hostname below.
model_id = ""
# Read secrets from environment variables
baseten_api_key = os.environ["BASETEN_API_KEY"]

resp = requests.get(
    f"https://model-{model_id}.api.baseten.co/development/async_queue_status",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    # requests applies no timeout by default; bound the wait so the script
    # cannot hang forever on an unresponsive connection.
    timeout=30,
)
# Raise on HTTP errors (e.g. 429 when the rate limit is exceeded) instead of
# printing an error body as if it were the queue status.
resp.raise_for_status()
print(resp.json())
import os

import requests

# Set this to your model's ID; it is interpolated into the request hostname below.
model_id = ""
# Read secrets from environment variables
baseten_api_key = os.environ["BASETEN_API_KEY"]

resp = requests.get(
    f"https://model-{model_id}.api.baseten.co/development/async_queue_status",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    # requests applies no timeout by default; bound the wait so the script
    # cannot hang forever on an unresponsive connection.
    timeout=30,
)
# Raise on HTTP errors (e.g. 429 when the rate limit is exceeded) instead of
# printing an error body as if it were the queue status.
resp.raise_for_status()
print(resp.json())
{
"model_id": "<string>",
"deployment_id": "<string>",
"num_queued_requests": 12,
"num_in_progress_requests": 3
}