# Error Handling
This guide covers error handling in OneLLM, including error types, handling strategies, and best practices.
## Error Hierarchy
OneLLM uses a structured error hierarchy for consistent error handling across all providers:
```
OneLLMError (base)
├── APIError
│   ├── AuthenticationError
│   ├── RateLimitError
│   ├── InvalidRequestError
│   ├── ResourceNotFoundError
│   └── ServiceUnavailableError
├── ConfigurationError
│   └── InvalidConfigurationError
└── ProviderError
    └── ProviderNotFoundError
```
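Because every exception above derives from `OneLLMError`, catching the base class gives you a safe catch-all when you don't need per-type handling:

```python
from onellm.errors import OneLLMError, APIError

try:
    response = client.chat.completions.create(
        model="openai/gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except APIError as e:
    # Handle provider/API failures specifically
    print(f"API error: {e}")
except OneLLMError as e:
    # Anything else the library raises (configuration, provider lookup, ...)
    print(f"OneLLM error: {e}")
```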
## Common Error Types

### AuthenticationError
Raised when API authentication fails:
```python
from onellm.errors import AuthenticationError

try:
    response = client.chat.completions.create(
        model="openai/gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except AuthenticationError as e:
    print(f"Authentication failed: {e}")
    print(f"Provider: {e.provider}")
    print(f"Status code: {e.status_code}")
```
**Common Causes** (a preflight key check is sketched after this list):
- Invalid API key
- Expired API key
- Wrong API key for provider
- Missing API key
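The missing-key case can be caught before any request is made. This sketch assumes the key lives in the `OPENAI_API_KEY` environment variable (as in the Best Practices section below); the `sk-` prefix test is only an OpenAI-specific heuristic:

```python
import os

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set")
if not api_key.startswith("sk-"):
    # Heuristic only: OpenAI keys conventionally start with "sk-"
    print("Warning: OPENAI_API_KEY does not look like an OpenAI key")
```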
### RateLimitError
Raised when rate limits are exceeded:
```python
from onellm.errors import RateLimitError
import time

try:
    response = client.chat.completions.create(...)
except RateLimitError as e:
    print(f"Rate limit hit: {e}")
    if hasattr(e, 'retry_after'):
        print(f"Retry after: {e.retry_after} seconds")
        time.sleep(e.retry_after)
```
**Common Causes** (a client-side throttle is sketched after this list):
- Too many requests per minute
- Token limits exceeded
- Concurrent request limits
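Retrying after the fact is one option (see Retry Logic below); another is to throttle on the client side so a requests-per-minute cap is never hit. A minimal sketch, not part of OneLLM itself:

```python
import time

class MinuteRateLimiter:
    """Naive client-side limiter: allow at most max_per_minute calls."""

    def __init__(self, max_per_minute: int):
        self.max_per_minute = max_per_minute
        self.timestamps = []

    def wait(self):
        now = time.monotonic()
        # Drop call timestamps older than 60 seconds
        self.timestamps = [t for t in self.timestamps if now - t < 60]
        if len(self.timestamps) >= self.max_per_minute:
            # Sleep until the oldest call in the window expires
            time.sleep(60 - (now - self.timestamps[0]))
        self.timestamps.append(time.monotonic())

limiter = MinuteRateLimiter(max_per_minute=60)
limiter.wait()
response = client.chat.completions.create(...)
```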
### InvalidRequestError
Raised for invalid requests:
```python
from onellm.errors import InvalidRequestError

try:
    response = client.chat.completions.create(
        model="openai/gpt-4",
        messages=[],      # Empty messages
        temperature=2.5   # Invalid temperature
    )
except InvalidRequestError as e:
    print(f"Invalid request: {e}")
    print(f"Details: {e.details}")
```
**Common Causes** (a validation helper is sketched after this list):
- Invalid parameters
- Missing required fields
- Unsupported features for model
- Invalid model name
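Many of these can be caught locally before a request ever leaves your process. A hypothetical validation helper (the 0-2 temperature range and `provider/model` naming follow the conventions used throughout this guide):

```python
def validate_request(model: str, messages: list, temperature: float = 1.0):
    """Hypothetical pre-flight checks mirroring common InvalidRequestError causes."""
    if not messages:
        raise ValueError("messages must not be empty")
    if not (0.0 <= temperature <= 2.0):
        raise ValueError(f"temperature {temperature} outside the usual 0-2 range")
    if "/" not in model:
        raise ValueError("model should be in 'provider/model-name' form")

validate_request(
    "openai/gpt-4",
    [{"role": "user", "content": "Hello"}],
    temperature=0.7
)
```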
### ServiceUnavailableError
Raised when the service is temporarily unavailable:
```python
from onellm.errors import ServiceUnavailableError
import asyncio

async def retry_with_backoff():
    for attempt in range(3):
        try:
            return await client.chat.completions.acreate(...)
        except ServiceUnavailableError:
            if attempt < 2:
                await asyncio.sleep(2 ** attempt)
            else:
                raise
```
## Handling Strategies

### 1. Basic Error Handling
```python
from onellm import OpenAI
from onellm.errors import APIError

client = OpenAI()

try:
    response = client.chat.completions.create(
        model="openai/gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except APIError as e:
    print(f"API error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
```
### 2. Specific Error Handling
```python
from onellm.errors import (
    AuthenticationError,
    RateLimitError,
    InvalidRequestError,
    ServiceUnavailableError
)

try:
    response = client.chat.completions.create(...)
except AuthenticationError:
    print("Check your API key")
except RateLimitError as e:
    print(f"Rate limited. Retry after {getattr(e, 'retry_after', 60)}s")
except InvalidRequestError as e:
    print(f"Fix your request: {e}")
except ServiceUnavailableError:
    print("Service down, try again later")
except Exception as e:
    print(f"Unexpected: {e}")
```
### 3. Retry Logic
```python
import time

from onellm.errors import RateLimitError, ServiceUnavailableError
from onellm.utils.retry import retry_with_exponential_backoff

@retry_with_exponential_backoff(
    max_retries=3,
    initial_delay=1,
    exponential_base=2,
    errors=(RateLimitError, ServiceUnavailableError)
)
def make_request():
    return client.chat.completions.create(...)

# Or manually:
def manual_retry(max_attempts=3):
    for attempt in range(max_attempts):
        try:
            return client.chat.completions.create(...)
        except (RateLimitError, ServiceUnavailableError):
            if attempt == max_attempts - 1:
                raise
            time.sleep(2 ** attempt)
```
### 4. Fallback Providers
```python
from onellm.errors import APIError

providers = [
    "openai/gpt-4",
    "anthropic/claude-3-5-sonnet-20241022",
    "google/gemini-1.5-pro"
]

for model in providers:
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Hello"}]
        )
        break
    except APIError as e:
        print(f"{model} failed: {e}")
        if model == providers[-1]:
            raise
```
## Error Context
OneLLM errors include helpful context:
```python
from onellm.errors import APIError

try:
    response = client.chat.completions.create(...)
except APIError as e:
    # Basic error info
    print(f"Message: {e.message}")
    print(f"Provider: {e.provider}")
    print(f"Status code: {e.status_code}")

    # Additional context if available
    if hasattr(e, 'request_id'):
        print(f"Request ID: {e.request_id}")
    if hasattr(e, 'details'):
        print(f"Details: {e.details}")
```
## Async Error Handling
```python
import asyncio

from onellm import AsyncOpenAI
from onellm.errors import APIError

client = AsyncOpenAI()  # Shared client for the examples below

async def handle_async_errors():
    try:
        response = await client.chat.completions.create(...)
    except APIError as e:
        print(f"Async error: {e}")

# With multiple requests
async def handle_multiple():
    tasks = [
        client.chat.completions.create(...),
        client.chat.completions.create(...),
        client.chat.completions.create(...)
    ]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i} failed: {result}")
        else:
            print(f"Request {i} succeeded")
```
## Logging Errors
```python
import logging

from onellm.errors import APIError

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    response = client.chat.completions.create(...)
except APIError as e:
    logger.error(f"API error: {e}", exc_info=True)
    logger.error(f"Provider: {e.provider}, Status: {e.status_code}")
```
## Custom Error Handling

### Create Custom Errors
```python
from onellm.errors import OneLLMError, InvalidRequestError

class CustomError(OneLLMError):
    """Custom error for a specific use case."""
    pass

class ModelNotSupportedError(InvalidRequestError):
    """Model doesn't support the requested feature."""
    pass
```
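These subclasses behave like any other OneLLM error, so raising `ModelNotSupportedError` still lets callers catch it as an `InvalidRequestError`. A sketch with a hypothetical capability check (assuming, like the other error types shown in this guide, the constructor accepts a message):

```python
def require_vision(model: str):
    # Hypothetical check: assume only "vision" models accept images
    if "vision" not in model:
        raise ModelNotSupportedError(f"{model} does not support image inputs")

try:
    require_vision("openai/gpt-4")
except InvalidRequestError as e:
    # Caught here because ModelNotSupportedError subclasses InvalidRequestError
    print(f"Unsupported: {e}")
```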
### Error Middleware
```python
from onellm.errors import RateLimitError, ServiceUnavailableError

class ErrorHandlingClient:
    def __init__(self, client):
        self.client = client

    def create_with_fallback(self, primary_model, fallback_model, **kwargs):
        try:
            return self.client.chat.completions.create(
                model=primary_model, **kwargs
            )
        except (RateLimitError, ServiceUnavailableError):
            return self.client.chat.completions.create(
                model=fallback_model, **kwargs
            )
```
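Wrapping an existing client is then a one-liner; for example:

```python
from onellm import OpenAI

safe_client = ErrorHandlingClient(OpenAI())

response = safe_client.create_with_fallback(
    primary_model="openai/gpt-4",
    fallback_model="openai/gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}]
)
```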
## Provider-Specific Errors
Different providers may have unique errors:
### OpenAI
```python
from onellm.errors import InvalidRequestError

try:
    response = client.chat.completions.create(
        model="openai/gpt-4-vision",
        messages=[...]
    )
except InvalidRequestError as e:
    if "image" in str(e):
        print("Image format not supported")
```
### Anthropic
```python
from onellm.errors import InvalidRequestError

try:
    response = client.chat.completions.create(
        model="anthropic/claude-3",
        messages=[...],
        max_tokens=100000  # Too high
    )
except InvalidRequestError as e:
    if "max_tokens" in str(e):
        print("Reduce max_tokens for this model")
```
## Best Practices

### 1. Always Handle Authentication
```python
import os

from onellm.errors import ConfigurationError

if not os.environ.get("OPENAI_API_KEY"):
    raise ConfigurationError("OPENAI_API_KEY not set")
```
### 2. Implement Graceful Degradation
```python
from onellm.errors import APIError

def get_response_with_fallback(message):
    models = ["openai/gpt-4", "openai/gpt-3.5-turbo"]
    for model in models:
        try:
            return client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": message}]
            )
        except APIError:
            continue
    return {"error": "All models failed"}
```
### 3. Log Errors for Debugging
```python
import json
from datetime import datetime

from onellm.errors import APIError

try:
    response = client.chat.completions.create(...)
except APIError as e:
    error_log = {
        "timestamp": datetime.now().isoformat(),
        "error_type": type(e).__name__,
        "message": str(e),
        "provider": getattr(e, 'provider', 'unknown'),
        "status_code": getattr(e, 'status_code', None)
    }
    logger.error(json.dumps(error_log))
```
### 4. Handle Timeouts
```python
try:
    response = client.chat.completions.create(
        model="openai/gpt-4",
        messages=[...],
        timeout=30  # 30-second timeout
    )
except TimeoutError:
    print("Request timed out, try a faster model")
```
## Testing Error Handling
```python
import pytest
from unittest.mock import patch

from onellm.errors import RateLimitError

def test_rate_limit_handling():
    with patch.object(client, 'chat') as mock_chat:
        mock_chat.completions.create.side_effect = RateLimitError(
            "Rate limit exceeded",
            provider="openai",
            status_code=429
        )
        with pytest.raises(RateLimitError) as exc_info:
            client.chat.completions.create(...)
        assert exc_info.value.status_code == 429
```
## Next Steps
- Best Practices - Error handling best practices
- Troubleshooting - Common error solutions
- API Reference - Complete API documentation