Menu

Python

Last updated January 21, 2026

To get started with Python and AI Gateway, you can either call the OpenAI-Compatible or Anthropic-Compatible API directly, or use the official OpenAI and Anthropic Python SDKs, which are covered below.

Install your preferred SDK:

pip install openai
pip install anthropic
quickstart.py
"""Minimal chat completion through Vercel AI Gateway using the OpenAI SDK."""
import os
from openai import OpenAI

# Point the OpenAI client at the gateway's OpenAI-compatible endpoint
# (note the /v1 suffix) and authenticate with the gateway key.
gateway = OpenAI(
    base_url='https://ai-gateway.vercel.sh/v1',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# Models are addressed as "<provider>/<model>" through the gateway.
completion = gateway.chat.completions.create(
    model='anthropic/claude-sonnet-4.5',
    messages=[{'role': 'user', 'content': 'Explain quantum computing in one paragraph.'}],
)

print(completion.choices[0].message.content)
quickstart.py
"""Minimal Messages API call through Vercel AI Gateway using the Anthropic SDK."""
import os
import anthropic

# The Anthropic-compatible endpoint lives at the gateway root (no /v1 suffix).
gateway = anthropic.Anthropic(
    base_url='https://ai-gateway.vercel.sh',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# max_tokens is required by the Messages API.
reply = gateway.messages.create(
    model='anthropic/claude-sonnet-4.5',
    max_tokens=1024,
    messages=[{'role': 'user', 'content': 'Explain quantum computing in one paragraph.'}],
)

print(reply.content[0].text)

Both SDKs support the same authentication methods. Use an API key for local development or OIDC tokens for Vercel deployments.

auth.py
"""Resolve the AI Gateway credential from the environment."""
import os

# Option 1: explicit API key (recommended for local development).
api_key = os.getenv('AI_GATEWAY_API_KEY')

# Option 2: OIDC token, injected automatically on Vercel deployments.
api_key = os.getenv('VERCEL_OIDC_TOKEN')

# Fallback pattern for code that must run both locally and on Vercel:
# prefer the explicit key, fall back to the platform-provided token.
api_key = os.getenv('AI_GATEWAY_API_KEY') or os.getenv('VERCEL_OIDC_TOKEN')

Stream responses for real-time output in chat applications or long-running generations.

streaming.py
"""Stream a chat completion token-by-token with the OpenAI SDK."""
import os
from openai import OpenAI

gateway = OpenAI(
    base_url='https://ai-gateway.vercel.sh/v1',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# stream=True makes the call return an iterator of incremental chunks
# instead of a single completed response.
events = gateway.chat.completions.create(
    model='anthropic/claude-sonnet-4.5',
    messages=[{'role': 'user', 'content': 'Write a short story about a robot.'}],
    stream=True,
)

# Print each text delta the moment it arrives; flush so output is live.
for event in events:
    delta = event.choices[0].delta.content
    if delta:
        print(delta, end='', flush=True)
streaming.py
"""Stream a Messages API response token-by-token with the Anthropic SDK."""
import os
import anthropic

gateway = anthropic.Anthropic(
    base_url='https://ai-gateway.vercel.sh',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# The SDK's streaming helper is a context manager, so the connection is
# closed automatically when the block exits.
with gateway.messages.stream(
    model='anthropic/claude-sonnet-4.5',
    max_tokens=1024,
    messages=[{'role': 'user', 'content': 'Write a short story about a robot.'}],
) as events:
    # text_stream yields only the text fragments, already unwrapped.
    for fragment in events.text_stream:
        print(fragment, end='', flush=True)

Both SDKs provide async clients for use with asyncio.

async_client.py
"""Async chat completion using the OpenAI SDK's asyncio client."""
import os
import asyncio
from openai import AsyncOpenAI

gateway = AsyncOpenAI(
    base_url='https://ai-gateway.vercel.sh/v1',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)


async def main() -> None:
    """Send one prompt and print the model's reply."""
    completion = await gateway.chat.completions.create(
        model='anthropic/claude-sonnet-4.5',
        messages=[{'role': 'user', 'content': 'Hello!'}],
    )
    print(completion.choices[0].message.content)


asyncio.run(main())
async_client.py
"""Async Messages API call using the Anthropic SDK's asyncio client."""
import os
import asyncio
import anthropic

gateway = anthropic.AsyncAnthropic(
    base_url='https://ai-gateway.vercel.sh',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)


async def main() -> None:
    """Send one prompt and print the model's reply."""
    reply = await gateway.messages.create(
        model='anthropic/claude-sonnet-4.5',
        max_tokens=1024,
        messages=[{'role': 'user', 'content': 'Hello!'}],
    )
    print(reply.content[0].text)


asyncio.run(main())

Enable models to call functions you define. This example shows a weather tool that the model can invoke.

tools.py
"""Expose a local function to the model via OpenAI-format tool definitions."""
import os
import json
from openai import OpenAI

gateway = OpenAI(
    base_url='https://ai-gateway.vercel.sh/v1',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# JSON-Schema description of the one tool the model is allowed to call.
weather_tool = {
    'type': 'function',
    'function': {
        'name': 'get_weather',
        'description': 'Get the current weather for a location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'City name, e.g. San Francisco',
                },
            },
            'required': ['location'],
        },
    },
}

response = gateway.chat.completions.create(
    model='anthropic/claude-sonnet-4.5',
    messages=[{'role': 'user', 'content': "What's the weather in Tokyo?"}],
    tools=[weather_tool],
)

# A non-empty tool_calls list means the model chose to invoke a tool
# rather than answer directly; arguments arrive as a JSON string.
calls = response.choices[0].message.tool_calls
if calls:
    requested = calls[0]
    arguments = json.loads(requested.function.arguments)
    print(f"Model wants to call: {requested.function.name}")
    print(f"With arguments: {arguments}")
tools.py
"""Expose a local function to the model via Anthropic-format tool definitions."""
import os
import anthropic

gateway = anthropic.Anthropic(
    base_url='https://ai-gateway.vercel.sh',
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
)

# Anthropic tools carry the schema under `input_schema` rather than
# OpenAI's nested `function.parameters` key.
weather_tool = {
    'name': 'get_weather',
    'description': 'Get the current weather for a location',
    'input_schema': {
        'type': 'object',
        'properties': {
            'location': {
                'type': 'string',
                'description': 'City name, e.g. San Francisco',
            },
        },
        'required': ['location'],
    },
}

reply = gateway.messages.create(
    model='anthropic/claude-sonnet-4.5',
    max_tokens=1024,
    messages=[{'role': 'user', 'content': "What's the weather in Tokyo?"}],
    tools=[weather_tool],
)

# Tool requests arrive as `tool_use` content blocks; `input` is already
# a parsed dict (no json.loads needed, unlike the OpenAI format).
for part in reply.content:
    if part.type == 'tool_use':
        print(f"Model wants to call: {part.name}")
        print(f"With arguments: {part.input}")

See OpenAI-compatible tool calls or Anthropic-compatible tool calls for more examples.

Generate responses that conform to a JSON schema for reliable parsing.

structured.py
"""Request model output that conforms to a JSON schema, then parse it."""
import os
import json  # hoisted to the top of the script per PEP 8 (was imported mid-file)
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv('AI_GATEWAY_API_KEY'),
    base_url='https://ai-gateway.vercel.sh/v1'
)

# `response_format` with type 'json_schema' constrains the model to emit
# a JSON object matching `schema`, making the reply reliably parseable.
response = client.chat.completions.create(
    model='anthropic/claude-sonnet-4.5',
    messages=[
        {'role': 'user', 'content': 'Extract: John is 30 years old and lives in NYC'}
    ],
    response_format={
        'type': 'json_schema',
        'json_schema': {
            'name': 'person',
            'schema': {
                'type': 'object',
                'properties': {
                    'name': {'type': 'string'},
                    'age': {'type': 'integer'},
                    'city': {'type': 'string'}
                },
                'required': ['name', 'age', 'city']
            }
        }
    }
)

# The constrained message content is plain JSON text; decode it.
data = json.loads(response.choices[0].message.content)
print(data)  # {'name': 'John', 'age': 30, 'city': 'NYC'}

See structured outputs for more details.

Python frameworks with dedicated AI Gateway support:

| Framework   | Integration                                      |
| ----------- | ------------------------------------------------ |
| Pydantic AI | Native `VercelProvider` for type-safe agents     |
| LlamaIndex  | `llama-index-llms-vercel-ai-gateway` package     |
| LiteLLM    | Use the `vercel_ai_gateway/` model prefix        |
| LangChain   | Configure via the OpenAI-compatible endpoint     |

See Framework Integrations for the complete list and setup guides.

For complete API documentation, see:

  • OpenAI-compatible API — Chat completions, embeddings, streaming, tool calls, structured outputs, image inputs, and provider routing
  • Anthropic-compatible API — Messages API, streaming, tool calls, extended thinking, web search, and file attachments

Was this helpful?

supported.