diff --git a/README.md b/README.md index 874bbe4..de67bf5 100644 --- a/README.md +++ b/README.md @@ -269,6 +269,16 @@ result = client.llm.chat( ) ``` +### Payment Network Selection + +By default, payments settle on the OG EVM network. To pay via Base Sepolia instead, set the `payment_network` parameter at client initialization. The Base Sepolia payment network uses the OG token at address `0x240b09731D96979f50B2C649C9CE10FcF9C7987F`. +```python +client = og.Client( + private_key=os.environ.get("OG_PRIVATE_KEY"), + payment_network=og.PaymentNetwork.BASE_SEPOLIA, +) +``` + ## Examples Additional code examples are available in the [examples](./examples) directory. diff --git a/docs/opengradient/client/client.md b/docs/opengradient/client/client.md index 91e0ad4..cf0c492 100644 --- a/docs/opengradient/client/client.md +++ b/docs/opengradient/client/client.md @@ -21,7 +21,7 @@ blockchain private key and optional Model Hub credentials. #### Constructor ```python -def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') +def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', payment_network: `PaymentNetwork` = PaymentNetwork.OG_EVM) ``` **Arguments** @@ -34,6 +34,7 @@ def __init__(private_key: str, email: Optional[str] = None, password: Optio * **`contract_address`**: Inference contract address. * **`og_llm_server_url`**: OpenGradient LLM server URL. * **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. +* **`payment_network`**: Payment network for x402 transactions. Defaults to OG_EVM. #### Variables diff --git a/docs/opengradient/client/index.md b/docs/opengradient/client/index.md index e3fbf7a..e79f98b 100644 --- a/docs/opengradient/client/index.md +++ b/docs/opengradient/client/index.md @@ -66,7 +66,7 @@ blockchain private key and optional Model Hub credentials. 
#### Constructor ```python -def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') +def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', payment_network: `PaymentNetwork` = PaymentNetwork.OG_EVM) ``` **Arguments** @@ -79,6 +79,7 @@ def __init__(private_key: str, email: Optional[str] = None, password: Optio * **`contract_address`**: Inference contract address. * **`og_llm_server_url`**: OpenGradient LLM server URL. * **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. +* **`payment_network`**: Payment network for x402 transactions. Defaults to OG_EVM. #### Variables diff --git a/docs/opengradient/client/llm.md b/docs/opengradient/client/llm.md index eb40f00..dbe7361 100644 --- a/docs/opengradient/client/llm.md +++ b/docs/opengradient/client/llm.md @@ -21,7 +21,7 @@ Supports both streaming and non-streaming responses. #### Constructor ```python -def __init__(wallet_account: `LocalAccount`, og_llm_server_url: str, og_llm_streaming_server_url: str) +def __init__(wallet_account: `LocalAccount`, og_llm_server_url: str, og_llm_streaming_server_url: str, network_filter: str = 'og-evm') ``` #### Methods diff --git a/docs/opengradient/index.md b/docs/opengradient/index.md index 33641cb..c5d9a16 100644 --- a/docs/opengradient/index.md +++ b/docs/opengradient/index.md @@ -126,7 +126,7 @@ blockchain private key and optional Model Hub credentials. #### Constructor ```python -def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') +def __init__(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', payment_network: `PaymentNetwork` = PaymentNetwork.OG_EVM) ``` **Arguments** @@ -139,6 +139,7 @@ def __init__(private_key: str, email: Optional[str] = None, password: Optio * **`contract_address`**: Inference contract address. * **`og_llm_server_url`**: OpenGradient LLM server URL. * **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. +* **`payment_network`**: Payment network for x402 transactions. Defaults to OG_EVM. 
#### Variables @@ -156,6 +157,23 @@ Enum for the different inference modes available for inference (VANILLA, ZKML, T * static `VANILLA` * static `ZKML` +### `PaymentNetwork` + +Payment network for x402 payment protocol transactions. + +Controls which blockchain network is used for paying for LLM inference. + +**Attributes** + +* **`OG_EVM`**: Pay on the OpenGradient EVM network (default). +* **`BASE_SEPOLIA`**: Pay on the Base Sepolia testnet using the OG token + at address ``0x240b09731D96979f50B2C649C9CE10FcF9C7987F``. + +#### Variables + +* static `BASE_SEPOLIA` +* static `OG_EVM` + ### `TEE_LLM` Enum for LLM models available for TEE (Trusted Execution Environment) execution. diff --git a/docs/opengradient/types.md b/docs/opengradient/types.md index 71ecfd4..0f94103 100644 --- a/docs/opengradient/types.md +++ b/docs/opengradient/types.md @@ -220,6 +220,23 @@ def __init__(name: str, values: List[Tuple[int, int]]) * static `name` : str * static `values` : List[Tuple[int, int]] +### `PaymentNetwork` + +Payment network for x402 payment protocol transactions. + +Controls which blockchain network is used for paying for LLM inference. + +**Attributes** + +* **`OG_EVM`**: Pay on the OpenGradient EVM network (default). +* **`BASE_SEPOLIA`**: Pay on the Base Sepolia testnet using the OG token + at address ``0x240b09731D96979f50B2C649C9CE10FcF9C7987F``. + +#### Variables + +* static `BASE_SEPOLIA` +* static `OG_EVM` + ### `SchedulerParams` #### Constructor diff --git a/examples/run_x402_llm.py b/examples/run_x402_llm.py index 8926de2..3db49ff 100644 --- a/examples/run_x402_llm.py +++ b/examples/run_x402_llm.py @@ -15,6 +15,8 @@ client = og.Client( private_key=os.environ.get("OG_PRIVATE_KEY"), + # To pay via Base Sepolia instead of OG EVM: + # payment_network=og.PaymentNetwork.BASE_SEPOLIA, ) messages = [ diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py index 375581b..2085021 100644 --- a/src/opengradient/__init__.py +++ b/src/opengradient/__init__.py @@ -86,6 +86,7 @@ InferenceResult, ModelOutput, ModelRepository, + PaymentNetwork, SchedulerParams, TextGenerationOutput, TextGenerationStream, @@ -132,6 +133,7 @@ def init( "init", "TEE_LLM", "InferenceMode", + "PaymentNetwork", "HistoricalInputQuery", "SchedulerParams", "CandleType", diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index 54225a7..72ffec5 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -324,9 +324,7 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path model_input = json.load(file) click.echo(f'Running {inference_mode} inference for model "{model_cid}"') - inference_result = client.alpha.infer( - model_cid=model_cid, inference_mode=InferenceModes[inference_mode], model_input=model_input - ) + inference_result = client.alpha.infer(model_cid=model_cid, inference_mode=InferenceModes[inference_mode], model_input=model_input) click.echo() # Add a newline for better spacing click.secho("✅ Transaction successful", fg="green", bold=True) diff --git a/src/opengradient/client/client.py b/src/opengradient/client/client.py index 31086f6..1125ee4 100644 --- a/src/opengradient/client/client.py +++ b/src/opengradient/client/client.py @@ -11,6 +11,7 @@ DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, DEFAULT_RPC_URL, ) +from ..types import PaymentNetwork from .alpha import Alpha from .llm import LLM from .model_hub import ModelHub @@ -50,6 +51,7 @@ def __init__( contract_address: str = DEFAULT_INFERENCE_CONTRACT_ADDRESS, og_llm_server_url: Optional[str] = 
DEFAULT_OPENGRADIENT_LLM_SERVER_URL, og_llm_streaming_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, + payment_network: PaymentNetwork = PaymentNetwork.OG_EVM, ): """ Initialize the OpenGradient client. @@ -63,6 +65,7 @@ def __init__( contract_address: Inference contract address. og_llm_server_url: OpenGradient LLM server URL. og_llm_streaming_server_url: OpenGradient LLM streaming server URL. + payment_network: Payment network for x402 transactions. Defaults to OG_EVM. """ blockchain = Web3(Web3.HTTPProvider(rpc_url)) wallet_account = blockchain.eth.account.from_key(private_key) @@ -78,6 +81,7 @@ def __init__( wallet_account=wallet_account, og_llm_server_url=og_llm_server_url, og_llm_streaming_server_url=og_llm_streaming_server_url, + network_filter=payment_network.value, ) self.alpha = Alpha( @@ -86,4 +90,3 @@ def __init__( inference_hub_contract_address=contract_address, api_url=api_url, ) - diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index 2d13224..d39b627 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -53,16 +53,23 @@ class LLM: result = client.llm.completion(model=TEE_LLM.CLAUDE_3_5_HAIKU, prompt="Hello") """ - def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_streaming_server_url: str): + def __init__( + self, + wallet_account: LocalAccount, + og_llm_server_url: str, + og_llm_streaming_server_url: str, + network_filter: str = DEFAULT_NETWORK_FILTER, + ): self._wallet_account = wallet_account self._og_llm_server_url = og_llm_server_url self._og_llm_streaming_server_url = og_llm_streaming_server_url + self._network_filter = network_filter - def _og_payment_selector(self, accepts, network_filter=DEFAULT_NETWORK_FILTER, scheme_filter=None, max_value=None): + def _og_payment_selector(self, accepts, network_filter=None, scheme_filter=None, max_value=None): """Custom payment selector for OpenGradient network.""" return x402Client.default_payment_requirements_selector( accepts, - network_filter=network_filter, + network_filter=network_filter or self._network_filter, scheme_filter=scheme_filter, max_value=max_value, ) @@ -418,7 +425,7 @@ async def _tee_llm_chat_stream_async( limits=LIMITS, http2=False, follow_redirects=False, - auth=X402Auth(account=self._wallet_account, network_filter=DEFAULT_NETWORK_FILTER), # type: ignore + auth=X402Auth(account=self._wallet_account, network_filter=self._network_filter), # type: ignore verify=True, ) as client: headers = { diff --git a/src/opengradient/defaults.py b/src/opengradient/defaults.py index cd796da..581fa2e 100644 --- a/src/opengradient/defaults.py +++ b/src/opengradient/defaults.py @@ -9,3 +9,4 @@ DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://llmogevm.opengradient.ai" DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://llmogevm.opengradient.ai" DEFAULT_NETWORK_FILTER = "og-evm" +DEFAULT_BASE_SEPOLIA_NETWORK_FILTER = "base-sepolia" diff --git a/src/opengradient/types.py b/src/opengradient/types.py index fa89c98..f18cf1a 100644 --- a/src/opengradient/types.py +++ b/src/opengradient/types.py @@ -10,6 +10,28 @@ import numpy as np +class PaymentNetwork(str, Enum): + """ + Payment network for x402 payment protocol transactions. + + Controls which blockchain network is used for paying for LLM inference. + + Attributes: + OG_EVM: Pay on the OpenGradient EVM network (default). + BASE_SEPOLIA: Pay on the Base Sepolia testnet using the OG token + at address ``0x240b09731D96979f50B2C649C9CE10FcF9C7987F``. 
+ + Examples: + >>> client = og.Client( + ... private_key="0x...", + ... payment_network=og.PaymentNetwork.BASE_SEPOLIA, + ... ) + """ + + OG_EVM = "og-evm" + BASE_SEPOLIA = "base-sepolia" + + class x402SettlementMode(str, Enum): """ Settlement modes for x402 payment protocol transactions. diff --git a/tutorials/01-verifiable-ai-agent.md b/tutorials/01-verifiable-ai-agent.md new file mode 100644 index 0000000..6d212ef --- /dev/null +++ b/tutorials/01-verifiable-ai-agent.md @@ -0,0 +1,347 @@ +# Build a Verifiable AI Agent with On-Chain Tools + +Traditional AI agents operate as black boxes -- you send a prompt, get a response, and +have no way to prove what model ran, what it saw, or what it actually produced. For +financial applications, compliance workflows, or any context where trust matters, this +opacity is a serious problem. + +OpenGradient solves this by running every LLM call inside a **Trusted Execution +Environment (TEE)** and settling every inference on-chain via the **x402 payment +protocol**. This means you get cryptographic proof that a specific model processed +your exact input and produced the exact output you received -- no one, not even the +infrastructure operator, can tamper with it. + +In this tutorial you will build a LangChain ReAct agent that combines TEE-verified +LLM reasoning with on-chain ONNX model inference. The agent can look up a crypto +portfolio *and* call a volatility model that executes directly on the OpenGradient +blockchain, giving you a fully verifiable AI financial advisor. + +## Prerequisites + +```bash +pip install opengradient langgraph +``` + +You also need an OpenGradient private key funded with test tokens. Any standard +Ethereum private key works -- you can generate one with any Ethereum wallet. + +```bash +export OG_PRIVATE_KEY="0x..." +``` + +> **Faucet:** Get free test tokens at so your +> wallet can pay for inference transactions. + +## Step 1: Initialize the Client and LangChain Adapter + +The `og.agents.langchain_adapter` function returns a LangChain-compatible chat model +that routes all requests through OpenGradient's TEE infrastructure. + +```python +import os +import opengradient as og + +# Create the LangChain chat model backed by OpenGradient TEE. +# Note: the adapter creates its own internal Client, separate from any client +# created via og.init(). This is why private_key is passed here explicitly. +llm = og.agents.langchain_adapter( + private_key=os.environ["OG_PRIVATE_KEY"], + model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, + max_tokens=500, + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, +) +``` + +Under the hood this creates an `OpenGradientChatModel` that implements LangChain's +`BaseChatModel` interface. It handles message format conversion, tool call parsing, +and x402 payment signing automatically. + +## Step 2: Create a Standard Tool + +Let's give the agent a simple tool to look up portfolio holdings. This is a regular +LangChain `@tool` -- nothing OpenGradient-specific yet. + +```python +import json +from langchain_core.tools import tool + +PORTFOLIO = { + "ETH": {"amount": 10.0, "avg_cost_usd": 1950.00}, + "BTC": {"amount": 0.5, "avg_cost_usd": 42000.00}, +} + +@tool +def get_portfolio_holdings() -> str: + """Returns the user's current crypto portfolio holdings including token, amount, and average cost.""" + return json.dumps(PORTFOLIO, indent=2) +``` + +## Step 3: Create an AlphaSense On-Chain Model Tool + +This is where things get interesting. 
`create_run_model_tool` wraps an ONNX model +that lives on the OpenGradient blockchain as a LangChain `StructuredTool`. When the +agent calls this tool, it triggers an actual on-chain transaction -- the inference +runs on a blockchain node and the result is recorded in a transaction. + +You need three pieces: + +1. **A Pydantic input schema** -- defines what the LLM agent passes to the tool +2. **A model input provider** -- converts the agent's tool call into model tensors +3. **A model output formatter** -- turns the raw `InferenceResult` into a string + +```python +from enum import Enum +from pydantic import BaseModel, Field +from opengradient.alphasense import create_run_model_tool, ToolType + +# The model CID for a public ETH volatility model on the Alpha Testnet. +VOLATILITY_MODEL_CID = "hJD2Ja3akZFt1A2LT-D_1oxOCz_OtuGYw4V9eE1m39M" + +class Token(str, Enum): + ETH = "ethereum" + BTC = "bitcoin" + +class VolatilityInput(BaseModel): + token: Token = Field( + default=Token.ETH, + description="The cryptocurrency to measure volatility for.", + ) + +# Sample price data. In production, fetch from an exchange API or oracle. +SAMPLE_PRICES = { + Token.ETH: [2010.1, 2012.3, 2020.1, 2019.2, 2025.0, 2018.7, 2030.5, 2028.1], + Token.BTC: [67100.0, 67250.0, 67180.0, 67320.0, 67150.0, 67400.0, 67280.0, 67350.0], +} + +def provide_model_input(**llm_input) -> dict: + """Convert the agent's tool call into model input tensors.""" + token = llm_input.get("token", Token.ETH) + return {"price_series": SAMPLE_PRICES.get(token, SAMPLE_PRICES[Token.ETH])} + +def format_model_output(inference_result: og.InferenceResult) -> str: + """Format the on-chain model's output into a readable string.""" + std = float(inference_result.model_output["std"].item()) + return ( + f"Volatility (std dev of returns): {std:.4f} ({std:.2%} annualized). " + f"On-chain tx: {inference_result.transaction_hash}" + ) +``` + +Now create the tool. You need an initialized client to pass `client.alpha` as the +inference backend: + +```python +client = og.init(private_key=os.environ["OG_PRIVATE_KEY"]) + +volatility_tool = create_run_model_tool( + tool_type=ToolType.LANGCHAIN, + model_cid=VOLATILITY_MODEL_CID, + tool_name="crypto_volatility", + tool_description=( + "Measures the return volatility (standard deviation of returns) for a " + "cryptocurrency using an on-chain ONNX model. Use this when the user " + "asks about risk, volatility, or position sizing." + ), + model_input_provider=provide_model_input, + model_output_formatter=format_model_output, + inference=client.alpha, + tool_input_schema=VolatilityInput, + inference_mode=og.InferenceMode.VANILLA, +) +``` + +When the agent invokes `crypto_volatility`, the SDK: +1. Calls `provide_model_input()` with the LLM's chosen arguments +2. Submits an on-chain transaction to run the ONNX model +3. Waits for the transaction receipt and parses the model output +4. Calls `format_model_output()` and returns the string to the agent + +## Step 4: Wire Up the ReAct Agent + +With both tools ready, combine them into a ReAct agent using `langgraph`: + +```python +from langgraph.prebuilt import create_react_agent + +agent = create_react_agent( + model=llm, + tools=[get_portfolio_holdings, volatility_tool], +) + +# Ask a question that requires both tools. +result = agent.invoke({ + "messages": [ + { + "role": "user", + "content": ( + "I hold 10 ETH. Based on the current volatility from the " + "on-chain model, should I increase my position? What's the risk?" 
+ ), + } + ], +}) + +# Print the agent's final answer. +final_message = result["messages"][-1] +print(final_message.content) +``` + +The agent will: +1. Call `get_portfolio_holdings` to see what you own +2. Call `crypto_volatility` with `token="ethereum"` to get on-chain volatility +3. Reason about the numbers and give you a recommendation + +## Step 5: Examine the Output + +Every step in the agent's execution leaves a verifiable trail: + +- **LLM calls**: Each reasoning step ran in a TEE. The x402 payment hash in the + response header is your cryptographic receipt. You can look it up on-chain to + verify the model, input, and output. + +- **On-chain model inference**: The volatility tool call produced a blockchain + transaction hash. You can inspect this on the OpenGradient block explorer to see + the exact model CID, input tensors, and output tensors that were recorded. + +The `format_model_output` function above prints the transaction hash. In a production +app you would store these hashes for audit purposes. + +## Understanding Settlement Modes + +When calling the LLM, the `x402_settlement_mode` parameter controls how much data +is recorded on-chain: + +| Mode | What's Stored | Best For | +|------|--------------|----------| +| `SETTLE` | Hashes of input and output only | **Privacy** -- proves execution happened without revealing content | +| `SETTLE_BATCH` | Batch hash of multiple inferences | **Cost efficiency** -- reduces per-call gas costs (default) | +| `SETTLE_METADATA` | Full model info, input, output, and metadata | **Transparency** -- complete auditability for compliance | + +Choose based on your requirements: + +```python +# For development and testing -- cheapest option +llm_dev = og.agents.langchain_adapter( + private_key=os.environ["OG_PRIVATE_KEY"], + model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, +) + +# For production financial applications -- full audit trail +llm_prod = og.agents.langchain_adapter( + private_key=os.environ["OG_PRIVATE_KEY"], + model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, + x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, +) + +# For privacy-sensitive applications -- minimal on-chain footprint +llm_private = og.agents.langchain_adapter( + private_key=os.environ["OG_PRIVATE_KEY"], + model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, + x402_settlement_mode=og.x402SettlementMode.SETTLE, +) +``` + +## Complete Code + +```python +"""Verifiable AI Financial Agent -- complete working example.""" + +import json +import os +from enum import Enum + +from langchain_core.tools import tool +from langgraph.prebuilt import create_react_agent +from pydantic import BaseModel, Field + +import opengradient as og +from opengradient.alphasense import ToolType, create_run_model_tool + +# ── Config ──────────────────────────────────────────────────────────────── +VOLATILITY_MODEL_CID = "hJD2Ja3akZFt1A2LT-D_1oxOCz_OtuGYw4V9eE1m39M" + +PORTFOLIO = { + "ETH": {"amount": 10.0, "avg_cost_usd": 1950.00}, + "BTC": {"amount": 0.5, "avg_cost_usd": 42000.00}, +} + +# ── On-chain model tool (Token must be defined before SAMPLE_PRICES) ───── +class Token(str, Enum): + ETH = "ethereum" + BTC = "bitcoin" + +SAMPLE_PRICES = { + Token.ETH: [2010.1, 2012.3, 2020.1, 2019.2, 2025.0, 2018.7, 2030.5, 2028.1], + Token.BTC: [67100.0, 67250.0, 67180.0, 67320.0, 67150.0, 67400.0, 67280.0, 67350.0], +} + +# ── Client ──────────────────────────────────────────────────────────────── +# og.init() creates the global client (used by AlphaSense tools). 
+# langchain_adapter() creates its own internal client for LLM calls. +private_key = os.environ["OG_PRIVATE_KEY"] +client = og.init(private_key=private_key) + +llm = og.agents.langchain_adapter( + private_key=private_key, + model_cid=og.TEE_LLM.GPT_4_1_2025_04_14, + max_tokens=500, + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, +) + +# ── Standard tool ───────────────────────────────────────────────────────── +@tool +def get_portfolio_holdings() -> str: + """Returns the user's current crypto portfolio holdings.""" + return json.dumps(PORTFOLIO, indent=2) + +class VolatilityInput(BaseModel): + token: Token = Field(default=Token.ETH, description="Cryptocurrency to check.") + +def provide_model_input(**llm_input) -> dict: + token = llm_input.get("token", Token.ETH) + return {"price_series": SAMPLE_PRICES.get(token, SAMPLE_PRICES[Token.ETH])} + +def format_model_output(result: og.InferenceResult) -> str: + std = float(result.model_output["std"].item()) + return f"Volatility: {std:.4f} ({std:.2%}). Tx: {result.transaction_hash}" + +volatility_tool = create_run_model_tool( + tool_type=ToolType.LANGCHAIN, + model_cid=VOLATILITY_MODEL_CID, + tool_name="crypto_volatility", + tool_description="Measures return volatility for a crypto token using an on-chain model.", + model_input_provider=provide_model_input, + model_output_formatter=format_model_output, + inference=client.alpha, + tool_input_schema=VolatilityInput, + inference_mode=og.InferenceMode.VANILLA, +) + +# ── Agent ───────────────────────────────────────────────────────────────── +agent = create_react_agent(model=llm, tools=[get_portfolio_holdings, volatility_tool]) + +if __name__ == "__main__": + result = agent.invoke({ + "messages": [{ + "role": "user", + "content": ( + "I hold 10 ETH. Based on the current volatility, " + "should I increase my position? What's the risk?" + ), + }], + }) + print(result["messages"][-1].content) +``` + +## Next Steps + +- **Swap models**: Replace `GPT_4_1_2025_04_14` with `CLAUDE_4_0_SONNET` or + `GEMINI_2_5_PRO` -- the rest of your code stays the same. +- **Add more on-chain tools**: Use `create_run_model_tool` with different model CIDs + to give your agent access to price prediction, sentiment analysis, or other ML + models deployed on OpenGradient. +- **Read workflow results**: Use `og.alphasense.create_read_workflow_tool` to read + from scheduled on-chain workflows that run models automatically. +- **Go to production**: Switch settlement mode to `SETTLE_METADATA` and store the + payment hashes and transaction hashes for your compliance records. diff --git a/tutorials/02-streaming-multi-provider.md b/tutorials/02-streaming-multi-provider.md new file mode 100644 index 0000000..ff5604f --- /dev/null +++ b/tutorials/02-streaming-multi-provider.md @@ -0,0 +1,387 @@ +# Streaming Multi-Provider Chat with Settlement Modes + +Most LLM APIs lock you into a single provider. If you want to switch from OpenAI to +Anthropic or Google, you rewrite your integration, change authentication, and update +response parsing. OpenGradient gives you a single unified API that wraps every major +provider -- swap one enum value and everything else stays the same. + +But the real differentiator is settlement. Every inference call settles on-chain via +the x402 payment protocol, producing a cryptographic receipt you can use for +compliance, billing audits, or dispute resolution. You choose how much data goes +on-chain: just a hash (privacy), a batch digest (cost savings), or full metadata +(complete transparency). 
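+
+As a quick preview (Step 4 walks through all three modes), that choice is a single
+keyword argument on `client.llm.chat()`. A minimal sketch, assuming a `client`
+created with `og.init()` as in Step 1:
+
+```python
+# Minimal preview, assuming `client = og.init(private_key=...)` from Step 1.
+result = client.llm.chat(
+    model=og.TEE_LLM.GPT_4O,
+    messages=[{"role": "user", "content": "Hello"}],
+    max_tokens=50,
+    x402_settlement_mode=og.x402SettlementMode.SETTLE,  # only hashes go on-chain
+)
+print(result.payment_hash)  # the on-chain receipt for this call
+```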
+ +This tutorial walks through the `client.llm.chat()` API, covering non-streaming and +streaming responses, multi-provider switching, settlement modes, payment networks, +and function calling -- all in one place. + +## Prerequisites + +```bash +pip install opengradient +``` + +Export your OpenGradient private key: + +```bash +export OG_PRIVATE_KEY="0x..." +``` + +> **Faucet:** Get free testnet tokens at https://faucet.opengradient.ai/ +> +> If you see x402 payment errors, make sure your wallet has sufficient testnet tokens. + +## Step 1: Basic Non-Streaming Chat + +Start with the simplest possible call -- send a message and get a response. + +```python +import os +import sys +import opengradient as og + +private_key = os.environ.get("OG_PRIVATE_KEY") +if not private_key: + print("Error: set the OG_PRIVATE_KEY environment variable.") + sys.exit(1) + +client = og.init(private_key=private_key) + +result = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[{"role": "user", "content": "What is the x402 payment protocol?"}], + max_tokens=200, + temperature=0.0, +) + +# result is a TextGenerationOutput dataclass +print(result.chat_output.get("content", "")) # The model's text response +print(result.finish_reason) # "stop", "length", or "tool_calls" +print(result.payment_hash) # On-chain x402 receipt +``` + +The `chat_output` dictionary follows the OpenAI message format: it has `role`, +`content`, and optionally `tool_calls` keys. The `payment_hash` is your on-chain +settlement proof -- every call gets one. + +## Step 2: Switch Providers with One Line + +The `model` parameter accepts any `og.TEE_LLM` enum value. Swap the model and +everything else -- message format, authentication, response parsing -- stays +identical. + +```python +# OpenAI +result_openai = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[{"role": "user", "content": "Hello from OpenAI!"}], + max_tokens=100, +) + +# Anthropic +result_anthropic = client.llm.chat( + model=og.TEE_LLM.CLAUDE_3_7_SONNET, + messages=[{"role": "user", "content": "Hello from Anthropic!"}], + max_tokens=100, +) + +# Google +result_google = client.llm.chat( + model=og.TEE_LLM.GEMINI_2_5_FLASH, + messages=[{"role": "user", "content": "Hello from Google!"}], + max_tokens=100, +) + +# xAI +result_xai = client.llm.chat( + model=og.TEE_LLM.GROK_3_BETA, + messages=[{"role": "user", "content": "Hello from xAI!"}], + max_tokens=100, +) +``` + +This makes A/B testing trivial -- run the same prompt across providers and compare +quality, latency, and cost without changing any infrastructure. + +## Step 3: Enable Streaming + +For chat UIs and real-time applications, pass `stream=True` to get tokens as they +are generated. The return value changes from a `TextGenerationOutput` to a generator +that yields `StreamChunk` objects. + +```python +stream = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[ + {"role": "system", "content": "You are a concise technical writer."}, + {"role": "user", "content": "Explain TEEs in one paragraph."}, + ], + max_tokens=300, + temperature=0.0, + stream=True, +) + +for chunk in stream: + # Each chunk has a choices list. The first choice's delta + # contains the incremental content for this token. + delta = chunk.choices[0].delta + + if delta.content: + print(delta.content, end="", flush=True) + + # The final chunk has a finish_reason and optional usage stats. 
+ if chunk.is_final: + print(f"\n\nModel: {chunk.model}") + if chunk.usage: + print(f"Tokens used: {chunk.usage.total_tokens}") +``` + +The `StreamChunk` dataclass has these fields: + +| Field | Type | Description | +|-------|------|-------------| +| `choices` | `List[StreamChoice]` | Incremental choices (usually one) | +| `model` | `str` | Model identifier | +| `usage` | `StreamUsage` or `None` | Token counts (final chunk only) | +| `is_final` | `bool` | `True` when the stream is ending | + +Each `StreamChoice` contains a `StreamDelta` with optional `content`, `role`, and +`tool_calls` fields. + +## Step 4: Settlement Modes + +Every LLM call settles on-chain. The `x402_settlement_mode` parameter controls the +privacy/cost/transparency trade-off: + +| Mode | On-Chain Data | Use Case | +|------|--------------|----------| +| `SETTLE` | Input/output hashes only | **Privacy** -- prove execution without revealing content | +| `SETTLE_BATCH` | Batch digest of multiple calls | **Cost efficiency** -- lower gas per inference (default) | +| `SETTLE_METADATA` | Full model, input, output, metadata | **Transparency** -- complete audit trail | + +```python +# Privacy-first: only hashes stored on-chain +result_private = client.llm.chat( + model=og.TEE_LLM.CLAUDE_3_7_SONNET, + messages=[{"role": "user", "content": "Sensitive query here."}], + max_tokens=100, + x402_settlement_mode=og.x402SettlementMode.SETTLE, +) +print(f"Payment hash (SETTLE): {result_private.payment_hash}") + +# Cost-efficient: batched settlement (this is the default) +result_batch = client.llm.chat( + model=og.TEE_LLM.CLAUDE_3_7_SONNET, + messages=[{"role": "user", "content": "Regular query."}], + max_tokens=100, + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, +) +print(f"Payment hash (SETTLE_BATCH): {result_batch.payment_hash}") + +# Full transparency: everything on-chain +result_transparent = client.llm.chat( + model=og.TEE_LLM.CLAUDE_3_7_SONNET, + messages=[{"role": "user", "content": "Auditable query."}], + max_tokens=100, + x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, +) +print(f"Payment hash (SETTLE_METADATA): {result_transparent.payment_hash}") +``` + +All three calls return a `payment_hash` you can look up on-chain. The difference is +how much detail the on-chain record contains. Store these hashes if you need an +audit trail -- they are the on-chain receipts for each inference call. + +You can also choose which blockchain network to settle on by passing +`payment_network` at client creation time. See Step 5 for details. + +## Step 5: Payment Network Selection + +By default, x402 payments settle on the OpenGradient EVM network. You can also pay +on Base Sepolia testnet by passing `payment_network` when creating the client: + +```python +# Default: pay on OpenGradient EVM +client_og = og.init( + private_key=private_key, +) + +# Alternative: pay on Base Sepolia +client_base = og.Client( + private_key=private_key, + payment_network=og.PaymentNetwork.BASE_SEPOLIA, +) + +# Both clients use the exact same llm.chat() API +result = client_base.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[{"role": "user", "content": "Hello via Base Sepolia!"}], + max_tokens=100, +) +``` + +The payment network affects only *where* the settlement transaction lands -- the TEE +verification and API interface are identical regardless of which network you choose. + +## Step 6: Function Calling + +You can pass tools to `client.llm.chat()` in the standard OpenAI function-calling +format. 
This works with any model that supports tool use. + +```python +tools = [ + { + "type": "function", + "function": { + "name": "get_token_price", + "description": "Get the current USD price of a cryptocurrency.", + "parameters": { + "type": "object", + "properties": { + "symbol": { + "type": "string", + "description": "Token ticker symbol, e.g. ETH, BTC.", + } + }, + "required": ["symbol"], + }, + }, + } +] + +result = client.llm.chat( + model=og.TEE_LLM.GEMINI_2_5_FLASH, + messages=[{"role": "user", "content": "What's the current price of ETH?"}], + max_tokens=200, + tools=tools, + tool_choice="auto", +) + +if result.chat_output.get("tool_calls"): + # The model decided to call a tool instead of responding with text. + # We check for tool_calls in the message rather than relying on finish_reason, + # since the exact finish_reason string may vary by provider. + for tc in result.chat_output["tool_calls"]: + func = tc["function"] + print(f"Tool: {func['name']}, Args: {func['arguments']}") +else: + print(result.chat_output.get("content", "")) +``` + +When the model returns tool calls, execute the requested functions locally, +then send the results back in a follow-up `client.llm.chat()` call with a `"tool"` +role message. See **Tutorial 3** for a complete multi-turn tool-calling loop. + +## Complete Code + +```python +"""Streaming Multi-Provider Chat -- complete working example.""" + +import os +import sys +import opengradient as og + +# ── Initialize ──────────────────────────────────────────────────────────── +private_key = os.environ.get("OG_PRIVATE_KEY") +if not private_key: + print("Error: set the OG_PRIVATE_KEY environment variable.") + sys.exit(1) + +client = og.init(private_key=private_key) + +PROMPT = "Explain what a Trusted Execution Environment is in two sentences." + +# ── Multi-provider comparison ───────────────────────────────────────────── +models = [ + ("GPT-4o", og.TEE_LLM.GPT_4O), + ("Claude 3.7 Sonnet", og.TEE_LLM.CLAUDE_3_7_SONNET), + ("Gemini 2.5 Flash", og.TEE_LLM.GEMINI_2_5_FLASH), + ("Grok 3 Beta", og.TEE_LLM.GROK_3_BETA), +] + +for name, model in models: + try: + result = client.llm.chat( + model=model, + messages=[{"role": "user", "content": PROMPT}], + max_tokens=200, + temperature=0.0, + ) + print(f"[{name}] {result.chat_output.get('content', '')}") + print(f" Payment hash: {result.payment_hash}\n") + except Exception as e: + print(f"[{name}] Error: {e}\n") + +# ── Streaming ───────────────────────────────────────────────────────────── +print("--- Streaming from GPT-4o ---") +stream = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[{"role": "user", "content": "What is x402? 
Keep it under 50 words."}], + max_tokens=100, + stream=True, +) + +for chunk in stream: + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) +print("\n") + +# ── Settlement modes ────────────────────────────────────────────────────── +for mode_name, mode in [ + ("SETTLE", og.x402SettlementMode.SETTLE), + ("SETTLE_BATCH", og.x402SettlementMode.SETTLE_BATCH), + ("SETTLE_METADATA", og.x402SettlementMode.SETTLE_METADATA), +]: + try: + r = client.llm.chat( + model=og.TEE_LLM.CLAUDE_3_7_SONNET, + messages=[{"role": "user", "content": "Say hello."}], + max_tokens=50, + x402_settlement_mode=mode, + ) + print(f"[{mode_name}] payment_hash={r.payment_hash}") + except Exception as e: + print(f"[{mode_name}] Error: {e}") + +# ── Function calling ────────────────────────────────────────────────────── +tools = [{ + "type": "function", + "function": { + "name": "get_token_price", + "description": "Get the current USD price of a cryptocurrency.", + "parameters": { + "type": "object", + "properties": { + "symbol": {"type": "string", "description": "Token ticker, e.g. ETH."} + }, + "required": ["symbol"], + }, + }, +}] + +result = client.llm.chat( + model=og.TEE_LLM.GEMINI_2_5_FLASH, + messages=[{"role": "user", "content": "What is the price of ETH?"}], + max_tokens=200, + tools=tools, + tool_choice="auto", +) + +if result.chat_output.get("tool_calls"): + for tc in result.chat_output["tool_calls"]: + func = tc["function"] + print(f"Tool call: {func['name']}({func['arguments']})") +else: + print(result.chat_output.get("content", "")) +``` + +## Next Steps + +- **Build a chat UI**: Use the streaming API with a web framework to build a + real-time chat interface backed by verifiable inference. +- **Add tool calling**: See **Tutorial 3** for a full multi-turn agent loop with + tool dispatch and result feeding. +- **Go multi-chain**: Try `og.PaymentNetwork.BASE_SEPOLIA` to settle on Base instead + of the OpenGradient EVM. +- **Build an agent**: See **Tutorial 1** to combine LangChain with on-chain model + tools for a fully verifiable AI agent. diff --git a/tutorials/03-verified-tool-calling.md b/tutorials/03-verified-tool-calling.md new file mode 100644 index 0000000..61e1481 --- /dev/null +++ b/tutorials/03-verified-tool-calling.md @@ -0,0 +1,402 @@ +# Tool-Calling Agent with Verified Reasoning + +When an LLM agent decides to call a function, you typically have no way to prove +*why* it made that decision. The model's reasoning is opaque -- it could have been +influenced by a prompt injection, a poisoned context, or a simple hallucination. +You only see the tool call and hope the model made the right choice. + +OpenGradient changes this by running every LLM call inside a Trusted Execution +Environment (TEE). The model's reasoning -- including its decision to call a tool, +which tool to call, and what arguments to pass -- is cryptographically attested and +settled on-chain via the x402 payment protocol. The tool executions themselves run +locally, but the AI reasoning that drives them is verifiable. + +In this tutorial you will build a personal crypto portfolio assistant that can look +up holdings, check prices, and calculate risk metrics. The agent uses a multi-turn +conversation loop where the LLM decides which tools to call and synthesizes the +results into actionable advice. + +## Prerequisites + +```bash +pip install opengradient +``` + +You need an OpenGradient private key funded with test tokens: + +```bash +export OG_PRIVATE_KEY="0x..." 
+``` + +> **Faucet:** Get free testnet tokens at https://faucet.opengradient.ai/ +> +> If you see x402 payment errors, make sure your wallet has sufficient testnet tokens. + +## Step 1: Initialize the Client + +```python +import json +import os +import sys + +import opengradient as og + +private_key = os.environ.get("OG_PRIVATE_KEY") +if not private_key: + print("Error: set the OG_PRIVATE_KEY environment variable.") + sys.exit(1) + +client = og.init(private_key=private_key) +``` + +## Step 2: Define Local Tool Implementations + +These are the functions the agent can call. In a real application they would query a +database, exchange API, or on-chain contract. Here we use hardcoded data so the +tutorial runs without external dependencies. + +```python +PORTFOLIO = { + "ETH": {"amount": 5.0, "avg_cost": 1950.00}, + "BTC": {"amount": 0.25, "avg_cost": 42000.00}, + "SOL": {"amount": 100, "avg_cost": 95.00}, +} + +CURRENT_PRICES = {"ETH": 2120.50, "BTC": 67250.00, "SOL": 148.30} +VOLATILITY = {"ETH": 0.65, "BTC": 0.55, "SOL": 0.85} + +def get_portfolio() -> str: + """Return the user's portfolio holdings as a JSON string.""" + rows = [{"token": t, "amount": v["amount"], "avg_cost_usd": v["avg_cost"]} + for t, v in PORTFOLIO.items()] + return json.dumps(rows, indent=2) + +def get_price(token: str) -> str: + """Return the current price for a single token.""" + token = token.upper() + price = CURRENT_PRICES.get(token) + if price is None: + return json.dumps({"error": f"Unknown token: {token}"}) + return json.dumps({"token": token, "price_usd": price}) + +def calculate_risk(token: str) -> str: + """Return simplified risk metrics for a token.""" + token = token.upper() + vol = VOLATILITY.get(token) + if vol is None: + return json.dumps({"error": f"Unknown token: {token}"}) + holding = PORTFOLIO.get(token) + price = CURRENT_PRICES.get(token) + position_value = holding["amount"] * price if holding and price else 0 + daily_vol = vol / (252 ** 0.5) + var_95 = position_value * daily_vol * 1.645 + return json.dumps({ + "token": token, + "annualized_volatility": f"{vol:.0%}", + "position_value_usd": round(position_value, 2), + "daily_var_95_usd": round(var_95, 2), + }) + +# Dispatch table for executing tool calls by name. +TOOL_DISPATCH = { + "get_portfolio": lambda **kw: get_portfolio(), + "get_price": lambda **kw: get_price(kw["token"]), + "calculate_risk": lambda **kw: calculate_risk(kw["token"]), +} +``` + +## Step 3: Define Tools in OpenAI Function-Calling Format + +Each tool is described as a JSON object with `type`, `function.name`, +`function.description`, and a `function.parameters` JSON Schema. This format is +the same one used by the OpenAI API and is supported across all OpenGradient +providers. + +```python +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_portfolio", + "description": "Returns the user's current crypto portfolio holdings.", + "parameters": {"type": "object", "properties": {}, "required": []}, + }, + }, + { + "type": "function", + "function": { + "name": "get_price", + "description": "Returns the current USD price for a cryptocurrency token.", + "parameters": { + "type": "object", + "properties": { + "token": {"type": "string", "description": "Token ticker, e.g. 
ETH, BTC, SOL."}, + }, + "required": ["token"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "calculate_risk", + "description": "Calculates risk metrics: volatility, position value, and daily VaR.", + "parameters": { + "type": "object", + "properties": { + "token": {"type": "string", "description": "Token ticker, e.g. ETH, BTC, SOL."}, + }, + "required": ["token"], + }, + }, + }, +] +``` + +## Step 4: Pass Tools to `client.llm.chat` + +Pass the `tools` list and `tool_choice` parameter to any `client.llm.chat()` call. + +```python +result = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=[ + {"role": "system", "content": "You are a crypto portfolio assistant."}, + {"role": "user", "content": "What's my portfolio worth?"}, + ], + max_tokens=600, + temperature=0.0, + tools=TOOLS, + # "auto" lets the model decide whether to call a tool or respond with text. + # "none" forces a text-only response. + tool_choice="auto", + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, +) +``` + +When the model decides to call a tool, `result.finish_reason` will be `"tool_calls"` +(following the OpenAI convention). The tool call details are in +`result.chat_output["tool_calls"]`. + +## Step 5: Handle Tool Calls and Feed Results Back + +The core pattern for a tool-calling agent is a loop: + +1. Send messages + tools to the LLM +2. If `finish_reason == "tool_calls"`, execute each tool locally +3. Append the assistant message AND tool results to the conversation +4. Call the LLM again so it can see the tool output +5. Repeat until the model responds with a regular text message + +```python +def run_agent(client: og.Client, user_query: str) -> str: + """Run a multi-turn tool-calling agent loop.""" + messages = [ + { + "role": "system", + "content": ( + "You are a helpful crypto portfolio assistant. Use the provided " + "tools to look up holdings, prices, and risk metrics. Always check " + "the portfolio and relevant prices before giving advice. Be concise." + ), + }, + {"role": "user", "content": user_query}, + ] + + max_iterations = 5 # Safety limit to prevent runaway loops + + for i in range(max_iterations): + print(f"\n [Round {i + 1}] Calling LLM...") + + try: + result = client.llm.chat( + model=og.TEE_LLM.GPT_4O, + messages=messages, + max_tokens=600, + temperature=0.0, + tools=TOOLS, + tool_choice="auto", + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + ) + except Exception as e: + print(f" LLM call failed: {e}") + return f"Error: {e}" + + print(f" Finish reason: {result.finish_reason}") + print(f" Payment hash: {result.payment_hash}") + + # -- The model wants to call one or more tools -- + # "tool_calls" finish reason follows the OpenAI convention and is used + # consistently across all providers on OpenGradient. + if result.finish_reason == "tool_calls": + tool_calls = result.chat_output.get("tool_calls", []) + + # Append the assistant's message (contains tool_calls) to history. + messages.append(result.chat_output) + + for tc in tool_calls: + func = tc.get("function", tc) + tool_name = func["name"] + tool_args = json.loads(func.get("arguments", "{}")) + call_id = tc.get("id", "") + + print(f" -> Tool call: {tool_name}({tool_args})") + + handler = TOOL_DISPATCH.get(tool_name) + tool_result = handler(**tool_args) if handler else json.dumps({"error": f"Unknown tool: {tool_name}"}) + + print(f" <- Result: {tool_result[:120]}...") + + # Feed the result back as a "tool" role message. 
+ messages.append({ + "role": "tool", + "tool_call_id": call_id, + "content": tool_result, + }) + continue + + # -- The model produced a regular text response -- + content = result.chat_output.get("content", "") + print(f"\n [Final answer received]") + return content + + return "Agent reached maximum iterations without a final answer." +``` + +## Step 6: Run the Agent + +```python +if __name__ == "__main__": + queries = [ + "What does my portfolio look like right now? What's the total value?", + "Which of my holdings has the highest risk? Should I rebalance?", + ] + + for query in queries: + print("\n" + "=" * 70) + print(f"USER: {query}") + print("=" * 70) + answer = run_agent(client, query) + print(f"\nASSISTANT: {answer}") +``` + +Every LLM call in the loop above was TEE-verified and settled on-chain. The tool +executions ran locally, but the model's reasoning about *when* and *how* to call +tools was cryptographically attested. + +## Complete Code + +```python +"""Tool-Calling Agent with Verified Reasoning -- complete working example.""" + +import json +import os +import sys + +import opengradient as og + +# ── Initialize ──────────────────────────────────────────────────────────── +private_key = os.environ.get("OG_PRIVATE_KEY") +if not private_key: + print("Error: set the OG_PRIVATE_KEY environment variable.") + sys.exit(1) + +client = og.init(private_key=private_key) + +# ── Mock data ───────────────────────────────────────────────────────────── +PORTFOLIO = {"ETH": {"amount": 5.0, "avg_cost": 1950.00}, + "BTC": {"amount": 0.25, "avg_cost": 42000.00}, + "SOL": {"amount": 100, "avg_cost": 95.00}} +CURRENT_PRICES = {"ETH": 2120.50, "BTC": 67250.00, "SOL": 148.30} +VOLATILITY = {"ETH": 0.65, "BTC": 0.55, "SOL": 0.85} + +def get_portfolio() -> str: + rows = [{"token": t, "amount": v["amount"], "avg_cost_usd": v["avg_cost"]} + for t, v in PORTFOLIO.items()] + return json.dumps(rows, indent=2) + +def get_price(token: str) -> str: + token = token.upper() + price = CURRENT_PRICES.get(token) + return json.dumps({"error": f"Unknown token: {token}"}) if price is None else json.dumps({"token": token, "price_usd": price}) + +def calculate_risk(token: str) -> str: + token = token.upper() + vol = VOLATILITY.get(token) + if vol is None: + return json.dumps({"error": f"Unknown token: {token}"}) + holding, price = PORTFOLIO.get(token), CURRENT_PRICES.get(token) + pv = holding["amount"] * price if holding and price else 0 + return json.dumps({"token": token, "annualized_volatility": f"{vol:.0%}", + "position_value_usd": round(pv, 2), + "daily_var_95_usd": round(pv * (vol / 252**0.5) * 1.645, 2)}) + +TOOL_DISPATCH = { + "get_portfolio": lambda **kw: get_portfolio(), + "get_price": lambda **kw: get_price(kw["token"]), + "calculate_risk": lambda **kw: calculate_risk(kw["token"]), +} + +# ── Tool definitions ────────────────────────────────────────────────────── +TOOLS = [ + {"type": "function", "function": {"name": "get_portfolio", + "description": "Returns the user's crypto portfolio holdings.", + "parameters": {"type": "object", "properties": {}, "required": []}}}, + {"type": "function", "function": {"name": "get_price", + "description": "Returns the current USD price for a cryptocurrency.", + "parameters": {"type": "object", "properties": { + "token": {"type": "string", "description": "Token ticker, e.g. 
ETH."}}, + "required": ["token"]}}}, + {"type": "function", "function": {"name": "calculate_risk", + "description": "Calculates risk metrics: volatility, position value, and daily VaR.", + "parameters": {"type": "object", "properties": { + "token": {"type": "string", "description": "Token ticker, e.g. ETH."}}, + "required": ["token"]}}}, +] + +# ── Agent loop ──────────────────────────────────────────────────────────── +def run_agent(user_query: str) -> str: + messages = [ + {"role": "system", "content": "You are a crypto portfolio assistant. Use tools to look up data. Be concise."}, + {"role": "user", "content": user_query}, + ] + for i in range(5): + try: + result = client.llm.chat( + model=og.TEE_LLM.GPT_4O, messages=messages, max_tokens=600, + temperature=0.0, tools=TOOLS, tool_choice="auto", + x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, + ) + except Exception as e: + return f"Error: {e}" + + if result.finish_reason == "tool_calls": + messages.append(result.chat_output) + for tc in result.chat_output.get("tool_calls", []): + func = tc.get("function", tc) + name, args = func["name"], json.loads(func.get("arguments", "{}")) + handler = TOOL_DISPATCH.get(name) + messages.append({"role": "tool", "tool_call_id": tc.get("id", ""), + "content": handler(**args) if handler else f'{{"error": "unknown tool"}}'}) + continue + return result.chat_output.get("content", "") + return "Max iterations reached." + +# ── Run ─────────────────────────────────────────────────────────────────── +if __name__ == "__main__": + for q in ["What's my portfolio worth?", "Which holding has the highest risk?"]: + print(f"\nUSER: {q}") + print(f"ASSISTANT: {run_agent(q)}") +``` + +## Next Steps + +- **Add on-chain model tools**: See **Tutorial 1** for wrapping ONNX models as + LangChain tools with `create_run_model_tool`, giving the agent access to on-chain + ML predictions alongside local function calls. +- **Stream tool-calling responses**: Pass `stream=True` to get incremental tokens + even during multi-turn tool loops. See **Tutorial 2** for streaming basics. +- **Use different providers**: Swap `og.TEE_LLM.GPT_4O` for `CLAUDE_3_7_SONNET` or + `GEMINI_2_5_FLASH` -- tool calling works across all providers. +- **Add settlement transparency**: Switch to `SETTLE_METADATA` to store the full + tool-calling reasoning chain on-chain for audit purposes.