TUTORIAL
Nov 13, 2025
10 min read

Integrating TOON in Your AI Application

Complete guide with code examples for Python, Node.js, and LangChain. From basic parsing to production RAG systems.

Quick Start: 5-Minute Integration

Step 1: Convert JSON to TOON

Use our online converter or implement the conversion logic:

Python
import json

def json_to_toon(data):
    """Render a list of flat dicts as TOON text.

    The first object defines the column order; keys missing from later
    objects are emitted as empty strings.

    Raises:
        ValueError: if ``data`` is not a non-empty list.
    """
    if not isinstance(data, list) or len(data) == 0:
        raise ValueError("Input must be non-empty array")

    # Column order comes from the first record.
    columns = list(data[0].keys())

    lines = ["TOON.schema: " + " | ".join(columns)]
    for record in data:
        lines.append(" | ".join(str(record.get(col, "")) for col in columns))

    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"

# Example usage
# The first dict's keys (id, name, email) define the TOON column order.
users = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"}
]

toon_output = json_to_toon(users)
print(toon_output)
Output:
TOON.schema: id | name | email
1 | Alice | alice@example.com
2 | Bob | bob@example.com

Step 2: Send to LLM

Python
from openai import OpenAI

# NOTE(review): prefer loading the key from the environment rather than
# hardcoding it in source.
client = OpenAI(api_key="your-api-key")

# Get database records
# fetch_users_from_db is application-specific and defined elsewhere.
users = fetch_users_from_db()  # Returns list of dicts

# Convert to TOON
toon_data = json_to_toon(users)

# Send to GPT
# The TOON table is embedded directly in the user message; the model reads
# the schema header to interpret the rows.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Here is user data in TOON format:\n{toon_data}\n\nWho are the users from California?"}
    ]
)

print(response.choices[0].message.content)

Node.js Implementation

TOON Parser (TypeScript)

TypeScript
// Parsed representation of a TOON document: the schema (column names)
// plus one row of cell values per record. All cells are kept as strings —
// TOON carries no type information.
export interface ToonData {
  schema: string[];
  rows: string[][];
}

/**
 * Serialize an array of flat objects into TOON text.
 * Column order is taken from the first element; absent or null
 * values render as empty strings.
 */
export function jsonToToon(data: any[]): string {
  if (!Array.isArray(data) || data.length === 0) {
    throw new Error('Input must be non-empty array');
  }

  const columns = Object.keys(data[0]);
  const lines: string[] = [`TOON.schema: ${columns.join(' | ')}`];

  for (const record of data) {
    lines.push(columns.map(col => String(record[col] ?? '')).join(' | '));
  }

  // Every line, including the last, ends with a newline.
  return lines.join('\n') + '\n';
}

/**
 * Parse TOON text into its schema and raw string rows.
 * Throws if the first line is not a `TOON.schema:` header.
 * Cell values are trimmed but not type-coerced.
 */
export function parseToon(toonStr: string): ToonData {
  const [header, ...body] = toonStr.trim().split('\n');

  if (!header.startsWith('TOON.schema:')) {
    throw new Error('Invalid TOON format');
  }

  const schema = header
    .replace('TOON.schema:', '')
    .split('|')
    .map(cell => cell.trim());

  const rows = body.map(line => line.split('|').map(cell => cell.trim()));

  return { schema, rows };
}

// Convert back to JSON
// Convert back to JSON
/**
 * Rebuild an array of plain objects from TOON text.
 * Values come back as strings; callers must coerce types themselves.
 */
export function toonToJson(toonStr: string): any[] {
  const { schema, rows } = parseToon(toonStr);

  return rows.map(row =>
    Object.fromEntries(schema.map((key, i) => [key, row[i]]))
  );
}

Usage Example

TypeScript
import OpenAI from 'openai';
import { jsonToToon, toonToJson } from './toon-parser';

// The API key is read from the environment — never hardcode it.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Ask the model `query` about `data`, sending the data TOON-encoded
 * to reduce prompt tokens. Resolves to the model's text reply.
 */
async function queryWithToon(data: any[], query: string) {
  const toon = jsonToToon(data);

  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: `Data (TOON format):\n${toon}\n\nQuery: ${query}` }
    ]
  });

  return completion.choices[0].message.content;
}

// Usage
// Usage
const products = [
  { id: 1, name: 'Laptop', price: 999 },
  { id: 2, name: 'Mouse', price: 29 }
];

// NOTE: top-level await requires an ES module context (or wrap this call
// in an async main()).
const answer = await queryWithToon(
  products,
  'Which products are under $50?'
);
console.log(answer);

LangChain Integration

Custom Document Loader

Python
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

def create_toon_documents(data, chunk_size=50):
    """Split a large dataset into TOON-encoded LangChain Documents.

    Walks ``data`` in fixed-size windows; each window becomes one Document
    whose page_content is the TOON rendering of that slice, with the row
    count recorded in the metadata.
    """
    docs = []

    for start in range(0, len(data), chunk_size):
        window = data[start:start + chunk_size]
        docs.append(
            Document(
                page_content=json_to_toon(window),
                metadata={"format": "toon", "rows": len(window)},
            )
        )

    return docs

# Example: RAG with TOON
# fetch_products_from_db is application-specific and defined elsewhere.
products = fetch_products_from_db()  # 10,000 products

# Convert to TOON documents
# chunk_size=100 keeps each Document small enough that the k=3 retrieved
# chunks fit comfortably in the model context.
toon_docs = create_toon_documents(products, chunk_size=100)

# Create vector store
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(toon_docs, embeddings)

# Create QA chain
# temperature=0 favors deterministic, extractive answers over tabular data.
llm = ChatOpenAI(model="gpt-4", temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
)

# Query
result = qa_chain.run("What are the top-rated laptops under $1000?")
print(result)

Why this works: By chunking large datasets into 100-row TOON documents, we get better retrieval than JSON (fewer tokens per chunk means more relevant chunks fit in the context window).

Production Best Practices

1. Error Handling

Python
def safe_json_to_toon(data):
    """Convert ``data`` to TOON, falling back to JSON on any failure.

    Validates that the input is a list of dicts sharing a single schema
    before delegating to ``json_to_toon``. On any error the failure is
    logged and ``json.dumps(data)`` is returned instead, so the caller
    always receives usable text.

    Returns:
        str: TOON text on success, a JSON string on failure, or the
        sentinel header ``"TOON.schema: empty\\n"`` for an empty list.
    """
    # Bug fix: the original referenced an undefined `logger`, so any
    # conversion error raised NameError inside the except block instead
    # of reaching the JSON fallback.
    import logging
    logger = logging.getLogger(__name__)

    try:
        if not isinstance(data, list):
            raise ValueError("Input must be list")

        if len(data) == 0:
            return "TOON.schema: empty\n"

        # Check schema consistency
        schema = set(data[0].keys())
        for obj in data[1:]:
            if set(obj.keys()) != schema:
                raise ValueError("Inconsistent schema")

        return json_to_toon(data)

    except Exception as e:
        logger.error(f"TOON conversion failed: {e}")
        # Fallback to JSON
        return json.dumps(data)

2. Handling Null Values

Python
def json_to_toon_with_nulls(data):
    """Convert a list of dicts to TOON, rendering None values as "NULL".

    Unlike the basic ``json_to_toon`` (which would stringify None as
    "None"), null values get an explicit marker so they can be recognized
    downstream. Note that missing keys also surface as "NULL", since
    ``obj.get(key)`` returns None for absent keys.

    Raises:
        ValueError: if ``data`` is not a non-empty list. (Bug fix: the
        original crashed with IndexError/TypeError on empty or non-list
        input instead of failing with a clear message.)
    """
    if not isinstance(data, list) or len(data) == 0:
        raise ValueError("Input must be non-empty array")

    schema = list(data[0].keys())
    toon = "TOON.schema: " + " | ".join(schema) + "\n"

    for obj in data:
        values = []
        for key in schema:
            value = obj.get(key)
            # Represent null as empty string or special marker
            values.append(str(value) if value is not None else "NULL")
        toon += " | ".join(values) + "\n"

    return toon

3. Escaping Pipe Characters

If your data contains | characters, escape them:

Python
def escape_toon_value(value):
    """Escape a single TOON cell value.

    Escapes backslashes first (so the escape sequences stay unambiguous),
    then the ``|`` column delimiter, and finally flattens newlines — a raw
    newline inside a cell would otherwise split one record across TOON
    rows. Pipe-only values behave exactly as before.
    """
    text = str(value).replace("\\", "\\\\")
    text = text.replace("|", "\\|")
    return text.replace("\n", "\\n")

def json_to_toon_safe(data):
    """Like ``json_to_toon``, but each cell value is escaped so literal
    pipe characters cannot break the column structure."""
    columns = list(data[0].keys())

    header = "TOON.schema: " + " | ".join(columns)
    body = [
        " | ".join(escape_toon_value(record.get(col, "")) for col in columns)
        for record in data
    ]

    # Join header and rows; every line ends with a newline.
    return "\n".join([header] + body) + "\n"

4. Caching TOON Conversions

If you query the same dataset repeatedly, cache the TOON conversion:

Python
from functools import lru_cache
import hashlib
import json

@lru_cache(maxsize=100)
def cached_json_to_toon(data_hash):
    """Return the TOON text for the dataset identified by *data_hash*.

    NOTE(review): assumes get_data_from_cache (defined elsewhere) can map
    the hash back to the original list of dicts — verify it is populated
    before this is called. Because lru_cache keys on the hash string only,
    a stale result is possible if the data behind a hash ever changes.
    """
    # Look up original data by hash
    data = get_data_from_cache(data_hash)
    return json_to_toon(data)

def get_toon_cached(data):
    """Return the TOON rendering of *data*, memoized by content hash."""
    # Hash the data
    # sort_keys makes the JSON canonical so equal dicts hash identically;
    # MD5 is acceptable here — it's a cache key, not a security boundary.
    data_json = json.dumps(data, sort_keys=True)
    data_hash = hashlib.md5(data_json.encode()).hexdigest()
    
    return cached_json_to_toon(data_hash)

Testing Your Integration

Unit Tests

Python (pytest)
import pytest
from toon_converter import json_to_toon, toon_to_json

def test_basic_conversion():
    """Smoke test: the schema marker and every row value appear in the output."""
    records = [
        {"id": 1, "name": "Alice"},
        {"id": 2, "name": "Bob"},
    ]
    result = json_to_toon(records)
    for expected in ("TOON.schema:", "Alice", "Bob"):
        assert expected in result

def test_roundtrip():
    # NOTE(review): this equality only holds if toon_to_json restores the
    # original value types. TOON rows are plain strings, so an implementation
    # that does not coerce "1" back to 1 and "30" back to 30 will fail this
    # test — confirm toon_to_json's type handling in toon_converter.
    original = [{"id": 1, "name": "Alice", "age": 30}]
    
    toon = json_to_toon(original)
    reconstructed = toon_to_json(toon)
    
    assert reconstructed == original

def test_empty_array():
    """An empty list must be rejected rather than producing a bare header."""
    try:
        json_to_toon([])
    except ValueError:
        pass
    else:
        raise AssertionError("json_to_toon([]) should raise ValueError")

def test_null_values():
    """None values must not crash conversion and should be visibly encoded.

    Fix: the basic json_to_toon shown earlier in this guide stringifies
    None as "None" (via str()), which the original assertion rejected —
    accept that encoding alongside "NULL" and the empty-cell form.
    """
    data = [{"id": 1, "name": None}]
    toon = json_to_toon(data)
    assert "NULL" in toon or "None" in toon or toon.endswith(" | \n")

Performance Optimization

Benchmark: TOON vs JSON Token Count

Python
import tiktoken

def count_tokens(text, model="gpt-4"):
    """Return how many tokens *text* occupies under *model*'s tiktoken encoding."""
    enc = tiktoken.encoding_for_model(model)
    tokens = enc.encode(text)
    return len(tokens)

# Compare formats
# NOTE(review): fetch_large_dataset, json, and json_to_toon are assumed to
# be defined/imported elsewhere in the application.
data = fetch_large_dataset()  # 1000 rows

json_text = json.dumps(data)
toon_text = json_to_toon(data)

# Token counts under the same model encoding make the two formats comparable.
json_tokens = count_tokens(json_text)
toon_tokens = count_tokens(toon_text)

print(f"JSON: {json_tokens} tokens")
print(f"TOON: {toon_tokens} tokens")
print(f"Savings: {((json_tokens - toon_tokens) / json_tokens * 100):.1f}%")
Expected output for 1000-row dataset:
JSON: 42,300 tokens
TOON: 18,900 tokens
Savings: 55.3%

Need to Convert Data First?

Use our online converter to test TOON with your actual data

TRY_CONVERTER