Performance Optimization
Learn how to optimize your Apache AGE graph database queries and operations for maximum performance.
Query Optimization
Use Indexes Effectively
Apache AGE does not support Cypher's CREATE INDEX syntax; indexes are ordinary PostgreSQL indexes on the tables that back each label. Run DDL like the following via psql or any SQL client (the graph and label names are examples):

-- GIN index covering all properties of a label
CREATE INDEX person_props_idx ON my_graph."Person" USING gin (properties);

-- Expression indexes for single, frequently queried properties
CREATE INDEX person_name_idx ON my_graph."Person"
    (ag_catalog.agtype_access_operator(properties, '"name"'::agtype));

CREATE INDEX person_email_idx ON my_graph."Person"
    (ag_catalog.agtype_access_operator(properties, '"email"'::agtype));

-- Multi-column expression index for multi-property queries
CREATE INDEX person_age_city_idx ON my_graph."Person"
    (ag_catalog.agtype_access_operator(properties, '"age"'::agtype),
     ag_catalog.agtype_access_operator(properties, '"city"'::agtype));
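To confirm an index is actually used, run EXPLAIN on the SQL that wraps the Cypher call (standard Apache AGE usage; graph and label names are examples):

-- Check the plan for an indexed lookup
EXPLAIN ANALYZE
SELECT * FROM cypher('my_graph', $$
    MATCH (p:Person)
    WHERE p.name = 'Alice'
    RETURN p
$$) AS (p agtype);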
Optimize MATCH Patterns
// ❌ Inefficient: no labels specified
const inefficient = await client.query()
  .match('(a)-[r]->(b)')
  .where({ 'a.name': 'Alice' })
  .return('b')
  .execute();

// ✅ Efficient: labels specified
const efficient = await client.query()
  .match('(a:Person)-[r:KNOWS]->(b:Person)')
  .where({ 'a.name': 'Alice' })
  .return('b')
  .execute();

// ✅ Even better: anchor on the indexed property first
const optimized = await client.query()
  .match('(a:Person)')
  .where({ 'a.name': 'Alice' }) // Indexed property narrows the search first
  .match('(a)-[r:KNOWS]->(b:Person)')
  .return('b')
  .execute();
Limit Result Sets
// Always use LIMIT for potentially large result sets
const limitedResults = await client.query()
  .match('(p:Person)')
  .return('p')
  .orderBy('p.name')
  .limit(100)
  .execute();

// Use SKIP for pagination; a stable ORDER BY is required
// for deterministic, non-overlapping pages
const page2 = await client.query()
  .match('(p:Person)')
  .return('p')
  .orderBy('p.name')
  .skip(100)
  .limit(100)
  .execute();
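SKIP still reads and discards every skipped row, so deep pages get progressively slower. Keyset pagination filters on the last key seen instead. A sketch using a raw parameterized query, as in the caching example later in this guide (the $after parameter name is illustrative):

// Keyset pagination: fetch the page that follows the last name already seen.
// Requires ordering by the same key that is compared in the WHERE clause.
async function nextPage(after: string, pageSize: number = 100) {
  return client.query()
    .raw(
      `MATCH (p:Person)
       WHERE p.name > $after
       RETURN p
       ORDER BY p.name
       LIMIT ${pageSize}`
    )
    .setParams({ after })
    .execute();
}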
Optimize Path Queries
// ❌ Inefficient: unbounded path can traverse the whole graph
const unbounded = await client.query()
  .match('(a:Person)-[:KNOWS*]->(b:Person)')
  .where({ 'a.name': 'Alice' })
  .return('b')
  .execute();

// ✅ Efficient: bounded path
const bounded = await client.query()
  .match('(a:Person)-[:KNOWS*1..3]->(b:Person)')
  .where({ 'a.name': 'Alice' })
  .return('b')
  .execute();

// ✅ Shortest path, where supported. Note: shortestPath() comes from
// Neo4j's Cypher dialect and is not implemented by all Apache AGE
// releases; verify support in your AGE version before relying on it
const shortestPath = await client.query()
  .match('path = shortestPath((a:Person)-[:KNOWS*]->(b:Person))')
  .where({ 'a.name': 'Alice', 'b.name': 'Bob' })
  .return('path')
  .execute();
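If your AGE version does not provide shortestPath(), a bounded variable-length match ordered by path length is a workable approximation. A sketch, assuming the openCypher length() function is available:

// Bounded search; keep the shortest match found within the depth cap
const approximateShortest = await client.query()
  .raw(
    `MATCH p = (a:Person {name: 'Alice'})-[:KNOWS*1..4]->(b:Person {name: 'Bob'})
     RETURN p ORDER BY length(p) ASC LIMIT 1`
  )
  .execute();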
Connection Pool Optimization
Configure Pool Settings
const optimizedClient = new AgeSchemaClient({
  connectionString: 'postgresql://user:pass@localhost:5432/graphdb',
  graphName: 'my_graph',
  poolConfig: {
    // Core node-postgres pool options
    max: 20,                        // Maximum connections
    min: 5,                         // Minimum connections
    idleTimeoutMillis: 30000,       // Close idle connections after 30s
    connectionTimeoutMillis: 10000, // Timeout for new connections
    // The options below come from generic pool libraries; check which
    // keys your pool implementation actually honors before relying on them
    acquireTimeoutMillis: 60000,    // Timeout for acquiring a connection
    createTimeoutMillis: 30000,     // Timeout for creating a connection
    destroyTimeoutMillis: 5000,     // Timeout for destroying a connection
    reapIntervalMillis: 1000,       // How often to check for idle connections
    createRetryIntervalMillis: 200, // Retry interval for failed creates
    propagateCreateError: false     // Don't fail fast on connection errors
  }
});
Monitor Pool Health
// Monitor connection pool metrics
function logPoolStats() {
  const pool = client.getPool();
  console.log('Pool Stats:', {
    totalCount: pool.totalCount,
    idleCount: pool.idleCount,
    waitingCount: pool.waitingCount
  });
}

// Set up periodic monitoring
setInterval(logPoolStats, 30000); // Every 30 seconds
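A waitingCount that stays above zero means queries are queuing for connections: either the pool ceiling is too low or queries hold connections too long. A minimal alert sketch (the interval and log format are arbitrary examples):

// Warn when clients are waiting on the pool
function checkPoolSaturation() {
  const pool = client.getPool();
  if (pool.waitingCount > 0) {
    console.warn(
      `Pool saturated: ${pool.waitingCount} waiting, ` +
      `${pool.totalCount} total, ${pool.idleCount} idle`
    );
  }
}
setInterval(checkPoolSaturation, 5000);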
Batch Operations
Optimize Batch Sizes
// Test different batch sizes to find the best throughput for your data.
// Note: each iteration re-loads the same vertices, so run this against a
// disposable graph to avoid polluting real data.
async function findOptimalBatchSize(data: any[]) {
  const batchSizes = [100, 500, 1000, 2000, 5000];
  const results: { size: number; time: number }[] = [];

  for (const batchSize of batchSizes) {
    const startTime = Date.now();

    for (let i = 0; i < data.length; i += batchSize) {
      const batch = data.slice(i, i + batchSize);
      await client.createBatchLoader().load({
        vertices: batch,
        edges: []
      });
    }

    const endTime = Date.now();
    results.push({ size: batchSize, time: endTime - startTime });
    console.log(`Batch size ${batchSize}: ${endTime - startTime}ms`);
  }

  // Pick the batch size with the lowest total time
  const optimal = results.reduce((min, current) =>
    current.time < min.time ? current : min
  );
  console.log(`Optimal batch size: ${optimal.size} (${optimal.time}ms)`);
  return optimal.size;
}
Parallel Processing
// Process multiple batches in parallel
async function parallelBatchLoad(data: any[], batchSize: number = 1000) {
  const chunks = [];
  for (let i = 0; i < data.length; i += batchSize) {
    chunks.push(data.slice(i, i + batchSize));
  }

  // Process chunks in waves to limit concurrency
  const CONCURRENCY_LIMIT = 5;
  for (let i = 0; i < chunks.length; i += CONCURRENCY_LIMIT) {
    const parallelChunks = chunks.slice(i, i + CONCURRENCY_LIMIT);
    await Promise.all(
      parallelChunks.map(async (chunk) => {
        const batchLoader = client.createBatchLoader();
        await batchLoader.load({
          vertices: chunk,
          edges: []
        });
      })
    );
    console.log(`Processed ${Math.min(i + CONCURRENCY_LIMIT, chunks.length)} of ${chunks.length} batches`);
  }
}
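Promise.all rejects as soon as one chunk fails, abandoning the results of the rest of the wave. If partial progress is acceptable, Promise.allSettled collects failures instead. A variant sketch for a single wave (the retry and reporting strategy is an assumption; adapt it to your needs):

// Load one wave of chunks, tolerating individual failures
async function loadWave(chunks: any[][]): Promise<number> {
  const outcomes = await Promise.allSettled(
    chunks.map((chunk) =>
      client.createBatchLoader().load({ vertices: chunk, edges: [] })
    )
  );
  const failed = outcomes.filter((o) => o.status === 'rejected').length;
  if (failed > 0) {
    console.warn(`${failed} of ${chunks.length} chunks failed`);
  }
  return failed;
}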
Query Caching
Implement Query Result Caching
class QueryCache {
  private cache = new Map<string, { result: any; timestamp: number }>();
  private ttl = 5 * 60 * 1000; // 5 minutes

  private getCacheKey(query: string, params: any): string {
    return `${query}:${JSON.stringify(params)}`;
  }

  get(query: string, params: any): any | null {
    const key = this.getCacheKey(query, params);
    const cached = this.cache.get(key);
    if (cached && Date.now() - cached.timestamp < this.ttl) {
      return cached.result;
    }
    this.cache.delete(key);
    return null;
  }

  set(query: string, params: any, result: any): void {
    const key = this.getCacheKey(query, params);
    this.cache.set(key, { result, timestamp: Date.now() });
  }

  clear(): void {
    this.cache.clear();
  }
}

// Usage
const queryCache = new QueryCache();

async function cachedQuery(query: string, params: any = {}) {
  const cached = queryCache.get(query, params);
  if (cached) {
    console.log('Cache hit');
    return cached;
  }

  console.log('Cache miss - executing query');
  const result = await client.query()
    .raw(query)
    .setParams(params)
    .execute();

  queryCache.set(query, params, result);
  return result;
}
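For example (the query text plus serialized parameters form the cache key, so an identical second call within the TTL is served from memory):

// First call misses and executes; the repeat within 5 minutes hits the cache
const friendQuery = 'MATCH (p:Person) WHERE p.name = $name RETURN p';
await cachedQuery(friendQuery, { name: 'Alice' }); // Cache miss - executing query
await cachedQuery(friendQuery, { name: 'Alice' }); // Cache hit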
Memory Management
Optimize Large Result Sets
// Stream large result sets instead of loading everything into memory.
// The base query must end with RETURN and a stable ORDER BY so that
// SKIP/LIMIT pagination yields deterministic, non-overlapping pages.
async function* streamLargeResults(query: string) {
  const BATCH_SIZE = 1000;
  let offset = 0;

  while (true) {
    const batch = await client.query()
      .raw(`${query} SKIP ${offset} LIMIT ${BATCH_SIZE}`)
      .execute();

    if (batch.length === 0) break;

    for (const row of batch) {
      yield row;
    }
    offset += BATCH_SIZE;
  }
}

// Usage
for await (const person of streamLargeResults('MATCH (p:Person) RETURN p ORDER BY p.name')) {
  // Process one person at a time
  console.log(person);
}
Garbage Collection Optimization
// Encourage garbage collection periodically during large operations.
// global.gc is only available when Node is started with --expose-gc;
// otherwise this degrades gracefully to plain batched loading.
async function loadWithGC(data: any[]) {
  const BATCH_SIZE = 1000;
  const GC_INTERVAL = 10; // Run GC every 10 batches

  for (let i = 0; i < data.length; i += BATCH_SIZE) {
    const batch = data.slice(i, i + BATCH_SIZE);
    await client.createBatchLoader().load({
      vertices: batch,
      edges: []
    });

    // Trigger garbage collection periodically (skipping batch 0)
    const batchIndex = i / BATCH_SIZE;
    if (batchIndex > 0 && batchIndex % GC_INTERVAL === 0 && global.gc) {
      global.gc();
      console.log(`GC triggered after batch ${batchIndex}`);
    }
  }
}
Database Configuration
PostgreSQL Optimization
# Tuning postgresql.conf for graph workloads. Values are starting points;
# adjust to your hardware, then reload or restart PostgreSQL to apply.

# Memory settings
shared_buffers = '256MB'            # Commonly ~25% of RAM
effective_cache_size = '1GB'        # Commonly ~50-75% of RAM
work_mem = '16MB'                   # Per sort/hash operation
maintenance_work_mem = '64MB'       # For maintenance operations

# Connection settings
max_connections = 100               # Adjust to your pool sizes

# Checkpoint settings
checkpoint_completion_target = 0.9
wal_buffers = '16MB'
checkpoint_timeout = '10min'

# Query planner settings
random_page_cost = 1.1              # For SSD storage
effective_io_concurrency = 200      # For SSD storage

# Logging (for performance analysis)
log_min_duration_statement = 1000   # Log queries slower than 1s
log_statement = 'none'              # Don't log all statements
log_duration = off
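You can confirm what the running server actually uses from SQL:

-- Inspect the live values of the settings above
SELECT name, setting, unit
FROM pg_settings
WHERE name IN ('shared_buffers', 'effective_cache_size', 'work_mem',
               'max_connections', 'random_page_cost');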
Apache AGE Configuration
-- Load the AGE extension and set the search path (per session)
LOAD 'age';
SET search_path = ag_catalog, "$user", public;

-- Create the graph
SELECT create_graph('optimized_graph');

-- Analyze the graph's label tables regularly so the planner has fresh
-- statistics (substitute your own graph and label names)
ANALYZE optimized_graph."Person";
ANALYZE optimized_graph."KNOWS";
Monitoring and Profiling
Query Performance Analysis
// Wrapper to measure query duration and memory impact
async function profileQuery(queryFn: () => Promise<any>, label: string) {
  const startTime = process.hrtime.bigint();
  const startMemory = process.memoryUsage();

  try {
    const result = await queryFn();
    const endTime = process.hrtime.bigint();
    const endMemory = process.memoryUsage();

    const duration = Number(endTime - startTime) / 1000000; // ns -> ms
    const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;

    console.log(`${label}:`);
    console.log(`  Duration: ${duration.toFixed(2)}ms`);
    console.log(`  Memory delta: ${(memoryDelta / 1024 / 1024).toFixed(2)}MB`);
    console.log(`  Result count: ${Array.isArray(result) ? result.length : 1}`);

    return result;
  } catch (error) {
    console.error(`${label} failed:`, error);
    throw error;
  }
}

// Usage
const result = await profileQuery(
  () => client.query()
    .match('(p:Person)-[:KNOWS]->(f:Person)')
    .return('p.name, f.name')
    .execute(),
  'Friends query'
);
System Resource Monitoring
// Monitor system resources during operations
class ResourceMonitor {
  private interval: NodeJS.Timeout | null = null;

  start(intervalMs: number = 5000) {
    this.interval = setInterval(() => {
      const memory = process.memoryUsage();
      const cpu = process.cpuUsage();

      console.log('Resource Usage:', {
        memory: {
          rss: `${(memory.rss / 1024 / 1024).toFixed(2)}MB`,
          heapUsed: `${(memory.heapUsed / 1024 / 1024).toFixed(2)}MB`,
          heapTotal: `${(memory.heapTotal / 1024 / 1024).toFixed(2)}MB`
        },
        cpu: {
          user: `${(cpu.user / 1000).toFixed(2)}ms`,
          system: `${(cpu.system / 1000).toFixed(2)}ms`
        }
      });
    }, intervalMs);
  }

  stop() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }
}

// Usage
const monitor = new ResourceMonitor();
monitor.start();
// ... perform operations ...
monitor.stop();
Best Practices Summary
- Always use labels in MATCH clauses
- Create PostgreSQL indexes on label tables for frequently queried properties
- Limit result sets with LIMIT; paginate with SKIP or keyset filtering
- Bound path queries to prevent infinite traversals
- Use appropriate batch sizes for bulk operations
- Monitor connection pool health
- Cache frequently used queries
- Stream large result sets to manage memory
- Profile queries to identify bottlenecks
- Optimize PostgreSQL configuration for your workload
Next Steps
- Bulk Loading - Efficient data loading strategies
- Error Handling - Robust error management
- Troubleshooting - Common issues and solutions