Rate Limiting AI API Calls in Node.js with Bottleneck

Rate limiting is critical when calling AI APIs: providers such as OpenAI enforce per-minute request and token quotas, and exceeding them returns HTTP 429 errors. Here's a robust implementation using Bottleneck:

import Bottleneck from 'bottleneck';
import OpenAI from 'openai';

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

const limiter = new Bottleneck({
  reservoir: 60,                       // allow 60 requests...
  reservoirRefreshAmount: 60,          // ...refilled to 60...
  reservoirRefreshInterval: 60 * 1000, // ...every minute
  maxConcurrent: 5,                    // at most 5 requests in flight
  minTime: 100                         // at least 100ms between job launches
});
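
If you want visibility into how close you are to the quota, Bottleneck emits a depleted event when the reservoir hits zero and exposes a counts() snapshot of job states; a minimal monitoring sketch:

// Fires whenever the reservoir drops to 0; queued jobs wait for the next refresh
limiter.on('depleted', () => {
  console.warn('Rate-limit reservoir empty; new jobs will queue');
});

// counts() reports how many jobs are RECEIVED, QUEUED, RUNNING, and EXECUTING
setInterval(() => console.log(limiter.counts()), 10_000);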

// Wrap OpenAI calls; every invocation of the returned function is queued through the limiter
const rateLimitedChat = limiter.wrap(async (prompt) => {
  return openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: prompt }]
  });
});

// Use with automatic queuing
const results = await Promise.all(
  prompts.map(p => rateLimitedChat(p))
);
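
Bottleneck can also retry failed jobs itself: a handler on the failed event that returns a number of milliseconds reschedules the job after that delay. A sketch retrying once on a 429 (error.status assumes the OpenAI SDK's error shape, as above):

limiter.on('failed', (error, jobInfo) => {
  // Returning a number of ms tells Bottleneck to re-run the job after that delay
  if (error.status === 429 && jobInfo.retryCount === 0) {
    return 1000; // retry once, after 1 second
  }
});

limiter.on('retry', (error, jobInfo) => {
  console.warn(`Retrying job ${jobInfo.options.id}`);
});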

Exponential Backoff

Even with a client-side limiter, the API can still return 429s (for example, when other processes share the same key), so wrap calls in exponential backoff:

// Retry fn with exponential backoff on HTTP 429 (Too Many Requests).
// Waits 1s, 2s, 4s, ... between attempts; rethrows other errors immediately.
async function withRetry(fn, maxRetries = 3) {
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await fn();
    } catch (e) {
      if (e.status === 429 && i < maxRetries - 1) {
        // Back off for 2^i seconds before the next attempt
        await new Promise(r => setTimeout(r, Math.pow(2, i) * 1000));
      } else {
        throw e; // out of retries, or a non-rate-limit error
      }
    }
  }
}
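
Combining the two layers, each prompt is queued by the limiter and retried on 429s; Promise.allSettled keeps one failed prompt from rejecting the whole batch (a minimal sketch, assuming prompts is an array of strings):

const settled = await Promise.allSettled(
  prompts.map(p => withRetry(() => rateLimitedChat(p)))
);

const ok = settled.filter(r => r.status === 'fulfilled').map(r => r.value);
console.log(`${ok.length} of ${prompts.length} prompts succeeded`);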
