Streaming responses from LLMs gives a much better user experience: tokens appear as soon as they are generated instead of after the whole completion finishes. Here's how to implement it with the OpenAI Node.js SDK:
import OpenAI from 'openai';

const openai = new OpenAI();

// Stream a chat completion and print tokens as they arrive.
async function streamChat(prompt) {
  const stream = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: prompt }],
    stream: true
  });

  // With stream: true the SDK returns an async iterable; each chunk
  // carries a small delta of the response.
  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content || '';
    process.stdout.write(content);
  }
}

await streamChat('Explain async iterators');
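Often you also want to keep the complete reply (for logging or caching) while still printing tokens as they arrive. A minimal sketch reusing the same client and loop; the name streamAndCollect is just illustrative:

// Collect the streamed text while still printing it incrementally.
async function streamAndCollect(prompt) {
  const stream = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: prompt }],
    stream: true
  });

  let fullText = '';
  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content || '';
    fullText += content;            // accumulate the whole reply
    process.stdout.write(content);  // still show tokens as they arrive
  }
  return fullText;
}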
Express.js SSE Integration
To stream to a browser, forward each chunk over Server-Sent Events:
app.get('/chat', async (req, res) => {
  // Standard SSE headers: keep the connection open and uncached.
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.flushHeaders();

  const stream = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: req.query.prompt }],
    stream: true
  });

  for await (const chunk of stream) {
    // SSE messages are terminated by a blank line, hence the double newline.
    res.write(`data: ${JSON.stringify(chunk)}\n\n`);
  }

  res.end();
});
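On the client, the endpoint above can be consumed with the browser's built-in EventSource API. A minimal sketch, assuming the page is served from the same origin and has an element with id 'output' (both assumptions for illustration); since the server forwards the whole chunk object, the client still has to pick out the delta content:

// Browser-side consumer for the /chat SSE endpoint above.
const output = document.getElementById('output'); // illustrative element id
const source = new EventSource('/chat?prompt=' + encodeURIComponent('Explain async iterators'));

source.onmessage = (event) => {
  const chunk = JSON.parse(event.data);
  const content = chunk.choices[0]?.delta?.content || '';
  output.textContent += content;
};

source.onerror = () => {
  // The server calls res.end() when the stream finishes; close here to stop
  // EventSource from automatically reconnecting.
  source.close();
};

Forwarding the entire chunk keeps the server simple; if you only need the text, you could instead write just the delta content on the server and skip the JSON parsing on the client.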
