Ollama Node.js API Reference

This reference provides comprehensive examples for integrating Ollama into Node.js projects using the official ollama npm package.

IMPORTANT: Always use streaming responses for a better user experience.

Table of Contents

  1. Package Setup
  2. Installation & Setup
  3. Verifying Ollama Connection
  4. Model Selection
  5. Generate API (Text Completion)
  6. Chat API (Conversational)
  7. Embeddings
  8. Error Handling
  9. Best Practices
  10. Complete Example Script

Package Setup

ES Modules (package.json)

When creating Node.js scripts for users, always use ES modules. Create a package.json with:

{
  "type": "module",
  "dependencies": {
    "ollama": "^0.5.0"
  }
}

This allows using modern import syntax instead of require.
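
If a project cannot switch to ES modules, Node's dynamic import() still works from CommonJS. A minimal sketch under that assumption (the rest of this reference assumes ES modules):

// CommonJS fallback: load the package via dynamic import (works regardless of module type).
async function loadOllama() {
  const { Ollama } = await import('ollama');
  return new Ollama();
}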

Running Scripts

# Install dependencies
npm install

# Run script
node script.js

Installation & Setup

Installation

npm install ollama

Import

import { Ollama } from 'ollama';

Configuration

IMPORTANT: Always ask users for their Ollama URL. Do not assume localhost.

import { Ollama } from 'ollama';

// Create client with custom URL
const ollama = new Ollama({ host: 'http://localhost:11434' });

// Or for remote Ollama instance
// const ollama = new Ollama({ host: 'http://192.168.1.100:11434' });
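
Rather than hard-coding the URL, a script can take it from a command-line argument or an environment variable and only fall back to localhost as a default. A minimal sketch (OLLAMA_HOST is read explicitly here rather than relying on any client-side default):

import { Ollama } from 'ollama';

// Priority: CLI argument, then OLLAMA_HOST environment variable, then localhost.
const host = process.argv[2] ?? process.env.OLLAMA_HOST ?? 'http://localhost:11434';
const ollama = new Ollama({ host });
console.log(`Using Ollama at ${host}`);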

Verifying Ollama Connection

Check Connection (Development)

During development, verify Ollama is running and check available models using curl:

# Check Ollama is running and get version
curl http://localhost:11434/api/version

# List available models
curl http://localhost:11434/api/tags

Check Connection (Node.js)

import { Ollama } from 'ollama';

const ollama = new Ollama();

async function checkOllama() {
  try {
    // Simple way to verify connection
    const models = await ollama.list();
    console.log('✓ Connected to Ollama');
    console.log(`  Available models: ${models.models.length}`);
    return true;
  } catch (error) {
    console.log(`✗ Failed to connect to Ollama: ${error.message}`);
    return false;
  }
}

// Usage
await checkOllama();
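
To mirror the curl version check from Node.js, you can call the /api/version REST endpoint directly with the built-in fetch (Node 18+). This is a sketch against the REST endpoint shown above, not a method of the ollama client:

// Query the version endpoint directly; assumes Node 18+ (global fetch).
async function getOllamaVersion(host = 'http://localhost:11434') {
  const res = await fetch(`${host}/api/version`);
  if (!res.ok) {
    throw new Error(`Unexpected status ${res.status}`);
  }
  const { version } = await res.json();
  return version;
}

console.log(`Ollama version: ${await getOllamaVersion()}`);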

Model Selection

IMPORTANT: Always ask users which model they want to use. Don't assume a default.

Listing Available Models

import { Ollama } from 'ollama';

const ollama = new Ollama();

async function listAvailableModels() {
  const { models } = await ollama.list();
  return models.map(m => m.name);
}

// Usage - show available models to user
const available = await listAvailableModels();
console.log('Available models:');
available.forEach(model => {
  console.log(`  - ${model}`);
});

Finding Models

If the user doesn't have a model installed or wants to use a different one (a sketch for pulling a model programmatically follows this list):

  • Browse models: Direct them to https://ollama.com/search
  • Popular choices: llama3.2, llama3.1, mistral, phi3, qwen2.5
  • Specialized models: codellama (coding), llava (vision), nomic-embed-text (embeddings)
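
Models can also be pulled from a script. A sketch using the client's pull method with streaming progress (the exact shape of the progress chunks, such as status, completed, and total, should be treated as an assumption and checked against what you actually receive):

import { Ollama } from 'ollama';

const ollama = new Ollama();

async function pullModel(model) {
  // Stream progress updates while the model downloads.
  const progress = await ollama.pull({ model: model, stream: true });

  for await (const chunk of progress) {
    if (chunk.total && chunk.completed) {
      const percent = ((chunk.completed / chunk.total) * 100).toFixed(1);
      process.stdout.write(`\r${chunk.status}: ${percent}%   `);
    } else {
      process.stdout.write(`\r${chunk.status}                    `);
    }
  }
  process.stdout.write('\n');
}

// Usage
// await pullModel('llama3.2');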

Model Selection Flow

async function selectModel() {
  const available = await listAvailableModels();

  if (available.length === 0) {
    console.log('No models installed!');
    console.log('Visit https://ollama.com/search to find models');
    console.log('Then run: ollama pull <model-name>');
    return null;
  }

  console.log('Available models:');
  available.forEach((model, i) => {
    console.log(`  ${i + 1}. ${model}`);
  });

  // In practice, you'd ask the user to choose
  return available[0];  // Default to first available
}
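
As a concrete version of "ask the user to choose", here is a sketch that prompts for a number using readline/promises and reuses listAvailableModels from above (the fallback to the first model is an arbitrary choice):

import * as readline from 'readline/promises';

async function selectModelInteractive() {
  const available = await listAvailableModels();
  if (available.length === 0) return null;

  available.forEach((model, i) => console.log(`  ${i + 1}. ${model}`));

  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  const answer = await rl.question('Choose a model (number): ');
  rl.close();

  const index = Number.parseInt(answer, 10) - 1;
  // Fall back to the first model if the input is not a valid choice.
  return available[index] ?? available[0];
}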

Generate API (Text Completion)

Streaming Text Generation

import { Ollama } from 'ollama';

const ollama = new Ollama();

async function generateStream(prompt, model = 'llama3.2') {
  const response = await ollama.generate({
    model: model,
    prompt: prompt,
    stream: true
  });

  for await (const chunk of response) {
    process.stdout.write(chunk.response);
  }
}

// Usage
process.stdout.write('Response: ');
await generateStream('Why is the sky blue?', 'llama3.2');
process.stdout.write('\n');

With Options (Temperature, Top-P, etc.)

async function generateWithOptions(prompt, model = 'llama3.2') {
  const response = await ollama.generate({
    model: model,
    prompt: prompt,
    stream: true,
    options: {
      temperature: 0.7,
      top_p: 0.9,
      top_k: 40,
      num_predict: 100  // Max tokens
    }
  });

  for await (const chunk of response) {
    process.stdout.write(chunk.response);
  }
}

// Usage
process.stdout.write('Response: ');
await generateWithOptions('Write a haiku about programming');
process.stdout.write('\n');
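
Ollama's API also documents a seed option. Combined with a low temperature it can make output repeatable across runs, which is useful when testing prompts. A brief sketch (whether a particular model fully honors the seed is an assumption worth verifying):

async function generateDeterministic(prompt, model = 'llama3.2') {
  const response = await ollama.generate({
    model: model,
    prompt: prompt,
    stream: true,
    options: {
      temperature: 0,  // greedy decoding
      seed: 42         // fixed seed for repeatable sampling
    }
  });

  for await (const chunk of response) {
    process.stdout.write(chunk.response);
  }
}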

Chat API (Conversational)

Streaming Chat

import { Ollama } from 'ollama';

const ollama = new Ollama();

/**
 * Chat with a model using conversation history, streaming the reply.
 *
 * @param {Array<{role: 'system' | 'user' | 'assistant', content: string}>} messages
 * @param {string} model - Name of the model to use
 */
async function chatStream(messages, model = 'llama3.2') {
  const response = await ollama.chat({
    model: model,
    messages: messages,
    stream: true
  });

  for await (const chunk of response) {
    process.stdout.write(chunk.message.content);
  }
}

// Usage
const messages = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'What is the capital of France?' }
];

process.stdout.write('Response: ');
await chatStream(messages);
process.stdout.write('\n');

Multi-turn Conversation

import * as readline from 'readline';
import { Ollama } from 'ollama';

const ollama = new Ollama();

async function conversationLoop(model = 'llama3.2') {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });

  const messages = [
    { role: 'system', content: 'You are a helpful assistant.' }
  ];

  const askQuestion = () => {
    rl.question('\nYou: ', async (input) => {
      if (input.toLowerCase() === 'exit' || input.toLowerCase() === 'quit') {
        rl.close();
        return;
      }

      // Add user message
      messages.push({ role: 'user', content: input });

      // Stream response
      process.stdout.write('Assistant: ');
      let fullResponse = '';

      const response = await ollama.chat({
        model: model,
        messages: messages,
        stream: true
      });

      for await (const chunk of response) {
        const content = chunk.message.content;
        process.stdout.write(content);
        fullResponse += content;
      }
      process.stdout.write('\n');

      // Add assistant response to history
      messages.push({ role: 'assistant', content: fullResponse });

      askQuestion();
    });
  };

  askQuestion();
}

// Usage
await conversationLoop();

Embeddings

Generate Embeddings

import { Ollama } from 'ollama';

const ollama = new Ollama();

/**
 * Generate an embedding vector for a piece of text.
 *
 * Note: use an embedding-specific model such as 'nomic-embed-text'.
 * Regular chat models can generate embeddings, but dedicated models work better.
 *
 * @param {string} text
 * @param {string} model - Name of the embedding model to use
 */
async function getEmbeddings(text, model = 'nomic-embed-text') {
  const response = await ollama.embeddings({
    model: model,
    prompt: text
  });

  return response.embedding;
}

// Usage
const embedding = await getEmbeddings('Hello, world!');
console.log(`Embedding dimension: ${embedding.length}`);
console.log(`First 5 values: ${embedding.slice(0, 5)}`);

Semantic Similarity

function cosineSimilarity(vec1, vec2) {
  const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
  const magnitude1 = Math.sqrt(vec1.reduce((sum, val) => sum + val * val, 0));
  const magnitude2 = Math.sqrt(vec2.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitude1 * magnitude2);
}

// Usage
const text1 = 'The cat sat on the mat';
const text2 = 'A feline rested on a rug';
const text3 = 'JavaScript is a programming language';

const emb1 = await getEmbeddings(text1);
const emb2 = await getEmbeddings(text2);
const emb3 = await getEmbeddings(text3);

console.log(`Similarity 1-2: ${cosineSimilarity(emb1, emb2).toFixed(3)}`);  // High
console.log(`Similarity 1-3: ${cosineSimilarity(emb1, emb3).toFixed(3)}`);  // Low
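
Building on getEmbeddings and cosineSimilarity above, here is a small sketch of ranking documents against a query, which is the core of a simple semantic search:

async function rankBySimilarity(query, documents) {
  const queryEmbedding = await getEmbeddings(query);

  const scored = [];
  for (const doc of documents) {
    const docEmbedding = await getEmbeddings(doc);
    scored.push({ doc, score: cosineSimilarity(queryEmbedding, docEmbedding) });
  }

  // Highest similarity first
  return scored.sort((a, b) => b.score - a.score);
}

// Usage
const ranked = await rankBySimilarity('a cat resting indoors', [text1, text2, text3]);
ranked.forEach(({ doc, score }) => console.log(`${score.toFixed(3)}  ${doc}`));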

Error Handling

Comprehensive Error Handling

import { Ollama } from 'ollama';

const ollama = new Ollama();

async function* safeGenerateStream(prompt, model = 'llama3.2') {
  try {
    const response = await ollama.generate({
      model: model,
      prompt: prompt,
      stream: true
    });

    for await (const chunk of response) {
      yield chunk.response;
    }

  } catch (error) {
    // Model not found or other API errors
    if (error.message.toLowerCase().includes('not found')) {
      console.log(`\n✗ Model '${model}' not found`);
      console.log(`  Run: ollama pull ${model}`);
      console.log(`  Or browse models at: https://ollama.com/search`);
    } else if (error.code === 'ECONNREFUSED') {
      console.log('\n✗ Connection failed. Is Ollama running?');
      console.log('  Start Ollama with: ollama serve');
    } else {
      console.log(`\n✗ Unexpected error: ${error.message}`);
    }
  }
}

// Usage
process.stdout.write('Response: ');
for await (const token of safeGenerateStream('Hello, world!', 'llama3.2')) {
  process.stdout.write(token);
}
process.stdout.write('\n');
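
Transient connection problems can be worth retrying before giving up. A generic retry-with-backoff sketch that wraps any Ollama call (this is plain JavaScript, not a feature of the ollama package):

async function withRetry(fn, { attempts = 3, delayMs = 1000 } = {}) {
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt === attempts) throw error;
      console.log(`Attempt ${attempt} failed (${error.message}), retrying in ${delayMs}ms...`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}

// Usage
const models = await withRetry(() => ollama.list());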

Checking Model Availability

async function ensureModelAvailable(model) {
  try {
    const { models } = await ollama.list();
    const modelNames = models.map(m => m.name);

    // Names returned by ollama.list() usually include a tag (e.g. 'llama3.2:latest'),
    // so accept either an exact match or a match on the name without its tag.
    const isAvailable = modelNames.some(
      name => name === model || name.split(':')[0] === model
    );

    if (!isAvailable) {
      console.log(`Model '${model}' not available locally`);
      console.log(`Available models: ${modelNames.join(', ')}`);
      console.log(`\nTo download: ollama pull ${model}`);
      console.log(`Browse models: https://ollama.com/search`);
      return false;
    }

    return true;

  } catch (error) {
    console.log(`Failed to check models: ${error.message}`);
    return false;
  }
}

// Usage
if (await ensureModelAvailable('llama3.2')) {
  // Proceed with using the model
}

Best Practices

  1. Always Use Streaming: Stream responses for a better user experience
  2. Ask About Models: Don't assume a model; ask users which one they want to use
  3. Verify Connection: Check the Ollama connection during development with curl
  4. Error Handling: Handle "model not found" and connection errors gracefully
  5. Context Management: Manage conversation history to avoid hitting context limits (see the trimming sketch after this list)
  6. Model Selection: Direct users to https://ollama.com/search to find models
  7. Custom Hosts: Always ask users for their Ollama URL; don't assume localhost
  8. ES Modules: Use "type": "module" in package.json for modern import syntax
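
For practice 5, one simple approach is to keep the system message plus only the most recent turns. A minimal sketch (the cutoff of 20 messages is an arbitrary assumption; real limits depend on the model's context window):

function trimHistory(messages, maxMessages = 20) {
  // Keep the system prompt (if any) plus the most recent messages.
  const system = messages.filter(m => m.role === 'system');
  const rest = messages.filter(m => m.role !== 'system');
  return [...system, ...rest.slice(-maxMessages)];
}

// Usage inside a conversation loop, before each chat call:
// const trimmed = trimHistory(messages);
// const response = await ollama.chat({ model, messages: trimmed, stream: true });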

Complete Example Script

// script.js
import { Ollama } from 'ollama';

const ollama = new Ollama();

async function main() {
  const model = 'llama3.2';

  // Check connection
  try {
    await ollama.list();
  } catch (error) {
    console.log(`Error: Cannot connect to Ollama - ${error.message}`);
    console.log('Make sure Ollama is running: ollama serve');
    return;
  }

  // Stream a response
  console.log('Asking about JavaScript...\n');

  const response = await ollama.generate({
    model: model,
    prompt: 'Explain JavaScript in one sentence',
    stream: true
  });

  process.stdout.write('Response: ');
  for await (const chunk of response) {
    process.stdout.write(chunk.response);
  }
  process.stdout.write('\n');
}

main();

package.json

{
  "type": "module",
  "dependencies": {
    "ollama": "^0.5.0"
  }
}

Running

npm install
node script.js