Adds LLM_PROVIDER=ollama for fully local, zero-cost inference via Ollama's OpenAI-compatible API. No API key required. Configurable base URL via OLLAMA_BASE_URL env var.
50 lines
1.4 KiB
JavaScript
50 lines
1.4 KiB
JavaScript
// Ollama Provider — raw fetch, no SDK
|
|
// Uses Ollama's OpenAI-compatible Chat Completions API
|
|
// No API key required — fully local inference
|
|
|
|
import { LLMProvider } from './provider.mjs';
|
|
|
|
/**
 * LLM provider that talks to an Ollama server through its OpenAI-compatible
 * Chat Completions endpoint (`POST {baseUrl}/v1/chat/completions`) using
 * plain `fetch`. No API key or Authorization header is sent.
 */
export class OllamaProvider extends LLMProvider {
  /**
   * @param {object} [config] - Provider settings; also forwarded to the base class.
   * @param {string} [config.baseUrl] - Server root URL. Defaults to
   *   `http://localhost:11434`. Trailing slashes and a trailing `/v1` path
   *   segment are removed, so both `http://host:11434` and
   *   `http://host:11434/v1/` resolve to the same endpoint.
   * @param {string} [config.model] - Model tag to request. Defaults to `llama3.1:8b`.
   */
  constructor(config = {}) {
    super(config);
    this.name = 'ollama';
    // `complete()` appends "/v1/chat/completions" itself. OpenAI-style base
    // URLs usually already end in "/v1", which would otherwise yield
    // ".../v1/v1/chat/completions". The lookbehind keeps a URL whose *host*
    // is literally "v1" (e.g. "http://v1") untouched.
    this.baseUrl = (config.baseUrl || 'http://localhost:11434')
      .replace(/\/+$/, '')
      .replace(/(?<!\/)\/v1$/, '');
    this.model = config.model || 'llama3.1:8b';
  }

  /**
   * Whether the provider has a model name to send.
   * @returns {boolean} `true` when `this.model` is a non-empty value.
   */
  get isConfigured() {
    return !!this.model;
  }

  /**
   * Send one system + user message pair and return the first choice's text.
   *
   * @param {string} systemPrompt - Content of the `system` message.
   * @param {string} userMessage - Content of the `user` message.
   * @param {object} [opts]
   * @param {number} [opts.maxTokens] - Sent as `max_tokens`; falls back to 4096 when falsy.
   * @param {number} [opts.timeout] - Abort timeout in milliseconds; falls back to 120000 when falsy.
   * @returns {Promise<{text: string, usage: {inputTokens: number, outputTokens: number}, model: string}>}
   *   `text` is `''` when the response carries no message content; token
   *   counts are `0` when the response carries no usage data.
   * @throws {Error} `Ollama API <status>: <body>` when the HTTP status is not
   *   OK. Errors raised by `fetch` itself (network failure, timeout abort) and
   *   by JSON parsing propagate unchanged.
   */
  async complete(systemPrompt, userMessage, opts = {}) {
    const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.model,
        max_tokens: opts.maxTokens || 4096,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userMessage },
        ],
      }),
      signal: AbortSignal.timeout(opts.timeout || 120000),
    });

    if (!res.ok) {
      // Reading the error body is best-effort; the status code alone is still reported.
      const err = await res.text().catch(() => '');
      // Cap the echoed body so a large error page does not flood logs.
      throw new Error(`Ollama API ${res.status}: ${err.substring(0, 200)}`);
    }

    const data = await res.json();
    const text = data.choices?.[0]?.message?.content || '';

    return {
      text,
      usage: {
        inputTokens: data.usage?.prompt_tokens || 0,
        outputTokens: data.usage?.completion_tokens || 0,
      },
      // Prefer the model name echoed by the server; fall back to the requested one.
      model: data.model || this.model,
    };
  }
}
|