fix: respect configured LLM generation limits

2026-07-04 12:11:28 +02:00
parent ebe2906d1c
commit dda1d23a30
5 changed files with 58 additions and 2 deletions
--- a/.env.example
+++ b/.env.example
@@ -29,6 +29,7 @@ OPENROUTER_APP_NAME=Intelligence Terminal

 # Local OpenAI-compatible examples
 # LiteLLM: LLM_PROVIDER=litellm, LLM_BASE_URL=https://llm.example.com/v1, LLM_API_KEY=your-proxy-key, LLM_MODEL=your-model-alias
+# Local 20B+ models may need a longer timeout, e.g. LLM_TIMEOUT_MS=300000 (5 minutes), for full intelligence sweeps.
 # LM Studio: LLM_PROVIDER=lmstudio, LLM_BASE_URL=http://host.docker.internal:1234/v1, LLM_MODEL=local-model
 # Ollama: LLM_PROVIDER=ollama, LLM_BASE_URL=http://host.docker.internal:11434, LLM_MODEL=llama3.1:8b
 # Generic: LLM_PROVIDER=openai-compatible, LLM_BASE_URL=http://host.docker.internal:8000/v1, LLM_MODEL=your-model
--- a/README.md
+++ b/README.md
@@ -178,6 +178,7 @@ LLM_PROVIDER=litellm
 LLM_BASE_URL=https://llm.example.com/v1
 LLM_API_KEY=your-litellm-api-key
 LLM_MODEL=your-model-alias
+LLM_TIMEOUT_MS=300000

 # LM Studio
 LLM_PROVIDER=lmstudio
--- a/lib/llm/ideas.mjs
+++ b/lib/llm/ideas.mjs
@@ -43,7 +43,13 @@ Output ONLY valid JSON array. Each object:
 }`;

  try {
-    const result = await provider.complete(systemPrompt, context, { maxTokens: 4096, timeout: 90000 });
+    const maxTokens = Number.isFinite(provider.maxTokens) && provider.maxTokens > 0
+      ? provider.maxTokens
+      : 4096;
+    const timeout = Number.isFinite(provider.timeoutMs) && provider.timeoutMs > 0
+      ? provider.timeoutMs
+      : 90000;
+    const result = await provider.complete(systemPrompt, context, { maxTokens, timeout });
    const ideas = parseIdeasResponse(result.text);
    if (ideas && ideas.length > 0) {
      return ideas;
--- a/package.json
+++ b/package.json
@@ -12,7 +12,7 @@
    "brief:save": "node apis/save-briefing.mjs",
    "diag": "node diag.mjs",
    "test": "npm run test:unit",
-    "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/llm-litellm.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs test/mojibake-text.test.mjs test/adsb.test.mjs test/dashboard-geotagging.test.mjs",
+    "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/llm-litellm.test.mjs test/llm-ideas.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs test/mojibake-text.test.mjs test/adsb.test.mjs test/dashboard-geotagging.test.mjs",
    "compose:config": "docker compose config",
    "clean": "node scripts/clean.mjs",
    "fresh-start": "npm run clean && npm start"
--- a/test/llm-ideas.test.mjs
+++ b/test/llm-ideas.test.mjs
@@ -0,0 +1,48 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import { generateLLMIdeas } from '../lib/llm/ideas.mjs';
+
+const response = JSON.stringify([{ // canned provider reply shared by both tests: JSON text of an array holding one idea object
+  title: 'Test idea',
+  type: 'WATCH',
+  ticker: 'SPY',
+  confidence: 'LOW',
+  rationale: 'Test rationale',
+  risk: 'Test risk',
+  horizon: 'Days',
+  signals: ['test'],
+}]);
+
+test('idea generation respects provider token and timeout configuration', async () => {
+  let capturedOptions; // third argument the stub's complete() receives from generateLLMIdeas
+  const provider = {
+    isConfigured: true,
+    maxTokens: 2000, // deliberately different from the 4096 fallback in lib/llm/ideas.mjs
+    timeoutMs: 300000, // deliberately different from the 90000 fallback in lib/llm/ideas.mjs
+    async complete(_systemPrompt, _context, options) {
+      capturedOptions = options; // record the options so they can be asserted after the call
+      return { text: response };
+    },
+  };
+
+  const ideas = await generateLLMIdeas(provider, {}, null, []);
+
+  assert.deepEqual(capturedOptions, { maxTokens: 2000, timeout: 300000 }); // strict deep equality (assert comes from node:assert/strict)
+  assert.equal(ideas.length, 1);
+  assert.equal(ideas[0].source, 'llm'); // NOTE(review): 'source' is absent from the canned response; presumably added during parsing - confirm
+});
+
+test('idea generation keeps safe defaults for providers without limits', async () => {
+  let capturedOptions; // third argument the stub's complete() receives from generateLLMIdeas
+  const provider = { // stub defines neither maxTokens nor timeoutMs, so the fallback path is exercised
+    isConfigured: true,
+    async complete(_systemPrompt, _context, options) {
+      capturedOptions = options; // record the options so they can be asserted after the call
+      return { text: response };
+    },
+  };
+
+  await generateLLMIdeas(provider, {}, null, []);
+
+  assert.deepEqual(capturedOptions, { maxTokens: 4096, timeout: 90000 }); // the fallback values hard-coded in lib/llm/ideas.mjs
+});