fix(research): validate session_id to block path traversal
Every research endpoint interpolates session_id into filesystem paths
(Path('data/deep_research') / f'{session_id}.json') without checking
for traversal sequences. A crafted ID like '../../data/auth' reaches
arbitrary JSON files — readable via research_detail (which also leaks
file paths in error messages), writable via research_archive, and
deletable via research_delete.
Add _validate_session_id() which rejects anything outside
[a-zA-Z0-9-]{1,128}. Called before filesystem access in all 12
endpoints that accept a session_id path parameter.
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -14,6 +15,8 @@ from pydantic import BaseModel, Field
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.auth_helpers import get_current_user
|
||||
|
||||
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Model-name substrings that are NOT chat/generation models — research must
|
||||
@@ -58,6 +61,10 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
raise HTTPException(401, "Not authenticated")
|
||||
return user
|
||||
|
||||
def _validate_session_id(session_id: str) -> None:
|
||||
if not _SESSION_ID_RE.fullmatch(session_id):
|
||||
raise HTTPException(400, "Invalid session ID format")
|
||||
|
||||
def _owns_in_memory(session_id: str, user: str) -> bool:
|
||||
"""Ownership check for an in-flight (in-memory) research task.
|
||||
Falls back to the on-disk JSON if the task has already finished."""
|
||||
@@ -95,6 +102,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
@router.get("/api/research/status/{session_id}")
|
||||
async def research_status(session_id: str, request: Request):
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if not _owns_in_memory(session_id, user):
|
||||
raise HTTPException(404, "No research found for this session")
|
||||
status = research_handler.get_status(session_id)
|
||||
@@ -105,6 +113,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
@router.post("/api/research/cancel/{session_id}")
|
||||
async def research_cancel(session_id: str, request: Request):
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if not _owns_in_memory(session_id, user):
|
||||
raise HTTPException(404, "No research found for this session")
|
||||
cancelled = research_handler.cancel_research(session_id)
|
||||
@@ -113,6 +122,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
@router.post("/api/research/result/{session_id}")
|
||||
async def research_result(session_id: str, request: Request):
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if not _owns_in_memory(session_id, user):
|
||||
raise HTTPException(404, "No research result available")
|
||||
result = research_handler.get_result(session_id)
|
||||
@@ -140,6 +150,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_report(session_id: str, request: Request):
|
||||
"""Serve the visual HTML report for a completed research session."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
_assert_owns_research(session_id, user)
|
||||
logger.info(f"Visual report requested for session {session_id}")
|
||||
try:
|
||||
@@ -160,6 +171,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
"""Mark an image URL as hidden for this research's visual report.
|
||||
Persisted to the research JSON so subsequent /report renders skip it."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
_assert_owns_research(session_id, user)
|
||||
ok = research_handler.hide_image(session_id, body.url)
|
||||
if not ok:
|
||||
@@ -170,6 +182,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_unhide_images(session_id: str, request: Request):
|
||||
"""Clear the hidden-images list for a research session."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
_assert_owns_research(session_id, user)
|
||||
ok = research_handler.unhide_all_images(session_id)
|
||||
if not ok:
|
||||
@@ -235,6 +248,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
"""Return the full JSON for a single research result — sources,
|
||||
summary, stats — used by the Library preview panel."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
path = Path("data/deep_research") / f"{session_id}.json"
|
||||
if not path.exists():
|
||||
raise HTTPException(404, "Research not found")
|
||||
@@ -251,6 +265,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_archive(session_id: str, request: Request, archived: bool = Query(True)):
|
||||
"""Soft-archive / restore a research report (sets `archived` in its JSON)."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
path = Path("data/deep_research") / f"{session_id}.json"
|
||||
if not path.exists():
|
||||
raise HTTPException(404, "Research not found")
|
||||
@@ -270,6 +285,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_delete(session_id: str, request: Request):
|
||||
"""Delete a research result from disk."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
data_dir = Path("data/deep_research")
|
||||
json_path = data_dir / f"{session_id}.json"
|
||||
deleted = False
|
||||
@@ -413,6 +429,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_stream(session_id: str, request: Request):
|
||||
"""SSE stream of research progress events."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if not _owns_in_memory(session_id, user):
|
||||
raise HTTPException(404, "No research found for this session")
|
||||
async def _generate():
|
||||
@@ -446,6 +463,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
async def research_result_peek(session_id: str, request: Request):
|
||||
"""Get research result without clearing it (for panel use)."""
|
||||
user = _require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if not _owns_in_memory(session_id, user):
|
||||
raise HTTPException(404, "No research found for this session")
|
||||
result = research_handler.get_result(session_id)
|
||||
@@ -475,6 +493,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
|
||||
the user can ask follow-up questions in a clean conversation.
|
||||
"""
|
||||
_require_user(request)
|
||||
_validate_session_id(session_id)
|
||||
if session_manager is None:
|
||||
raise HTTPException(500, "session_manager not configured")
|
||||
|
||||
|
||||
55
tests/test_research_session_id_validation.py
Normal file
55
tests/test_research_session_id_validation.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Regression tests: research session_id must reject path-traversal sequences."""
|
||||
|
||||
import re
|
||||
import unittest
|
||||
|
||||
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
|
||||
|
||||
|
||||
class TestResearchSessionIdValidation(unittest.TestCase):
|
||||
"""Validate the regex used to guard research session_id path params."""
|
||||
|
||||
def test_accepts_rp_prefixed_id(self):
|
||||
self.assertIsNotNone(_SESSION_ID_RE.fullmatch("rp-abc123def456"))
|
||||
|
||||
def test_accepts_standard_uuid(self):
|
||||
self.assertIsNotNone(
|
||||
_SESSION_ID_RE.fullmatch("550e8400-e29b-41d4-a716-446655440000")
|
||||
)
|
||||
|
||||
def test_accepts_custom_alphanumeric(self):
|
||||
self.assertIsNotNone(_SESSION_ID_RE.fullmatch("custom-id-123"))
|
||||
|
||||
def test_rejects_double_dot(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch(".."))
|
||||
|
||||
def test_rejects_single_dot(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("."))
|
||||
|
||||
def test_rejects_dot_slash_traversal(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("../../data/auth"))
|
||||
|
||||
def test_rejects_deep_traversal(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("../../../etc/passwd"))
|
||||
|
||||
def test_rejects_mixed_traversal(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("normal/../../traversal"))
|
||||
|
||||
def test_rejects_dot_prefix_traversal(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("./../../secret"))
|
||||
|
||||
def test_rejects_empty(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch(""))
|
||||
|
||||
def test_rejects_whitespace(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch(" "))
|
||||
|
||||
def test_rejects_slash(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("a/b"))
|
||||
|
||||
def test_rejects_null_byte(self):
|
||||
self.assertIsNone(_SESSION_ID_RE.fullmatch("rp-test\x00"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user