fix: personal-docs path confinement used abspath, allowing symlink escape (#1728)

_resolve_allowed_personal_dir confined a user-supplied path to PERSONAL_DIR with
os.path.abspath + os.path.commonpath. abspath normalises `..` but does NOT
resolve symlinks, so a symlink placed inside PERSONAL_DIR pointing outside it
passes the commonpath check and lets index_personal_documents read files outside
the root. Use os.path.realpath for both the base and the candidate so symlinks
are resolved before the confinement check.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Mubashir R
2026-06-03 09:29:57 +05:00
committed by GitHub
parent 0e538ecd29
commit 4907b16d9b
2 changed files with 59 additions and 2 deletions

View File

@@ -69,9 +69,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
if not directory:
raise HTTPException(400, "Directory path is required")
base_abs = os.path.abspath(PERSONAL_DIR)
# realpath (not abspath) so a symlink inside PERSONAL_DIR that points
# outside it is resolved before the commonpath confinement check below;
# abspath only normalises `..` and would let such a symlink escape.
base_abs = os.path.realpath(PERSONAL_DIR)
candidate = directory if os.path.isabs(directory) else os.path.join(base_abs, directory)
resolved = os.path.abspath(candidate)
resolved = os.path.realpath(candidate)
try:
in_base = os.path.commonpath([resolved, base_abs]) == base_abs
except ValueError:

View File

@@ -0,0 +1,54 @@
"""Regression: _resolve_allowed_personal_dir must resolve symlinks (realpath)
when confining a path to PERSONAL_DIR.
It used os.path.abspath, which normalises ``..`` but does NOT resolve symlinks,
so a symlink placed inside PERSONAL_DIR pointing outside it passes the
os.path.commonpath confinement check and lets index_personal_documents read
files outside the root. os.path.realpath resolves the symlink before the check.
_resolve_allowed_personal_dir is a closure inside setup_personal_routes, so the
source-level test pins the fix and the behavioural test proves the underlying
confinement principle.
"""
import ast
import os
from pathlib import Path
SRC = Path(__file__).resolve().parent.parent / "routes" / "personal_routes.py"
def _function_source(src_text, name):
tree = ast.parse(src_text)
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == name:
return ast.get_source_segment(src_text, node)
raise AssertionError(f"{name} not found in {SRC}")
def test_confinement_uses_realpath_not_abspath():
body = _function_source(SRC.read_text(), "_resolve_allowed_personal_dir")
assert "os.path.realpath" in body, (
"_resolve_allowed_personal_dir must use os.path.realpath so a symlink "
"inside PERSONAL_DIR cannot escape the confinement check"
)
assert "os.path.abspath" not in body, (
"os.path.abspath does not resolve symlinks; the confinement check must "
"not rely on it"
)
def test_realpath_catches_symlink_escape(tmp_path):
# The principle the fix relies on: abspath keeps the symlink path inside the
# base (confinement fooled); realpath resolves it outside (confinement holds).
base = tmp_path / "personal"
base.mkdir()
outside = tmp_path / "outside"
outside.mkdir()
link = base / "escape"
os.symlink(outside, link)
base_abs = os.path.realpath(base) # base itself may live under a symlinked tmp
# abspath: the symlink still looks inside base -> escape not detected
assert os.path.commonpath([os.path.abspath(base / "escape"), os.path.abspath(base)]) == os.path.abspath(base)
# realpath: the symlink resolves to `outside` -> escape detected
assert os.path.commonpath([os.path.realpath(link), base_abs]) != base_abs