fix: document library language facet undercounts text documents (#1758)

This commit is contained in:
Afonso Coutinho
2026-06-03 05:28:38 +01:00
committed by GitHub
parent a096e872f5
commit 992866e167
2 changed files with 44 additions and 1 deletions

View File

@@ -15,6 +15,21 @@ from src.auth_helpers import get_current_user
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _aggregate_language_facets(lang_rows):
"""Sum document counts per display language for the library facet.
NULL-language and explicit "text" rows share the "text" bucket (the
language filter treats them as one), so they must be ADDED. The old dict
comprehension keyed both to "text", silently overwriting one group and
undercounting the facet versus what the filter actually returns.
"""
out = {}
for lang, cnt in lang_rows:
key = lang or "text"
out[key] = out.get(key, 0) + cnt
return out
from routes.document_helpers import ( from routes.document_helpers import (
DocumentCreate, DocumentUpdate, DocumentPatch, DocumentCreate, DocumentUpdate, DocumentPatch,
@@ -258,7 +273,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
) )
lang_q = _owner_session_filter(lang_q, user) lang_q = _owner_session_filter(lang_q, user)
lang_rows = lang_q.group_by(Document.language).all() lang_rows = lang_q.group_by(Document.language).all()
languages = {lang or "text": cnt for lang, cnt in lang_rows} languages = _aggregate_language_facets(lang_rows)
# Session count (owner-filtered) # Session count (owner-filtered)
sc_q = ( sc_q = (

View File

@@ -0,0 +1,28 @@
"""Library language facet must SUM NULL-language and "text" docs.
documents_library built the facet with {lang or "text": cnt ...}, so a
NULL-language row and an explicit "text" row both keyed "text" and one
silently overwrote the other. The language FILTER treats NULL and "text"
as a single bucket ((language == None) | (language == "text")), so the
facet count must add them, otherwise clicking the facet returns more docs
than the count promised.
"""
from routes.document_routes import _aggregate_language_facets
def test_null_and_text_are_summed():
    """A None-language row and a "text" row add up under the "text" key."""
    grouped = [(None, 3), ("text", 2), ("python", 5)]
    expected = {"text": 5, "python": 5}
    assert _aggregate_language_facets(grouped) == expected
def test_only_null():
    """A lone None-language row is reported under the "text" key."""
    facets = _aggregate_language_facets([(None, 4)])
    assert facets == {"text": 4}
def test_distinct_languages_preserved():
    """Rows with different languages keep their own keys and counts."""
    grouped = [("python", 2), ("javascript", 7), ("text", 1)]
    expected = {"python": 2, "javascript": 7, "text": 1}
    assert _aggregate_language_facets(grouped) == expected
def test_empty():
    """No input rows produce an empty facet mapping."""
    facets = _aggregate_language_facets([])
    assert facets == {}