fix: document library language facet undercounts text documents (#1758)
This commit is contained in:
@@ -15,6 +15,21 @@ from src.auth_helpers import get_current_user
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _aggregate_language_facets(lang_rows):
|
||||||
|
"""Sum document counts per display language for the library facet.
|
||||||
|
|
||||||
|
NULL-language and explicit "text" rows share the "text" bucket (the
|
||||||
|
language filter treats them as one), so they must be ADDED. The old dict
|
||||||
|
comprehension keyed both to "text", silently overwriting one group and
|
||||||
|
undercounting the facet versus what the filter actually returns.
|
||||||
|
"""
|
||||||
|
out = {}
|
||||||
|
for lang, cnt in lang_rows:
|
||||||
|
key = lang or "text"
|
||||||
|
out[key] = out.get(key, 0) + cnt
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from routes.document_helpers import (
|
from routes.document_helpers import (
|
||||||
DocumentCreate, DocumentUpdate, DocumentPatch,
|
DocumentCreate, DocumentUpdate, DocumentPatch,
|
||||||
@@ -258,7 +273,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
|||||||
)
|
)
|
||||||
lang_q = _owner_session_filter(lang_q, user)
|
lang_q = _owner_session_filter(lang_q, user)
|
||||||
lang_rows = lang_q.group_by(Document.language).all()
|
lang_rows = lang_q.group_by(Document.language).all()
|
||||||
languages = {lang or "text": cnt for lang, cnt in lang_rows}
|
languages = _aggregate_language_facets(lang_rows)
|
||||||
|
|
||||||
# Session count (owner-filtered)
|
# Session count (owner-filtered)
|
||||||
sc_q = (
|
sc_q = (
|
||||||
|
|||||||
28
tests/test_document_library_language_facet.py
Normal file
28
tests/test_document_library_language_facet.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""Library language facet must SUM NULL-language and "text" docs.

documents_library built the facet with {lang or "text": cnt ...}, so a
NULL-language row and an explicit "text" row both keyed "text" and one
silently overwrote the other. The language FILTER treats NULL and "text"
as a single bucket ((language == None) | (language == "text")), so the
facet count must add them; otherwise clicking the facet returns more docs
than the count promised.
"""
|
||||||
|
from routes.document_routes import _aggregate_language_facets
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_and_text_are_summed():
    """NULL-language and explicit "text" rows add up in one "text" bucket."""
    facet_rows = [(None, 3), ("text", 2), ("python", 5)]
    expected = {"text": 5, "python": 5}
    assert _aggregate_language_facets(facet_rows) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_only_null():
    """A lone NULL-language row is reported under "text" with its count."""
    facet_rows = [(None, 4)]
    expected = {"text": 4}
    assert _aggregate_language_facets(facet_rows) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_distinct_languages_preserved():
    """Rows with different languages keep their own keys and counts."""
    facet_rows = [("python", 2), ("javascript", 7), ("text", 1)]
    expected = {"python": 2, "javascript": 7, "text": 1}
    assert _aggregate_language_facets(facet_rows) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty():
    """No rows in means an empty facet mapping out."""
    no_rows = []
    assert _aggregate_language_facets(no_rows) == {}
|
||||||
Reference in New Issue
Block a user