update docstring inside DatasetInfo.from_dict()

* sort the unknown keys so that the logged warning is deterministic

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Maxime Ellerbach <maxime@ellerbach.net>
This commit is contained in:
Maxime Ellerbach
2026-04-27 18:19:33 +02:00
parent 7fb32168dd
commit de05a00dd0
+4 -3
View File
@@ -162,11 +162,12 @@ class DatasetInfo:
def from_dict(cls, data: dict) -> "DatasetInfo":
    """Construct an instance from a raw dict (e.g. loaded directly from JSON).

    Only keys that match a dataclass field of ``cls`` are passed to the
    constructor. Unknown keys are ignored for forward compatibility with
    datasets that carry additional fields (e.g. ``total_videos`` from v2.x).
    A warning listing the ignored keys is logged when such fields are present.

    Args:
        data: Mapping of field names to values; may contain extra keys.

    Returns:
        A new ``cls`` instance built from the recognised keys of ``data``.
    """
    known = {f.name for f in dataclasses.fields(cls)}
    # Sorted so the warning text does not depend on dict iteration order.
    unknown = sorted(k for k in data if k not in known)
    if unknown:
        logger.warning(f"Unknown fields in DatasetInfo: {unknown}. These will be ignored.")
    return cls(**{k: v for k, v in data.items() if k in known})