chore(smolvla2-runtime): log first-tick resize so train/inference match is verifiable

Log a warning the first time the robot observation provider runs
through, showing the live camera resolution and the dataset's training
resolution (both reported as height x width), plus whether a resize was
applied. Lets the operator confirm at a glance that the visual prefix
really is being fed at the same shape the model saw at training —
instead of guessing whether the resize fired silently.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-05-12 18:06:00 +02:00
parent ab5c1dc392
commit 398a8cf730
@@ -607,6 +607,7 @@ def _build_robot_observation_provider(
# tightly on visual features, goes out of distribution and the
# head's distribution at position 0 collapses to its dominant
# mode (a memorised ``\n``-only run in this checkpoint).
_resize_logged = {"done": False}
target_image_shapes: dict[str, tuple[int, int]] = {}
if ds_features:
for fkey, fmeta in ds_features.items():
@@ -658,11 +659,18 @@ def _build_robot_observation_provider(
if img.ndim != 3:
continue
cur_h, cur_w = img.shape[:2]
if not _resize_logged["done"]:
logger.warning(
"camera %s: live=%dx%d, training=%dx%d (resize=%s)",
cam_key, cur_h, cur_w, target_h, target_w,
"yes" if (cur_h, cur_w) != (target_h, target_w) else "no — already matched",
)
if (cur_h, cur_w) == (target_h, target_w):
continue
raw[cam_key] = _cv2.resize(
img, (target_w, target_h), interpolation=_cv2.INTER_AREA
)
_resize_logged["done"] = True
except Exception as exc: # noqa: BLE001
logger.warning("camera resize to dataset shape failed: %s", exc)