fix(datasets): allow zero-width features in get_hf_features_from_features

Setting a 1-D feature with shape=(0,) builds datasets.Sequence(length=0, ...),
which pyarrow rejects with "ArrowInvalid: list_size needs to be a strict
positive integer" when datasets.Dataset.from_dict(...) is called inside
save_episode. Use length=-1 (variable-length) for zero-width 1-D shapes instead.

Fixes the second half of #3654 (the first half is #3664, in compute_episode_stats).
This commit is contained in:
Mahbod
2026-05-26 14:22:14 +02:00
committed by CarolinePascal
parent 911734ec9c
commit fc262fbc06
2 changed files with 32 additions and 3 deletions
+3 -3
View File
@@ -67,9 +67,9 @@ def get_hf_features_from_features(features: dict) -> datasets.Features:
elif ft["shape"] == (1,):
hf_features[key] = datasets.Value(dtype=ft["dtype"])
elif len(ft["shape"]) == 1:
hf_features[key] = datasets.Sequence(
length=ft["shape"][0], feature=datasets.Value(dtype=ft["dtype"])
)
# pyarrow rejects fixed_size_list[0], so use a variable length list instead
length = ft["shape"][0] if ft["shape"][0] > 0 else -1
hf_features[key] = datasets.Sequence(length=length, feature=datasets.Value(dtype=ft["dtype"]))
elif len(ft["shape"]) == 2:
hf_features[key] = datasets.Array2D(shape=ft["shape"], dtype=ft["dtype"])
elif len(ft["shape"]) == 3:
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
import datasets
from lerobot.datasets.feature_utils import get_hf_features_from_features
def test_get_hf_features_zero_width_feature_does_not_raise_on_from_dict():
    """Check that a zero-width 1-D feature yields a schema usable by ``Dataset.from_dict``.

    A feature declared with ``shape=(0,)`` is converted to HF features, then a
    two-row dataset of empty lists is built with that schema. The test fails if
    construction raises or if the rows are not all kept.
    """
    features = {"empty": {"dtype": "float32", "shape": (0,), "names": ["empty"]}}
    hf_features = get_hf_features_from_features(features)

    rows = {"empty": [[], []]}
    dataset = datasets.Dataset.from_dict(rows, features=hf_features)

    # Building the dataset without an exception is the main check; the row
    # count additionally guards against the empty rows being silently dropped.
    assert dataset.num_rows == len(rows["empty"])