Source code for mbodied.data.utils
# Copyright 2024 mbodi ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from datasets import Features, Image, Value
[docs]
def to_features(indict, image_keys=None, exclude_keys=None, prefix="") -> Features:
"""Convert a dictionary to a Datasets Features object.
Args:
indict (dict): The dictionary to convert.
image_keys (dict): A dictionary of keys that should be treated as images.
exclude_keys (set): A set of full-path-keys to exclude.
prefix (str): A prefix to add to the keys.
"""
if exclude_keys is None:
exclude_keys = set()
if image_keys is None:
image_keys = {}
if isinstance(indict, str):
return Value("string")
if isinstance(indict, int):
return Value("int32")
if isinstance(indict, float):
return Value("float32")
if isinstance(indict, np.int32):
return Value("int32")
if isinstance(indict, np.float32):
return Value("float32")
if isinstance(indict, list | tuple | np.ndarray):
if len(indict) == 0:
raise ValueError("Cannot infer schema from empty list")
return [to_features(indict[0])]
if isinstance(indict, dict):
out_dict = {}
for key, value in indict.items():
full_key = f"{prefix}.{key}" if prefix else key
if full_key in image_keys and full_key not in exclude_keys:
out_dict[key] = Image(decode=True)
elif full_key not in exclude_keys:
out_dict[key] = to_features(value, image_keys, exclude_keys, full_key)
return out_dict
raise ValueError(f"Cannot infer schema from {indict}")
[docs]
def infer_features(example) -> Features:
"""Infer Hugging Face Datasets Features from an example."""
return Features(to_features(example))