mirror of
https://github.com/Tavish9/any4lerobot.git
synced 2026-06-26 15:46:58 +00:00
67091bc4a7
* Add generic converter adapter hooks Co-authored-by: Codex <codex@openai.com> * Require conversion task repo ids Co-authored-by: Codex <codex@openai.com> * Remove conversion task runtime repo id check Co-authored-by: Codex <codex@openai.com> * Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --------- Co-authored-by: Codex <codex@openai.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
39 lines
865 B
Python
39 lines
865 B
Python
from collections.abc import Mapping, Sequence
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
# String-keyed mapping of arbitrary values; used below as the type of
# ``ConversionTask.metadata``.
TaskMetadata = Mapping[str, Any]
|
|
# Mapping from string keys to plain ``dict`` values.
# NOTE(review): presumably feature name -> feature description; not referenced
# in the visible part of this module, so confirm against its callers.
FeatureSpec = Mapping[str, dict]
|
|
|
|
|
|
@dataclass(frozen=True)
class ConversionTask:
    """A single raw input file that can be converted on its own.

    Bundles the source and destination paths, a repo id, and any extra
    adapter-supplied metadata for one conversion. Instances are frozen, so
    attributes cannot be reassigned after construction.

    Attributes:
        input_path: Path of the raw input (required).
        output_path: Path the conversion writes to (required).
        local_repo_id: Repo id string associated with this task (required).
        metadata: Optional string-keyed mapping of extra values; each
            instance gets its own fresh empty ``dict`` when omitted.
    """

    # Required fields: no defaults, so they must be passed to the constructor.
    input_path: Path
    output_path: Path
    local_repo_id: str

    # ``default_factory`` avoids sharing one dict between instances.
    # NOTE: frozen + eq generates ``__hash__`` over all fields, so hashing an
    # instance whose metadata is a plain dict raises ``TypeError``.
    metadata: TaskMetadata = field(default_factory=dict)
|
|
|
|
|
|
def setup_logger():
    """Reset datatrove's logger to a single colorized INFO-level stdout sink.

    Both imports are done inside the function, so ``datatrove`` is only
    required when this function is actually called.

    Returns:
        The reconfigured ``datatrove.utils.logging.logger`` object.
    """
    import sys

    from datatrove.utils.logging import logger

    # Remove the existing sink(s) first so the stdout sink added below is not
    # stacked on top of them (presumably a loguru logger -- confirm).
    logger.remove()
    logger.add(sys.stdout, level="INFO", colorize=True)

    return logger
|
|
|
|
|
|
def unique_strings(values: Sequence[str]) -> list[str]:
    """Return the distinct items of *values* in first-occurrence order.

    Args:
        values: Strings to de-duplicate; the input is not modified.

    Returns:
        A new list holding each distinct value once, positioned where it
        first appeared in *values*. An empty input yields an empty list.
    """
    # dict keys are unique and keep insertion order, so ``dict.fromkeys``
    # performs the order-preserving de-duplication in a single pass.
    return list(dict.fromkeys(values))
|