mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-26 05:59:52 +00:00
small fix
This commit is contained in:
@@ -43,7 +43,7 @@ Usage:
|
|||||||
pip install transformers torch qwen-vl-utils accelerate
|
pip install transformers torch qwen-vl-utils accelerate
|
||||||
|
|
||||||
# Annotate and push to hub:
|
# Annotate and push to hub:
|
||||||
python subtask_annotation_local.py \\
|
python subtask_annotation.py \\
|
||||||
--repo-id pepijn223/mydataset \\
|
--repo-id pepijn223/mydataset \\
|
||||||
--subtasks "reach,grasp,lift,place" \\
|
--subtasks "reach,grasp,lift,place" \\
|
||||||
--video-key observation.images.base \\
|
--video-key observation.images.base \\
|
||||||
@@ -65,7 +65,7 @@ from rich.console import Console
|
|||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
from rich.progress import Progress, SpinnerColumn, TextColumn
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
||||||
from rich.tree import Tree
|
from rich.tree import Tree
|
||||||
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
|
from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor
|
||||||
|
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
|
||||||
@@ -158,7 +158,7 @@ class VideoAnnotator:
|
|||||||
self.console.print(f"[cyan]Loading model: {model_name}...[/cyan]")
|
self.console.print(f"[cyan]Loading model: {model_name}...[/cyan]")
|
||||||
|
|
||||||
# Load model and processor
|
# Load model and processor
|
||||||
self.model = Qwen2VLForConditionalGeneration.from_pretrained(
|
self.model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
|
||||||
model_name,
|
model_name,
|
||||||
torch_dtype=torch_dtype,
|
torch_dtype=torch_dtype,
|
||||||
device_map=device,
|
device_map=device,
|
||||||
@@ -743,15 +743,15 @@ def main():
|
|||||||
epilog="""
|
epilog="""
|
||||||
Examples:
|
Examples:
|
||||||
# List available cameras:
|
# List available cameras:
|
||||||
python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --max-episodes 0
|
python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --max-episodes 0
|
||||||
|
|
||||||
# Annotate with specific camera:
|
# Annotate with specific camera:
|
||||||
python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --push-to-hub
|
python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --push-to-hub
|
||||||
|
|
||||||
# Use smaller model (7B instead of 30B):
|
# Use custom model:
|
||||||
python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --model Qwen/Qwen2-VL-7B-Instruct --push-to-hub
|
python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --model Qwen/Qwen3-VL-30B-A3B-Instruct --push-to-hub
|
||||||
|
|
||||||
Note: The 7B model requires ~16GB VRAM. Use 2B model (~8GB VRAM) if needed.
|
Note: The 30B model requires ~60GB VRAM. Make sure you have sufficient GPU memory.
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -783,7 +783,7 @@ Note: The 7B model requires ~16GB VRAM. Use 2B model (~8GB VRAM) if needed.
|
|||||||
"--model",
|
"--model",
|
||||||
type=str,
|
type=str,
|
||||||
default="Qwen/Qwen3-VL-30B-A3B-Instruct",
|
default="Qwen/Qwen3-VL-30B-A3B-Instruct",
|
||||||
help="Model to use (default: Qwen/Qwen3-VL-30B-A3B-Instruct). Other options: Qwen/Qwen2-VL-2B-Instruct, Qwen/Qwen2-VL-7B-Instruct",
|
help="Qwen3-VL model to use (default: Qwen/Qwen3-VL-30B-A3B-Instruct)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--skip-existing",
|
"--skip-existing",
|
||||||
|
|||||||
Reference in New Issue
Block a user