diff --git a/tests/annotations/test_modules.py b/tests/annotations/test_modules.py
index 7e7ecace3..5b3535779 100644
--- a/tests/annotations/test_modules.py
+++ b/tests/annotations/test_modules.py
@@ -126,12 +126,26 @@ def test_module2_at_t0_emits_speech_only_no_interjection(fixture_dataset_root: P
 def test_module2_mid_episode_emits_paired_interjection_and_speech(
     fixture_dataset_root: Path, tmp_path: Path
 ) -> None:
+    """Module 2 anchors interjections on Module 1's subtask boundaries.
+
+    The executor runs Module 1 first, then Module 2 reads the subtask
+    rows back from the same staging tree (see
+    ``_mid_episode_interjections``). Reproduce that contract here by
+    seeding the staging with two subtask rows so a single ``0 → 1``
+    boundary exists for Module 2 to anchor on.
+    """
     vlm = make_canned_responder(
         {
             "acknowledgement the robot": {"text": "OK."},
-            "ONE realistic interruption": {
-                "interjection": "actually skip the dishes",
-                "speech": "Skipping the dishes.",
+            # Marker matches the distinctive line of
+            # ``module_2_interjection.txt``. The old marker
+            # ("ONE realistic interruption") came from a previous prompt
+            # version that asked for counterfactual interjections; the
+            # current design anchors on subtask boundaries instead, so
+            # the prompt and its marker changed.
+            "Write ONE interjection": {
+                "interjection": "now wipe the counter please",
+                "speech": "On it.",
             },
         },
     )
@@ -142,6 +156,29 @@ def test_module2_mid_episode_emits_paired_interjection_and_speech(
     )
     record = next(iter_episodes(fixture_dataset_root))
     staging = EpisodeStaging(tmp_path / "stage", record.episode_index)
+    # Seed Module 1's subtask staging so Module 2 has a boundary to
+    # anchor on (it bails with zero rows when no spans exist — the
+    # production executor guarantees Module 1 ran first).
+    boundary_ts = float(record.frame_timestamps[len(record.frame_timestamps) // 2])
+    staging.write(
+        "module_1",
+        [
+            {
+                "role": "assistant",
+                "content": "grasp the sponge",
+                "style": "subtask",
+                "timestamp": float(record.frame_timestamps[0]),
+                "tool_calls": None,
+            },
+            {
+                "role": "assistant",
+                "content": "wipe the counter",
+                "style": "subtask",
+                "timestamp": boundary_ts,
+                "tool_calls": None,
+            },
+        ],
+    )
     module.run_episode(record, staging)
     rows = staging.read("module_2")
 
@@ -163,9 +200,7 @@ def test_module3_vqa_unique_per_frame_and_camera(single_episode_root: Path, tmp_
         vlm=vlm,
         config=Module3Config(vqa_emission_hz=1.0, K=3),
         seed=1,
-        frame_provider=_StubFrameProvider(
-            cameras=("observation.images.top", "observation.images.wrist")
-        ),
+        frame_provider=_StubFrameProvider(cameras=("observation.images.top", "observation.images.wrist")),
     )
     record = next(iter_episodes(single_episode_root))
     staging = EpisodeStaging(tmp_path / "stage", record.episode_index)
@@ -176,13 +211,9 @@ def test_module3_vqa_unique_per_frame_and_camera(single_episode_root: Path, tmp_
         assert r["style"] == "vqa"
         assert r.get("camera") in {"observation.images.top", "observation.images.wrist"}
     # at most one (vqa, user) and one (vqa, assistant) per (timestamp, camera)
-    user_keys = [
-        (r["timestamp"], r["camera"]) for r in rows if r["role"] == "user" and r["style"] == "vqa"
-    ]
+    user_keys = [(r["timestamp"], r["camera"]) for r in rows if r["role"] == "user" and r["style"] == "vqa"]
     assistant_keys = [
-        (r["timestamp"], r["camera"])
-        for r in rows
-        if r["role"] == "assistant" and r["style"] == "vqa"
+        (r["timestamp"], r["camera"]) for r in rows if r["role"] == "assistant" and r["style"] == "vqa"
     ]
     assert len(user_keys) == len(set(user_keys))
     assert len(assistant_keys) == len(set(assistant_keys))
@@ -222,17 +253,32 @@ def test_module1_attaches_video_block_to_subtask_prompt(fixture_dataset_root: Pa
     provider = _StubFrameProvider()
     module = PlanSubtasksMemoryModule(
         vlm=StubVlmClient(responder=responder),
-        config=Module1Config(max_video_frames=5, frames_per_second=10.0),
+        # Disable the rephrasings sub-prompt so the test's only video-bearing
+        # call is the subtask one — keeps the assertions below focused on
+        # ``_generate_subtasks`` rather than fighting the order of unrelated
+        # text-only Module-1 sub-prompts.
+        config=Module1Config(max_video_frames=5, frames_per_second=10.0, n_task_rephrasings=0),
         frame_provider=provider,
     )
     record = next(iter_episodes(fixture_dataset_root))
     staging = EpisodeStaging(tmp_path / "stage", record.episode_index)
     module.run_episode(record, staging)
 
-    # the subtask call (the first VLM call) must carry exactly one video block
+    # Find the call carrying the subtask prompt rather than blindly taking
+    # captured[0] — Module 1 issues several sub-prompts and their order is
+    # not part of the contract.
     assert captured, "no VLM calls made"
-    first_call = captured[0]
-    content = first_call[0]["content"]
+
+    def _prompt_text(messages):
+        for m in messages:
+            for block in m.get("content", []):
+                if isinstance(block, dict) and block.get("type") == "text":
+                    return block.get("text", "")
+        return ""
+
+    subtask_calls = [m for m in captured if "atomic subtasks" in _prompt_text(m)]
+    assert len(subtask_calls) == 1, "expected exactly one subtask-prompt VLM call"
+    content = subtask_calls[0][0]["content"]
     video_blocks = [b for b in content if isinstance(b, dict) and b.get("type") == "video"]
     image_blocks = [b for b in content if isinstance(b, dict) and b.get("type") == "image"]
     text_blocks = [b for b in content if isinstance(b, dict) and b.get("type") == "text"]