From 83cb91d05cb721d6bed9a0f6b0cc7b47eb52566f Mon Sep 17 00:00:00 2001
From: JK <jk@chequer.io>
Date: Thu, 12 Feb 2026 21:18:48 +0900
Subject: [PATCH] =?UTF-8?q?feat(reverse-sync):=20Sidecar=20mapping=20looku?=
 =?UTF-8?q?p=20=EB=AA=A8=EB=93=88=20=EB=B0=8F=20=EC=9C=A0=EB=8B=9B=20?=
 =?UTF-8?q?=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Description
- `bin/reverse_sync/sidecar_lookup.py` 모듈 신규 추가
  - `mapping.yaml` 파일 로드 (`load_sidecar_mapping`)
  - MDX block index → SidecarEntry 역인덱스 구축 (`build_mdx_to_sidecar_index`)
  - xhtml_xpath → BlockMapping 인덱스 구축 (`build_xpath_to_mapping`)
  - 2-hop 조회: MDX index → SidecarEntry → BlockMapping (`find_mapping_by_sidecar`)
  - XHTML + MDX로부터 mapping.yaml 생성 (`generate_sidecar_mapping`)
- `tests/test_sidecar_lookup.py` 유닛 테스트 38개 추가
  - 각 함수별 단위 테스트 + 실제 테스트 케이스 기반 통합 테스트
  - 2-hop 조회 전체 경로 검증

### Background
#682에서 forward converter가 `var/<page_id>/mapping.yaml` sidecar 파일을 생성하도록 구현 완료.
이 모듈은 해당 sidecar 파일을 로드하고 인덱스를 구축하여,
reverse-sync pipeline에서 O(1) 블록 매칭을 가능하게 하는 기반 모듈입니다.
실제 reverse-sync pipeline 적용은 후속 PR에서 진행합니다.

## Added/updated tests?
- [x] Yes — 38개 유닛 테스트 추가 (240/240 전체 통과)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../bin/reverse_sync/sidecar_lookup.py        | 319 +++++++++++
 confluence-mdx/tests/test_sidecar_lookup.py   | 533 ++++++++++++++++++
 2 files changed, 852 insertions(+)
 create mode 100644 confluence-mdx/bin/reverse_sync/sidecar_lookup.py
 create mode 100644 confluence-mdx/tests/test_sidecar_lookup.py

diff --git a/confluence-mdx/bin/reverse_sync/sidecar_lookup.py b/confluence-mdx/bin/reverse_sync/sidecar_lookup.py
new file mode 100644
index 000000000..8f2d0fa77
--- /dev/null
+++ b/confluence-mdx/bin/reverse_sync/sidecar_lookup.py
@@ -0,0 +1,319 @@
+"""Sidecar Mapping Lookup — mapping.yaml 로드 및 인덱스 구축."""
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import yaml
+
+from reverse_sync.mapping_recorder import BlockMapping
+
+
+@dataclass
+class SidecarEntry:  # one item of the 'mappings' list in mapping.yaml
+    xhtml_xpath: str  # xpath of the XHTML element; join key used to reach a BlockMapping
+    xhtml_type: str  # block type label; '' when the YAML item omits it
+    mdx_blocks: List[int] = field(default_factory=list)  # MDX block indices mapped to this element
+
+
+def load_sidecar_mapping(mapping_path: str) -> List[SidecarEntry]:
+    """Load mapping.yaml and return its 'mappings' items as SidecarEntry objects.
+
+    Raises FileNotFoundError when the file is missing. An empty document or a
+    null 'mappings' / 'mdx_blocks' value is treated as an empty list.
+    """
+    path = Path(mapping_path)
+    if not path.exists():
+        raise FileNotFoundError(
+            f"Sidecar mapping not found: {mapping_path}\n"
+            f"Forward converter를 실행하여 mapping.yaml을 생성하세요."
+        )
+    # UTF-8 explicitly: the file may contain raw non-ASCII (allow_unicode dump).
+    data = yaml.safe_load(path.read_text(encoding='utf-8')) or {}
+    return [SidecarEntry(item['xhtml_xpath'], item.get('xhtml_type', ''),
+                         item.get('mdx_blocks') or [])
+            for item in data.get('mappings') or []]
+
+
+def build_mdx_to_sidecar_index(
+    entries: List[SidecarEntry],
+) -> Dict[int, SidecarEntry]:
+    """Build a reverse index from MDX block index to its owning SidecarEntry.
+
+    If several entries list the same MDX index, the later entry wins.
+    """
+    return {
+        block_no: owner for owner in entries for block_no in owner.mdx_blocks}
+
+
+def build_xpath_to_mapping(
+    mappings: List[BlockMapping],
+) -> Dict[str, BlockMapping]:
+    """Index BlockMapping objects by their xhtml_xpath.
+
+    When two mappings share an xpath, the later one replaces the earlier.
+    """
+    return {mapping.xhtml_xpath: mapping for mapping in mappings}
+
+
+def generate_sidecar_mapping(
+    xhtml: str,
+    mdx: str,
+    page_id: str = '',
+) -> str:
+    """Generate mapping.yaml content (as a YAML string) from XHTML and MDX.
+
+    Intended to reproduce the forward converter's sidecar generation logic.
+    Combines record_mapping() and parse_mdx_blocks() and matches blocks by text.
+
+    Order + text matching:
+      For each top-level XHTML mapping, scan forward from the current MDX
+      pointer for an MDX block whose normalized text matches.
+      XHTML blocks with no match (image, toc, empty paragraph, ...) are
+      recorded with an empty mdx_blocks list.
+    """
+    from reverse_sync.mapping_recorder import record_mapping
+    from reverse_sync.mdx_block_parser import parse_mdx_blocks
+    from reverse_sync.text_normalizer import normalize_mdx_to_plain, collapse_ws
+
+    xhtml_mappings = record_mapping(xhtml)
+    mdx_blocks = parse_mdx_blocks(mdx)
+
+    # Keep content blocks only (skip frontmatter, empty, import).
+    NON_CONTENT = frozenset(('empty', 'frontmatter', 'import_statement'))
+
+    entries = []
+    mdx_content_indices = [
+        i for i, b in enumerate(mdx_blocks)
+        if b.type not in NON_CONTENT
+    ]
+
+    # Precompute the normalized plain text of every MDX content block.
+    mdx_plains = {}
+    for ci in mdx_content_indices:
+        b = mdx_blocks[ci]
+        mdx_plains[ci] = collapse_ws(normalize_mdx_to_plain(b.content, b.type))
+
+    # Child mappings are covered through their parent, not matched on their own.
+    child_ids = set()
+    for m in xhtml_mappings:
+        for cid in m.children:
+            child_ids.add(cid)
+
+    # Only top-level mappings take part in matching.
+    top_mappings = [m for m in xhtml_mappings if m.block_id not in child_ids]
+    mdx_ptr = 0  # position within mdx_content_indices (not an MDX block index)
+
+    # NOTE(review): SKIP_TYPES is assigned but never read in this function.
+    SKIP_TYPES = frozenset(('html_block',))
+    # NOTE(review): NO_MDX_XPATHS is likewise unused (image, toc macros, ...).
+    NO_MDX_XPATHS = frozenset()  # decided dynamically by text matching instead
+
+    LOOKAHEAD = 5  # max number of MDX content blocks scanned ahead per mapping
+
+    for xm in top_mappings:
+        xhtml_plain = collapse_ws(xm.xhtml_plain_text)
+
+        # XHTML blocks with empty text have no MDX counterpart.
+        if not xhtml_plain:
+            entries.append({
+                'xhtml_xpath': xm.xhtml_xpath,
+                'xhtml_type': xm.type,
+                'mdx_blocks': [],
+            })
+            continue
+
+        if mdx_ptr >= len(mdx_content_indices):
+            entries.append({
+                'xhtml_xpath': xm.xhtml_xpath,
+                'xhtml_type': xm.type,
+                'mdx_blocks': [],
+            })
+            continue
+
+        # Look for a text match starting at the current MDX pointer.
+        matched_at = _find_text_match(
+            xhtml_plain, mdx_content_indices, mdx_plains, mdx_ptr, LOOKAHEAD)
+
+        if matched_at is not None:
+            # Jump the MDX pointer to the match; blocks skipped over stay unmapped.
+            mdx_ptr = matched_at
+            mdx_idx = mdx_content_indices[mdx_ptr]
+            matched_indices = [mdx_idx]
+            mdx_ptr += 1
+
+            # With children, following MDX blocks also belong to this XHTML mapping,
+            # but only up to where the next top-level XHTML mapping's text begins.
+            if xm.children:
+                num_children = _count_child_mdx_blocks(
+                    xm, xhtml_mappings, child_ids,
+                    mdx_content_indices, mdx_blocks, mdx_plains,
+                    mdx_ptr, top_mappings,
+                    normalize_mdx_to_plain, collapse_ws,
+                )
+                for _ in range(num_children):
+                    if mdx_ptr < len(mdx_content_indices):
+                        matched_indices.append(mdx_content_indices[mdx_ptr])
+                        mdx_ptr += 1
+
+            entries.append({
+                'xhtml_xpath': xm.xhtml_xpath,
+                'xhtml_type': xm.type,
+                'mdx_blocks': matched_indices,
+            })
+        else:
+            # No text match — no MDX counterpart (image, toc, ...).
+            entries.append({
+                'xhtml_xpath': xm.xhtml_xpath,
+                'xhtml_type': xm.type,
+                'mdx_blocks': [],
+            })
+
+    mapping_data = {
+        'version': 1,
+        'source_page_id': page_id,
+        'mdx_file': 'page.mdx',
+        'mappings': entries,
+    }
+    return yaml.dump(mapping_data, allow_unicode=True, default_flow_style=False)
+
+
+def _count_child_mdx_blocks(
+    xm,
+    xhtml_mappings,
+    child_ids,
+    mdx_content_indices,
+    mdx_blocks,
+    mdx_plains,
+    mdx_ptr,
+    top_mappings,
+    normalize_mdx_to_plain,
+    collapse_ws,
+) -> int:
+    """Count the MDX content blocks after mdx_ptr that belong to container xm.
+
+    Scans forward and stops at the first MDX block whose text matches one of
+    the next non-empty top-level XHTML mappings (up to three are considered).
+    """
+    # Locate xm in top_mappings (by identity) so the mappings after it can be
+    # inspected. NOTE(review): several parameters are unused in this body.
+    current_idx = None
+    for i, tm in enumerate(top_mappings):
+        if tm is xm:
+            current_idx = i
+            break
+    if current_idx is None:
+        return len(xm.children)
+
+    # Collect text signatures of the following mappings (first 3 non-empty).
+    next_sigs = []
+    for tm in top_mappings[current_idx + 1:]:
+        sig = _strip_all_ws(collapse_ws(tm.xhtml_plain_text))
+        if sig:
+            next_sigs.append(sig)
+        if len(next_sigs) >= 3:
+            break
+
+    if not next_sigs:
+        return len(xm.children)
+
+    # Scan forward from mdx_ptr and stop as soon as an MDX block matches
+    # the text of one of the following top-level mappings.
+    count = 0
+    max_scan = len(xm.children) + 5  # small slack beyond the child count
+    for offset in range(max_scan):
+        ptr = mdx_ptr + offset
+        if ptr >= len(mdx_content_indices):
+            break
+        mdx_idx = mdx_content_indices[ptr]
+        mdx_sig = _strip_all_ws(mdx_plains[mdx_idx])
+        if not mdx_sig:
+            count += 1  # text-less MDX blocks are counted without comparison
+            continue
+
+        # Stop when this block matches a following top-level mapping.
+        hit = False
+        for ns in next_sigs:
+            if mdx_sig == ns:
+                hit = True
+                break
+            if len(ns) >= 10 and ns[:50] in mdx_sig:
+                hit = True
+                break
+            if len(mdx_sig) >= 10 and mdx_sig[:50] in ns:
+                hit = True
+                break
+        if hit:
+            break
+        count += 1
+
+    return count
+
+
+def _strip_all_ws(text: str) -> str:
+    """Remove all whitespace characters; used for text-signature comparison."""
+    return ''.join(text.split())
+
+
+def _find_text_match(
+    xhtml_plain: str,
+    mdx_content_indices: List[int],
+    mdx_plains: Dict[int, str],
+    start_ptr: int,
+    lookahead: int,
+) -> Optional[int]:
+    """Search forward for an MDX block whose text matches the XHTML plain text.
+
+    Examines at most `lookahead` entries of mdx_content_indices starting at
+    start_ptr and returns the pointer position (not the MDX block index) of
+    the first match, or None when nothing matches.
+
+    Matching strategies, each tried over the whole window in turn:
+      1. exact equality of the strings as given (callers pass collapse_ws output)
+      2. equality after removing all whitespace
+      3. whitespace-free containment of a 50-char prefix, in either direction
+    """
+    end_ptr = min(start_ptr + lookahead, len(mdx_content_indices))
+    xhtml_sig = _strip_all_ws(xhtml_plain)
+
+    # Pass 1: exact match.
+    for ptr in range(start_ptr, end_ptr):
+        mdx_idx = mdx_content_indices[ptr]
+        if xhtml_plain == mdx_plains[mdx_idx]:
+            return ptr
+
+    # Pass 2: exact match ignoring whitespace.
+    for ptr in range(start_ptr, end_ptr):
+        mdx_idx = mdx_content_indices[ptr]
+        mdx_sig = _strip_all_ws(mdx_plains[mdx_idx])
+        if xhtml_sig == mdx_sig:
+            return ptr
+
+    # Pass 3: prefix containment ignoring whitespace (XHTML text of 10+ chars only).
+    if len(xhtml_sig) >= 10:
+        prefix = xhtml_sig[:50]
+        for ptr in range(start_ptr, end_ptr):
+            mdx_idx = mdx_content_indices[ptr]
+            mdx_sig = _strip_all_ws(mdx_plains[mdx_idx])
+            if not mdx_sig:
+                continue
+            if prefix in mdx_sig or mdx_sig[:50] in xhtml_sig:  # NOTE(review): no min length on mdx_sig
+                return ptr
+
+    return None
+
+
+def find_mapping_by_sidecar(
+    mdx_block_index: int,
+    mdx_to_sidecar: Dict[int, SidecarEntry],
+    xpath_to_mapping: Dict[str, BlockMapping],
+) -> Optional[BlockMapping]:
+    """Resolve an MDX block index to a BlockMapping in two dictionary hops.
+
+    Hop 1 looks the index up in mdx_to_sidecar; hop 2 looks the resulting
+    entry's xhtml_xpath up in xpath_to_mapping. Returns None if either misses.
+    """
+    sidecar_entry = mdx_to_sidecar.get(mdx_block_index)
+    if sidecar_entry is not None:
+        # Second hop: the sidecar entry only carries the xpath join key.
+        return xpath_to_mapping.get(sidecar_entry.xhtml_xpath)
+    return None
diff --git a/confluence-mdx/tests/test_sidecar_lookup.py b/confluence-mdx/tests/test_sidecar_lookup.py
new file mode 100644
index 000000000..74596da8a
--- /dev/null
+++ b/confluence-mdx/tests/test_sidecar_lookup.py
@@ -0,0 +1,533 @@
+"""Sidecar Mapping Lookup 모듈 유닛 테스트.
+
+sidecar_lookup.py의 핵심 기능을 검증한다:
+  - mapping.yaml 파일 로드 및 SidecarEntry 생성
+  - MDX block index → SidecarEntry 역인덱스 구축
+  - xhtml_xpath → BlockMapping 인덱스 구축
+  - 2-hop 조회: MDX index → SidecarEntry → BlockMapping
+  - XHTML + MDX로부터 mapping.yaml 생성 (generate_sidecar_mapping)
+  - 텍스트 매칭 내부 함수들 (_find_text_match, _strip_all_ws)
+"""
+import pytest
+import yaml
+from pathlib import Path
+from unittest.mock import patch
+
+from reverse_sync.sidecar_lookup import (
+    SidecarEntry,
+    load_sidecar_mapping,
+    build_mdx_to_sidecar_index,
+    build_xpath_to_mapping,
+    find_mapping_by_sidecar,
+    generate_sidecar_mapping,
+    _find_text_match,
+    _strip_all_ws,
+)
+from reverse_sync.mapping_recorder import BlockMapping
+
+
+# ── SidecarEntry ──────────────────────────────────────────────
+
+class TestSidecarEntry:  # construction and default values of the dataclass
+    def test_basic_creation(self):
+        entry = SidecarEntry(
+            xhtml_xpath='p[1]', xhtml_type='paragraph', mdx_blocks=[0, 1])
+        assert entry.xhtml_xpath == 'p[1]'
+        assert entry.xhtml_type == 'paragraph'
+        assert entry.mdx_blocks == [0, 1]
+
+    def test_default_mdx_blocks(self):
+        entry = SidecarEntry(xhtml_xpath='h2[1]', xhtml_type='heading')
+        assert entry.mdx_blocks == []  # mdx_blocks omitted → empty list
+
+
+# ── load_sidecar_mapping ──────────────────────────────────────
+
+class TestLoadSidecarMapping:  # load_sidecar_mapping: valid file, missing file, defaults
+    def test_load_valid_mapping(self, tmp_path):
+        mapping_data = {
+            'version': 1,
+            'source_page_id': '12345',
+            'mdx_file': 'page.mdx',
+            'mappings': [
+                {'xhtml_xpath': 'h2[1]', 'xhtml_type': 'heading', 'mdx_blocks': [2]},
+                {'xhtml_xpath': 'p[1]', 'xhtml_type': 'paragraph', 'mdx_blocks': [4, 5]},
+                {'xhtml_xpath': 'p[2]', 'xhtml_type': 'paragraph', 'mdx_blocks': []},
+            ]
+        }
+        mapping_file = tmp_path / 'mapping.yaml'
+        mapping_file.write_text(yaml.dump(mapping_data, allow_unicode=True))
+
+        entries = load_sidecar_mapping(str(mapping_file))
+        assert len(entries) == 3
+        assert entries[0].xhtml_xpath == 'h2[1]'
+        assert entries[0].xhtml_type == 'heading'
+        assert entries[0].mdx_blocks == [2]
+        assert entries[1].mdx_blocks == [4, 5]
+        assert entries[2].mdx_blocks == []
+
+    def test_file_not_found(self):
+        with pytest.raises(FileNotFoundError, match='Sidecar mapping not found'):
+            load_sidecar_mapping('/nonexistent/mapping.yaml')
+
+    def test_empty_mappings(self, tmp_path):
+        mapping_file = tmp_path / 'mapping.yaml'
+        mapping_file.write_text(yaml.dump({'version': 1, 'mappings': []}))
+        entries = load_sidecar_mapping(str(mapping_file))
+        assert entries == []
+
+    def test_missing_optional_fields(self, tmp_path):
+        mapping_data = {
+            'version': 1,
+            'mappings': [
+                {'xhtml_xpath': 'h2[1]'},  # xhtml_type and mdx_blocks omitted
+            ]
+        }
+        mapping_file = tmp_path / 'mapping.yaml'
+        mapping_file.write_text(yaml.dump(mapping_data))
+        entries = load_sidecar_mapping(str(mapping_file))
+        assert entries[0].xhtml_type == ''
+        assert entries[0].mdx_blocks == []
+
+
+# ── build_mdx_to_sidecar_index ────────────────────────────────
+
+class TestBuildMdxToSidecarIndex:  # MDX block index → SidecarEntry reverse index
+    def test_basic_index(self):
+        e1 = SidecarEntry('h2[1]', 'heading', [2])
+        e2 = SidecarEntry('p[1]', 'paragraph', [4, 5])
+        index = build_mdx_to_sidecar_index([e1, e2])
+        assert index[2] is e1
+        assert index[4] is e2
+        assert index[5] is e2  # every index of a multi-block entry points at that entry
+        assert 0 not in index
+        assert 3 not in index
+
+    def test_empty_mdx_blocks(self):
+        e = SidecarEntry('p[2]', 'paragraph', [])
+        index = build_mdx_to_sidecar_index([e])
+        assert len(index) == 0  # an entry without MDX blocks contributes no keys
+
+    def test_multiple_entries_single_blocks(self):
+        entries = [
+            SidecarEntry('h2[1]', 'heading', [0]),
+            SidecarEntry('p[1]', 'paragraph', [2]),
+            SidecarEntry('p[2]', 'paragraph', [4]),
+        ]
+        index = build_mdx_to_sidecar_index(entries)
+        assert len(index) == 3
+        assert index[0].xhtml_xpath == 'h2[1]'
+        assert index[2].xhtml_xpath == 'p[1]'
+        assert index[4].xhtml_xpath == 'p[2]'
+
+
+# ── build_xpath_to_mapping ────────────────────────────────────
+
+def _make_mapping(block_id, xpath, plain_text='', type_='paragraph', children=None):
+    """Build a minimal BlockMapping fixture; ``children`` defaults to an empty list."""
+    kids = children or []
+    return BlockMapping(
+        block_id=block_id, type=type_, xhtml_xpath=xpath,
+        xhtml_text='',
+        xhtml_plain_text=plain_text,
+        xhtml_element_index=0,
+        children=kids,
+    )
+
+
+class TestBuildXpathToMapping:  # xhtml_xpath → BlockMapping index
+    def test_basic_index(self):
+        m1 = _make_mapping('heading-1', 'h2[1]')
+        m2 = _make_mapping('paragraph-1', 'p[1]')
+        index = build_xpath_to_mapping([m1, m2])
+        assert index['h2[1]'] is m1
+        assert index['p[1]'] is m2
+        assert 'p[2]' not in index
+
+    def test_empty_mappings(self):
+        index = build_xpath_to_mapping([])
+        assert len(index) == 0
+
+
+# ── find_mapping_by_sidecar ───────────────────────────────────
+
+class TestFindMappingBySidecar:  # 2-hop lookup over hand-built indexes
+    def setup_method(self):
+        self.m1 = _make_mapping('heading-1', 'h2[1]', 'Overview')
+        self.m2 = _make_mapping('paragraph-1', 'p[1]', 'Some content')
+        self.xpath_index = build_xpath_to_mapping([self.m1, self.m2])
+
+        e1 = SidecarEntry('h2[1]', 'heading', [2])
+        e2 = SidecarEntry('p[1]', 'paragraph', [5])
+        self.sidecar_index = build_mdx_to_sidecar_index([e1, e2])
+
+    def test_found_via_sidecar(self):
+        result = find_mapping_by_sidecar(2, self.sidecar_index, self.xpath_index)
+        assert result is self.m1
+
+        result = find_mapping_by_sidecar(5, self.sidecar_index, self.xpath_index)
+        assert result is self.m2
+
+    def test_mdx_index_not_in_sidecar(self):
+        result = find_mapping_by_sidecar(99, self.sidecar_index, self.xpath_index)
+        assert result is None
+
+    def test_xpath_not_in_mapping_index(self):
+        """Present in the sidecar index but missing from xpath_to_mapping."""
+        e3 = SidecarEntry('p[99]', 'paragraph', [10])
+        sidecar_index = build_mdx_to_sidecar_index([e3])
+        result = find_mapping_by_sidecar(10, sidecar_index, self.xpath_index)
+        assert result is None
+
+
+# ── _strip_all_ws ─────────────────────────────────────────────
+
+class TestStripAllWs:  # _strip_all_ws removes spaces, tabs and newlines
+    def test_basic(self):
+        assert _strip_all_ws('hello world') == 'helloworld'
+
+    def test_tabs_and_newlines(self):
+        assert _strip_all_ws('a\tb\nc d') == 'abcd'
+
+    def test_empty(self):
+        assert _strip_all_ws('') == ''
+
+    def test_only_whitespace(self):
+        assert _strip_all_ws('   \t\n  ') == ''
+
+
+# ── _find_text_match ──────────────────────────────────────────
+
+class TestFindTextMatch:
+    def test_exact_match_at_start(self):
+        """Pass 1: exact match after collapse_ws."""
+        indices = [0, 1, 2]
+        plains = {0: 'Hello World', 1: 'Foo Bar', 2: 'Baz'}
+        result = _find_text_match('Hello World', indices, plains, 0, 5)
+        assert result == 0
+
+    def test_exact_match_at_offset(self):
+        indices = [0, 1, 2]
+        plains = {0: 'AAA', 1: 'BBB', 2: 'CCC'}
+        result = _find_text_match('BBB', indices, plains, 0, 5)
+        assert result == 1
+
+    def test_whitespace_insensitive_match(self):
+        """Pass 2: exact match ignoring whitespace."""
+        indices = [0, 1]
+        plains = {0: 'Hello  World', 1: 'Foo'}
+        # xhtml 'Hello World' vs mdx 'Hello  World' → compared after _strip_all_ws
+        result = _find_text_match('Hello World', indices, plains, 0, 5)
+        # Pass 1 fails, but the whitespace-insensitive pass 2 matches.
+        assert result is not None
+
+    def test_prefix_match(self):
+        """Pass 3: prefix containment match."""
+        indices = [0]
+        long_text = 'A' * 60
+        plains = {0: long_text + ' extra'}
+        # The first 50 chars of xhtml_plain are contained in the MDX text.
+        result = _find_text_match(long_text, indices, plains, 0, 5)
+        assert result is not None
+
+    def test_no_match(self):
+        indices = [0, 1]
+        plains = {0: 'AAA', 1: 'BBB'}
+        result = _find_text_match('CCC', indices, plains, 0, 5)
+        assert result is None
+
+    def test_start_ptr_skips_earlier(self):
+        """Blocks before start_ptr are not searched."""
+        indices = [0, 1, 2]
+        plains = {0: 'Target', 1: 'Other', 2: 'More'}
+        result = _find_text_match('Target', indices, plains, 1, 5)
+        assert result is None  # index 0 is outside the search window
+
+    def test_lookahead_limit(self):
+        """No match is reported beyond the lookahead window."""
+        indices = [0, 1, 2, 3, 4, 5]
+        plains = {i: f'block-{i}' for i in range(6)}
+        result = _find_text_match('block-5', indices, plains, 0, 3)
+        assert result is None  # lookahead=3, so only pointers 0, 1, 2 are searched
+
+    def test_short_text_no_prefix_match(self):
+        """Text shorter than 10 chars never attempts prefix matching."""
+        indices = [0]
+        plains = {0: 'AB extra'}
+        result = _find_text_match('AB', indices, plains, 0, 5)
+        assert result is None
+
+
+# ── generate_sidecar_mapping ──────────────────────────────────
+
+class TestGenerateSidecarMapping:
+    """Integration tests that generate mapping.yaml from XHTML + MDX."""
+
+    def test_simple_heading_paragraph(self):
+        """heading + paragraph → each maps to an MDX block."""
+        xhtml = '<h2>Overview</h2><p>This is content.</p>'
+        mdx = (
+            '---\ntitle: Test\n---\n\n'
+            '## Overview\n\n'
+            'This is content.\n'
+        )
+        result = generate_sidecar_mapping(xhtml, mdx, '12345')
+        data = yaml.safe_load(result)
+
+        assert data['version'] == 1
+        assert data['source_page_id'] == '12345'
+        assert len(data['mappings']) >= 2
+
+        # Both heading and paragraph must have non-empty mdx_blocks.
+        heading_entry = next(
+            e for e in data['mappings'] if e['xhtml_type'] == 'heading')
+        para_entry = next(
+            e for e in data['mappings'] if e['xhtml_type'] == 'paragraph')
+        assert len(heading_entry['mdx_blocks']) >= 1
+        assert len(para_entry['mdx_blocks']) >= 1
+
+    def test_empty_xhtml_block_gets_empty_mdx_blocks(self):
+        """XHTML blocks without text (e.g. images) get an empty mdx_blocks."""
+        xhtml = (
+            '<h2>Title</h2>'
+            '<ac:image><ri:attachment ri:filename="img.png"/></ac:image>'
+            '<p>Paragraph content.</p>'
+        )
+        mdx = (
+            '---\ntitle: Test\n---\n\n'
+            '## Title\n\n'
+            '![img](/images/img.png)\n\n'
+            'Paragraph content.\n'
+        )
+        result = generate_sidecar_mapping(xhtml, mdx)
+        data = yaml.safe_load(result)
+
+        # The image block has empty text, hence empty mdx_blocks.
+        image_entries = [
+            e for e in data['mappings'] if e.get('mdx_blocks') == []]
+        assert len(image_entries) >= 1
+
+    def test_yaml_format_output(self):
+        """The generated YAML is well-formed."""
+        xhtml = '<p>Hello World.</p>'
+        mdx = '---\ntitle: Test\n---\n\nHello World.\n'
+        result = generate_sidecar_mapping(xhtml, mdx, 'page-1')
+
+        # Parses as YAML.
+        data = yaml.safe_load(result)
+        assert isinstance(data, dict)
+        assert 'mappings' in data
+        assert isinstance(data['mappings'], list)
+
+    def test_page_id_in_output(self):
+        xhtml = '<p>Content.</p>'
+        mdx = '---\ntitle: Test\n---\n\nContent.\n'
+        result = generate_sidecar_mapping(xhtml, mdx, 'my-page-42')
+        data = yaml.safe_load(result)
+        assert data['source_page_id'] == 'my-page-42'
+
+    def test_multiple_paragraphs_sequential_matching(self):
+        """Multiple paragraphs match MDX blocks in order."""
+        xhtml = '<p>First paragraph.</p><p>Second paragraph.</p><p>Third paragraph.</p>'
+        mdx = (
+            '---\ntitle: Test\n---\n\n'
+            'First paragraph.\n\n'
+            'Second paragraph.\n\n'
+            'Third paragraph.\n'
+        )
+        result = generate_sidecar_mapping(xhtml, mdx)
+        data = yaml.safe_load(result)
+
+        matched = [e for e in data['mappings'] if e['mdx_blocks']]
+        assert len(matched) == 3
+
+        # MDX block indices must be in increasing order.
+        all_indices = [e['mdx_blocks'][0] for e in matched]
+        assert all_indices == sorted(all_indices)
+
+    def test_callout_macro_with_children(self):
+        """Callout macro (ac:structured-macro) → container + children mapping."""
+        xhtml = (
+            '<ac:structured-macro ac:name="info">'
+            '<ac:rich-text-body>'
+            '<p>Info paragraph 1.</p>'
+            '<p>Info paragraph 2.</p>'
+            '</ac:rich-text-body>'
+            '</ac:structured-macro>'
+        )
+        mdx = (
+            '---\ntitle: Test\n---\n\n'
+            ':::info\n\n'
+            'Info paragraph 1.\n\n'
+            'Info paragraph 2.\n\n'
+            ':::\n'
+        )
+        result = generate_sidecar_mapping(xhtml, mdx)
+        data = yaml.safe_load(result)
+
+        # The container mapping must cover several MDX blocks.
+        container_entries = [
+            e for e in data['mappings'] if len(e.get('mdx_blocks', [])) > 1
+        ]
+        assert len(container_entries) >= 1
+
+
+# ── 실제 테스트 케이스 기반 통합 테스트 ───────────────────────
+
+class TestGenerateSidecarMappingFromTestCases:
+    """Validate against the real fixtures under tests/testcases/ (read as UTF-8)."""
+
+    @pytest.fixture
+    def testcase_dir(self):
+        return Path(__file__).parent / 'testcases'
+
+    def _get_reverse_sync_test_ids(self, testcase_dir):
+        """Return the IDs of test cases that have both reverse-sync input files."""
+        ids = []
+        if not testcase_dir.exists():
+            return ids
+        for d in sorted(testcase_dir.iterdir()):
+            if d.is_dir() and (d / 'original.mdx').exists() and (d / 'page.xhtml').exists():
+                ids.append(d.name)
+        return ids
+
+    def test_all_reverse_sync_cases_produce_valid_yaml(self, testcase_dir):
+        """Every reverse-sync test case yields a valid mapping.yaml."""
+        test_ids = self._get_reverse_sync_test_ids(testcase_dir)
+        if not test_ids:
+            pytest.skip('No reverse-sync test cases found')
+
+        for test_id in test_ids:
+            case_dir = testcase_dir / test_id
+            xhtml = (case_dir / 'page.xhtml').read_text(encoding='utf-8')
+            mdx = (case_dir / 'original.mdx').read_text(encoding='utf-8')
+
+            result = generate_sidecar_mapping(xhtml, mdx, test_id)
+            data = yaml.safe_load(result)
+
+            assert data is not None, f'{test_id}: YAML 파싱 실패'
+            assert 'mappings' in data, f'{test_id}: mappings 키 누락'
+            assert isinstance(data['mappings'], list), f'{test_id}: mappings가 리스트가 아님'
+
+    def test_all_reverse_sync_cases_have_nonempty_mappings(self, testcase_dir):
+        """Every reverse-sync test case has at least one mapping with MDX blocks."""
+        test_ids = self._get_reverse_sync_test_ids(testcase_dir)
+        if not test_ids:
+            pytest.skip('No reverse-sync test cases found')
+
+        for test_id in test_ids:
+            case_dir = testcase_dir / test_id
+            xhtml = (case_dir / 'page.xhtml').read_text(encoding='utf-8')
+            mdx = (case_dir / 'original.mdx').read_text(encoding='utf-8')
+
+            result = generate_sidecar_mapping(xhtml, mdx, test_id)
+            data = yaml.safe_load(result)
+            matched = [e for e in data['mappings'] if e.get('mdx_blocks')]
+            assert len(matched) >= 1, \
+                f'{test_id}: MDX 블록에 매핑된 엔트리가 없음 ({len(data["mappings"])}개 매핑 중)'
+
+    def test_mdx_block_indices_are_unique(self, testcase_dir):
+        """No MDX block index is mapped more than once."""
+        test_ids = self._get_reverse_sync_test_ids(testcase_dir)
+        if not test_ids:
+            pytest.skip('No reverse-sync test cases found')
+
+        for test_id in test_ids:
+            case_dir = testcase_dir / test_id
+            xhtml = (case_dir / 'page.xhtml').read_text(encoding='utf-8')
+            mdx = (case_dir / 'original.mdx').read_text(encoding='utf-8')
+
+            result = generate_sidecar_mapping(xhtml, mdx, test_id)
+            data = yaml.safe_load(result)
+
+            all_indices = []
+            for entry in data['mappings']:
+                all_indices.extend(entry.get('mdx_blocks', []))
+            assert len(all_indices) == len(set(all_indices)), \
+                f'{test_id}: MDX 블록 인덱스 중복 발견: {[i for i in all_indices if all_indices.count(i) > 1]}'
+
+    def test_mdx_block_indices_are_ascending(self, testcase_dir):
+        """MDX block indices increase in mapping order."""
+        test_ids = self._get_reverse_sync_test_ids(testcase_dir)
+        if not test_ids:
+            pytest.skip('No reverse-sync test cases found')
+
+        for test_id in test_ids:
+            case_dir = testcase_dir / test_id
+            xhtml = (case_dir / 'page.xhtml').read_text(encoding='utf-8')
+            mdx = (case_dir / 'original.mdx').read_text(encoding='utf-8')
+
+            result = generate_sidecar_mapping(xhtml, mdx, test_id)
+            data = yaml.safe_load(result)
+
+            all_indices = []
+            for entry in data['mappings']:
+                all_indices.extend(entry.get('mdx_blocks', []))
+            assert all_indices == sorted(all_indices), \
+                f'{test_id}: MDX 블록 인덱스가 오름차순이 아님'
+
+
+# ── 2-hop 조회 통합 테스트 ────────────────────────────────────
+
+class TestSidecarTwoHopLookup:
+    """End-to-end path: sidecar file → index construction → 2-hop lookup."""
+
+    def test_full_pipeline(self, tmp_path):
+        """Load mapping.yaml → build indexes → find_mapping_by_sidecar."""
+        # 1. Write mapping.yaml
+        mapping_data = {
+            'version': 1,
+            'source_page_id': '12345',
+            'mappings': [
+                {'xhtml_xpath': 'h2[1]', 'xhtml_type': 'heading', 'mdx_blocks': [2]},
+                {'xhtml_xpath': 'p[1]', 'xhtml_type': 'paragraph', 'mdx_blocks': [4]},
+                {'xhtml_xpath': 'p[2]', 'xhtml_type': 'paragraph', 'mdx_blocks': [6]},
+            ]
+        }
+        mapping_file = tmp_path / 'mapping.yaml'
+        mapping_file.write_text(yaml.dump(mapping_data))
+
+        # 2. Load the sidecar and build its index
+        entries = load_sidecar_mapping(str(mapping_file))
+        mdx_to_sidecar = build_mdx_to_sidecar_index(entries)
+
+        # 3. Build BlockMappings (produced by record_mapping() in real use)
+        m1 = _make_mapping('heading-1', 'h2[1]', 'Overview', 'heading')
+        m2 = _make_mapping('paragraph-1', 'p[1]', 'First paragraph.')
+        m3 = _make_mapping('paragraph-2', 'p[2]', 'Second paragraph.')
+        xpath_to_mapping = build_xpath_to_mapping([m1, m2, m3])
+
+        # 4. 2-hop lookup
+        assert find_mapping_by_sidecar(2, mdx_to_sidecar, xpath_to_mapping) is m1
+        assert find_mapping_by_sidecar(4, mdx_to_sidecar, xpath_to_mapping) is m2
+        assert find_mapping_by_sidecar(6, mdx_to_sidecar, xpath_to_mapping) is m3
+        assert find_mapping_by_sidecar(99, mdx_to_sidecar, xpath_to_mapping) is None
+
+    def test_container_with_multiple_mdx_blocks(self, tmp_path):
+        """A container mapped to several MDX blocks resolves to the same mapping for each."""
+        mapping_data = {
+            'version': 1,
+            'mappings': [
+                {
+                    'xhtml_xpath': 'ac:structured-macro[1]',
+                    'xhtml_type': 'html_block',
+                    'mdx_blocks': [3, 5, 7, 9],
+                },
+            ]
+        }
+        mapping_file = tmp_path / 'mapping.yaml'
+        mapping_file.write_text(yaml.dump(mapping_data))
+
+        entries = load_sidecar_mapping(str(mapping_file))
+        mdx_to_sidecar = build_mdx_to_sidecar_index(entries)
+
+        container = _make_mapping(
+            'html_block-1', 'ac:structured-macro[1]',
+            'Container text', 'html_block',
+            children=['paragraph-10', 'paragraph-11'])
+        xpath_to_mapping = build_xpath_to_mapping([container])
+
+        # Every MDX block points at the same container.
+        for idx in [3, 5, 7, 9]:
+            result = find_mapping_by_sidecar(idx, mdx_to_sidecar, xpath_to_mapping)
+            assert result is container