#!/usr/bin/env python3
"""Export all Notion pages to local Markdown files."""

import json
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

# Notion integration token. SECURITY: must come from the environment — a
# previously hard-coded fallback credential was removed from this file
# (rotate that token in the Notion integration settings).
NOTION_TOKEN = os.environ.get("NOTION_TOKEN", "")
# Pinned Notion API version (sent as the Notion-Version header).
NOTION_VERSION = "2022-06-28"
BASE_URL = "https://api.notion.com/v1"
# Output directory: <repo-root>/docs/notion, resolved relative to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "docs", "notion")

# Common headers for every Notion API request.
HEADERS = {
    "Authorization": f"Bearer {NOTION_TOKEN}",
    "Notion-Version": NOTION_VERSION,
    "Content-Type": "application/json",
}


def api_get(path, params=None):
    """GET a Notion API endpoint and return the parsed JSON body.

    Args:
        path: API path starting with "/", appended to BASE_URL.
        params: optional dict of query parameters.

    Retries up to 3 times on HTTP 429, sleeping for the Retry-After value
    (default 2s). Any other HTTP error is logged and re-raised. Returns
    None if every attempt was rate-limited.
    """
    url = f"{BASE_URL}{path}"
    if params:
        # urlencode percent-escapes reserved characters; the previous raw
        # string join produced invalid URLs for values containing &, =, etc.
        url += "?" + urllib.parse.urlencode(params)
    req = urllib.request.Request(url, headers=HEADERS)
    for _attempt in range(3):
        try:
            with urllib.request.urlopen(req) as resp:
                return json.loads(resp.read())
        except urllib.error.HTTPError as e:
            if e.code == 429:
                wait = int(e.headers.get("Retry-After", 2))
                print(f"  Rate limited, waiting {wait}s...")
                time.sleep(wait)
            else:
                print(f"  HTTP {e.code} for {path}: {e.read().decode()[:200]}")
                raise
    return None


def api_post(path, body):
    """POST *body* (JSON-encoded) to a Notion API endpoint.

    Retries up to three times when rate-limited (HTTP 429), honouring the
    Retry-After header (defaulting to 2 seconds). Any other HTTP error is
    logged and re-raised. Returns the parsed JSON response, or None if
    every attempt was rate-limited.
    """
    payload = json.dumps(body).encode()
    request = urllib.request.Request(
        f"{BASE_URL}{path}", data=payload, headers=HEADERS, method="POST"
    )
    attempts_left = 3
    while attempts_left:
        attempts_left -= 1
        try:
            with urllib.request.urlopen(request) as resp:
                return json.loads(resp.read())
        except urllib.error.HTTPError as err:
            if err.code != 429:
                print(f"  HTTP {err.code} for {path}: {err.read().decode()[:200]}")
                raise
            wait = int(err.headers.get("Retry-After", 2))
            print(f"  Rate limited, waiting {wait}s...")
            time.sleep(wait)
    return None


def get_all_blocks(block_id):
    """Recursively fetch every block under a page/block.

    Paginates through the children endpoint 100 blocks at a time. Children
    of container blocks (except child pages/databases, which are exported
    separately) are fetched recursively and attached under the synthetic
    "_children" key. Returns a flat list of this level's blocks.
    """
    collected = []
    next_cursor = None
    while True:
        query = {"page_size": "100"}
        if next_cursor:
            query["start_cursor"] = next_cursor
        page = api_get(f"/blocks/{block_id}/children", query)
        if not page:
            return collected
        for child in page.get("results", []):
            collected.append(child)
            nested = child.get("has_children")
            if nested and child["type"] not in ("child_page", "child_database"):
                child["_children"] = get_all_blocks(child["id"])
        if not page.get("has_more"):
            return collected
        next_cursor = page.get("next_cursor")


def rich_text_to_md(rich_texts):
    """Convert a Notion rich_text array into a single markdown string.

    Each segment's plain text is wrapped innermost-to-outermost with code,
    bold, italic, strikethrough, then underline markers, and finally turned
    into a link when the segment carries an href.
    """
    rendered = []
    for segment in rich_texts:
        piece = segment.get("plain_text", "")
        ann = segment.get("annotations", {})

        # Apply markdown wrappers in fixed order (code ends up innermost).
        for key, marker in (("code", "`"), ("bold", "**"),
                            ("italic", "*"), ("strikethrough", "~~")):
            if ann.get(key):
                piece = f"{marker}{piece}{marker}"
        if ann.get("underline"):
            # Markdown has no underline; fall back to inline HTML.
            piece = f"<u>{piece}</u>"

        link = segment.get("href")
        if link:
            piece = f"[{piece}]({link})"

        rendered.append(piece)
    return "".join(rendered)


def blocks_to_md(blocks, indent=0):
    """Convert a list of Notion blocks to markdown.

    Args:
        blocks: Notion block dicts; nested children are expected under the
            synthetic "_children" key that get_all_blocks() attaches.
        indent: nesting depth; each level adds two spaces of list indentation.

    Returns:
        The markdown text for this block list, joined with newlines.
    """
    lines = []
    prefix = "  " * indent
    # Counts consecutive numbered_list_item blocks; reset by any other block
    # type at the bottom of the loop so each new list restarts at 1.
    numbered_counter = 0

    for block in blocks:
        btype = block["type"]
        # A block's payload lives under a key named after its type.
        data = block.get(btype, {})

        if btype == "paragraph":
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}{text}")
            lines.append("")

        elif btype in ("heading_1", "heading_2", "heading_3"):
            # Heading level is the digit at the end of the type name.
            level = int(btype[-1])
            text = rich_text_to_md(data.get("rich_text", []))
            # NOTE(review): headings ignore `prefix`, so nested headings render
            # at column 0 — confirm this flattening is intended.
            lines.append(f"{'#' * level} {text}")
            lines.append("")

        elif btype == "bulleted_list_item":
            numbered_counter = 0
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}- {text}")
            if block.get("_children"):
                child_md = blocks_to_md(block["_children"], indent + 1)
                lines.append(child_md)

        elif btype == "numbered_list_item":
            numbered_counter += 1
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}{numbered_counter}. {text}")
            if block.get("_children"):
                child_md = blocks_to_md(block["_children"], indent + 1)
                lines.append(child_md)

        elif btype == "to_do":
            # GitHub-style task list item.
            checked = "x" if data.get("checked") else " "
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}- [{checked}] {text}")
            # NOTE(review): to_do "_children" (if any) are not rendered here —
            # confirm intended.

        elif btype == "toggle":
            # Rendered as an HTML <details> disclosure element.
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}<details>")
            lines.append(f"{prefix}<summary>{text}</summary>")
            lines.append("")
            if block.get("_children"):
                lines.append(blocks_to_md(block["_children"], indent))
            lines.append(f"{prefix}</details>")
            lines.append("")

        elif btype == "code":
            # Fenced code block with Notion's language tag as the info string.
            text = rich_text_to_md(data.get("rich_text", []))
            lang = data.get("language", "")
            lines.append(f"{prefix}```{lang}")
            lines.append(text)
            lines.append(f"{prefix}```")
            lines.append("")

        elif btype == "quote":
            text = rich_text_to_md(data.get("rich_text", []))
            for line in text.split("\n"):
                lines.append(f"{prefix}> {line}")
            lines.append("")
            if block.get("_children"):
                # NOTE(review): children are quoted without `prefix` (unlike
                # the callout branch below) — confirm whether that asymmetry
                # is deliberate.
                child_lines = blocks_to_md(block["_children"], indent).split("\n")
                for cl in child_lines:
                    lines.append(f"> {cl}" if cl.strip() else ">")
                lines.append("")

        elif btype == "callout":
            # Callouts become blockquotes, prefixed with the emoji icon if set.
            icon = ""
            icon_data = data.get("icon", {})
            if icon_data and icon_data.get("type") == "emoji":
                icon = icon_data["emoji"] + " "
            text = rich_text_to_md(data.get("rich_text", []))
            lines.append(f"{prefix}> {icon}{text}")
            if block.get("_children"):
                child_lines = blocks_to_md(block["_children"], indent).split("\n")
                for cl in child_lines:
                    lines.append(f"{prefix}> {cl}" if cl.strip() else f"{prefix}>")
            lines.append("")

        elif btype == "divider":
            numbered_counter = 0
            lines.append(f"{prefix}---")
            lines.append("")

        elif btype == "image":
            caption = rich_text_to_md(data.get("caption", []))
            url = ""
            # "external" images link out; "file" images are Notion-hosted
            # (those URLs are signed and expire — they are embedded as-is).
            if data.get("type") == "external":
                url = data["external"]["url"]
            elif data.get("type") == "file":
                url = data["file"]["url"]
            if caption:
                lines.append(f"{prefix}![{caption}]({url})")
            else:
                lines.append(f"{prefix}![]({url})")
            lines.append("")

        elif btype == "bookmark":
            url = data.get("url", "")
            caption = rich_text_to_md(data.get("caption", []))
            if caption:
                lines.append(f"{prefix}[{caption}]({url})")
            else:
                # No caption: emit an autolink.
                lines.append(f"{prefix}<{url}>")
            lines.append("")

        elif btype == "embed":
            url = data.get("url", "")
            lines.append(f"{prefix}<{url}>")
            lines.append("")

        elif btype == "table":
            # Table rows arrive as table_row children; emit a GFM pipe table.
            if block.get("_children"):
                rows = block["_children"]
                for i, row in enumerate(rows):
                    if row["type"] == "table_row":
                        cells = row["table_row"]["cells"]
                        cell_texts = [rich_text_to_md(cell) for cell in cells]
                        lines.append(f"{prefix}| " + " | ".join(cell_texts) + " |")
                        # Header separator goes right after the first row.
                        if i == 0 and data.get("has_column_header"):
                            lines.append(f"{prefix}| " + " | ".join(["---"] * len(cells)) + " |")
            lines.append("")

        elif btype == "column_list":
            # Markdown has no columns; column contents are flattened in order.
            if block.get("_children"):
                for col_block in block["_children"]:
                    if col_block.get("_children"):
                        lines.append(blocks_to_md(col_block["_children"], indent))
            lines.append("")

        elif btype == "child_page":
            # Child pages are exported as their own files; leave a pointer.
            title = data.get("title", "")
            lines.append(f"{prefix}> [子页面: {title}]")
            lines.append("")

        elif btype == "child_database":
            title = data.get("title", "")
            lines.append(f"{prefix}> [子数据库: {title}]")
            lines.append("")

        elif btype == "link_preview":
            url = data.get("url", "")
            lines.append(f"{prefix}<{url}>")
            lines.append("")

        elif btype == "equation":
            # Display math in $$ ... $$ fences (KaTeX/MathJax style).
            expr = data.get("expression", "")
            lines.append(f"{prefix}$$")
            lines.append(f"{prefix}{expr}")
            lines.append(f"{prefix}$$")
            lines.append("")

        elif btype == "table_of_contents":
            # [TOC] is understood by several markdown renderers.
            lines.append(f"{prefix}[TOC]")
            lines.append("")

        elif btype in ("breadcrumb", "unsupported"):
            # Nothing meaningful to render.
            pass

        else:
            # fallback: try to extract rich_text
            if "rich_text" in data:
                text = rich_text_to_md(data["rich_text"])
                lines.append(f"{prefix}{text}")
                lines.append("")
            else:
                lines.append(f"{prefix}<!-- unsupported block type: {btype} -->")
                lines.append("")

        # Reset numbered counter for non-numbered items
        if btype != "numbered_list_item":
            numbered_counter = 0

    return "\n".join(lines)


def sanitize_filename(title):
    """Convert a title to a safe filename.

    Keeps word characters (including CJK), spaces and hyphens, collapses
    whitespace runs into single hyphens, and caps the result at 80 chars.
    Falls back to "untitled" when nothing survives the cleanup.
    """
    cleaned = re.sub(r'[^\w\u4e00-\u9fff\s-]', '', title).strip()
    slug = "-".join(cleaned.split())
    return slug[:80] if slug else "untitled"


def search_pages():
    """Return every page object visible to the integration.

    Uses the /search endpoint filtered to "page" objects and follows
    cursor-based pagination until exhausted (or until a request fails
    after retries, in which case the pages collected so far are returned).
    """
    found = []
    cursor = None
    while True:
        request_body = {"filter": {"property": "object", "value": "page"}, "page_size": 100}
        if cursor:
            request_body["start_cursor"] = cursor
        response = api_post("/search", request_body)
        if not response:
            return found
        found.extend(response.get("results", []))
        if not response.get("has_more"):
            return found
        cursor = response.get("next_cursor")


def get_page_title(page):
    """Return the page's title rendered as markdown, or "Untitled".

    Workspace/child pages expose their title under a property literally
    named "title"; database rows name their title column arbitrarily but
    it always has type "title". The previous version only checked the
    literal name, so every database row came out as "Untitled".
    """
    props = page.get("properties", {})
    title_arr = props.get("title", {}).get("title", [])
    if not title_arr:
        # Fall back to any property whose type is "title" (database rows).
        for prop in props.values():
            if prop.get("type") == "title" and prop.get("title"):
                title_arr = prop["title"]
                break
    return rich_text_to_md(title_arr) if title_arr else "Untitled"


def export_page(page_id, title, output_path):
    """Export a single page to a markdown file at *output_path*.

    Fetches the page's full block tree, renders it below an H1 of the
    page title, squeezes runs of blank lines, and writes the result
    UTF-8 encoded.
    """
    print(f"  Fetching blocks for: {title}")
    top_blocks = get_all_blocks(page_id)
    print(f"  Got {len(top_blocks)} top-level blocks")

    markdown = f"# {title}\n\n" + blocks_to_md(top_blocks)

    # Collapse 3+ consecutive newlines into a single blank line.
    markdown = re.sub(r'\n{3,}', '\n\n', markdown)

    with open(output_path, "w", encoding="utf-8") as out:
        out.write(markdown)
    print(f"  Saved: {output_path}")


def main():
    """Export every page reachable via the Notion search API to OUTPUT_DIR.

    Filenames get numeric prefixes so chapters sort naturally: root pages
    and guides get "00-", chapters use their chapter number, reference/
    research pages get "99-", everything else is just the sanitized title.
    (The previous version also built an unused parent-child page map; that
    dead code has been removed.)
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print("Searching for pages in Notion workspace...")
    pages = search_pages()
    print(f"Found {len(pages)} pages\n")

    exported = 0
    for page in pages:
        pid = page["id"]
        title = get_page_title(page)
        parent = page.get("parent", {})

        # Determine filename based on hierarchy
        if parent.get("type") == "workspace":
            # Root level page
            filename = f"00-{sanitize_filename(title)}.md"
        else:
            # Chapter or sub-page: try to extract a chapter number from
            # titles like "第3章 ..." or "第三章 ...".
            ch_match = re.search(r'第\s*(\d+)\s*章', title) or re.search(r'第([一二三四五六七八九十]+)章', title)
            if ch_match:
                ch_num = ch_match.group(1)
                # Map Chinese numerals to digits for sortable prefixes.
                cn_map = {"一": "1", "二": "2", "三": "3", "四": "4", "五": "5",
                          "六": "6", "七": "7", "八": "8", "九": "9", "十": "10"}
                ch_num = cn_map.get(ch_num, ch_num)
                filename = f"{ch_num.zfill(2)}-{sanitize_filename(title)}.md"
            elif "References" in title or "调研" in title:
                filename = f"99-{sanitize_filename(title)}.md"
            elif "指南" in title or "部署" in title:
                filename = f"00-{sanitize_filename(title)}.md"
            else:
                filename = f"{sanitize_filename(title)}.md"

        output_path = os.path.join(OUTPUT_DIR, filename)
        print(f"[{exported + 1}/{len(pages)}] {title}")
        export_page(pid, title, output_path)
        exported += 1
        print()

    print(f"\nDone! Exported {exported} pages to {OUTPUT_DIR}")


if __name__ == "__main__":
    main()
