#!/usr/bin/env python3
"""
TurboStarter Documentation Chunker

Downloads llms.txt from TurboStarter and splits it into organized markdown files.
Creates an index.md with navigation and a CLAUDE.md context file.

Usage:
    python refresh-docs.py

Or make executable:
    chmod +x refresh-docs.py
    ./refresh-docs.py
"""

import os
import re
import urllib.request
from pathlib import Path
from datetime import datetime
from collections import defaultdict

# Configuration
LLMS_URL = "https://www.turbostarter.dev/llms.txt"
DOCS_DIR = Path(__file__).parent
OUTPUT_DIR = DOCS_DIR / "sections"

def download_llms_txt():
    """Download the latest llms.txt from TurboStarter."""
    print(f"Downloading from {LLMS_URL}...")
    with urllib.request.urlopen(LLMS_URL) as response:
        content = response.read().decode('utf-8')

    # Save full file
    full_path = DOCS_DIR / "llms-full.txt"
    full_path.write_text(content)
    print(f"Saved full file to {full_path} ({len(content)} bytes)")
    return content

def parse_frontmatter(text):
    """Extract YAML frontmatter from a document section."""
    match = re.match(r'^---\s*\n(.*?)\n---\s*\n', text, re.DOTALL)
    if not match:
        return None, text

    frontmatter = {}
    for line in match.group(1).strip().split('\n'):
        if ':' in line:
            key, value = line.split(':', 1)
            frontmatter[key.strip()] = value.strip()

    content = text[match.end():]
    return frontmatter, content

def url_to_path(url):
    """Convert URL path to filesystem path."""
    # /docs/web/database -> web/database
    path = url.lstrip('/')
    if path.startswith('docs/'):
        path = path[5:]
    return path

def chunk_docs(content):
    """Split llms.txt into individual documents."""
    # Split by document separator (--- at start of line followed by url:)
    sections = re.split(r'\n(?=---\s*\nurl:)', content)

    docs = []
    for section in sections:
        section = section.strip()
        if not section:
            continue

        frontmatter, body = parse_frontmatter(section)
        if frontmatter and 'url' in frontmatter:
            docs.append({
                'url': frontmatter.get('url', ''),
                'title': frontmatter.get('title', 'Untitled'),
                'description': frontmatter.get('description', ''),
                'content': body.strip(),
                'path': url_to_path(frontmatter.get('url', ''))
            })

    return docs

def organize_by_category(docs):
    """Group documents by their top-level category."""
    categories = defaultdict(list)
    for doc in docs:
        parts = doc['path'].split('/')
        if parts:
            category = parts[0]  # web, mobile, extension, etc.
            categories[category].append(doc)
    return dict(categories)

def save_docs(docs):
    """Save chunked documents to filesystem."""
    # Clean output directory
    if OUTPUT_DIR.exists():
        import shutil
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(parents=True)

    # Group by category
    categories = organize_by_category(docs)

    saved_files = []
    for category, category_docs in categories.items():
        category_dir = OUTPUT_DIR / category
        category_dir.mkdir(parents=True, exist_ok=True)

        for doc in category_docs:
            # Create subdirectories if needed
            path_parts = doc['path'].split('/')
            if len(path_parts) > 1:
                subdir = category_dir / '/'.join(path_parts[1:-1]) if len(path_parts) > 2 else category_dir
                subdir.mkdir(parents=True, exist_ok=True)
                filename = path_parts[-1] + '.md'
                filepath = subdir / filename
            else:
                filepath = category_dir / 'index.md'

            # Build markdown content
            md_content = f"""---
title: {doc['title']}
description: {doc['description']}
url: {doc['url']}
---

# {doc['title']}

{doc['content']}
"""
            filepath.write_text(md_content)
            saved_files.append({
                'filepath': filepath.relative_to(DOCS_DIR),
                'title': doc['title'],
                'description': doc['description'],
                'url': doc['url'],
                'category': category
            })

    print(f"Saved {len(saved_files)} documentation files")
    return saved_files, categories

def generate_index(saved_files, categories, docs):
    """Generate index.md with rich contextual navigation."""
    lines = [
        "# TurboStarter Documentation Index",
        "",
        f"**Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}  ",
        f"**Total pages:** {len(docs)}  ",
        f"**Source:** https://www.turbostarter.dev/llms.txt",
        "",
        "---",
        "",
        "## Quick Reference",
        "",
        "Use this index to find TurboStarter documentation. Each link includes a description.",
        "",
    ]

    # Category overview with counts and key topics
    lines.append("### Categories Overview")
    lines.append("")
    lines.append("| Platform | Pages | Key Topics |")
    lines.append("|----------|-------|------------|")

    for category in sorted(categories.keys()):
        count = len(categories[category])
        # Extract unique subcategories as key topics
        subcats = set()
        for doc in categories[category]:
            parts = doc['path'].split('/')
            if len(parts) > 1:
                subcats.add(parts[1])
        topics = ', '.join(sorted(subcats)[:5])
        if len(subcats) > 5:
            topics += f' (+{len(subcats)-5} more)'
        lines.append(f"| **{category.title()}** | {count} | {topics} |")

    lines.append("")
    lines.append("---")
    lines.append("")

    # Detailed sections with full context
    for category in sorted(categories.keys()):
        lines.append(f"## {category.title()}")
        lines.append("")

        # Group by subcategory
        subcats = defaultdict(list)
        for doc in categories[category]:
            parts = doc['path'].split('/')
            subcat = parts[1] if len(parts) > 1 else 'overview'
            subcats[subcat].append(doc)

        for subcat in sorted(subcats.keys()):
            subcat_title = subcat.replace('-', ' ').replace('_', ' ').title()
            lines.append(f"### {subcat_title}")
            lines.append("")

            # Add a contextual summary based on descriptions
            subcat_docs = subcats[subcat]
            if len(subcat_docs) > 3:
                lines.append(f"*{len(subcat_docs)} pages covering {subcat_title.lower()} functionality.*")
                lines.append("")

            # Table format for better scanning
            lines.append("| Topic | Description |")
            lines.append("|-------|-------------|")
            for doc in sorted(subcat_docs, key=lambda d: d['title']):
                filepath = f"sections/{doc['path']}.md"
                # Truncate long descriptions
                desc = doc['description'][:80] + '...' if len(doc['description']) > 80 else doc['description']
                lines.append(f"| [{doc['title']}]({filepath}) | {desc} |")

            lines.append("")

    # Quick lookup section
    lines.append("---")
    lines.append("")
    lines.append("## Quick Lookup by Keyword")
    lines.append("")
    lines.append("Common searches and where to find them:")
    lines.append("")

    # Build keyword index from titles and descriptions
    keyword_map = defaultdict(list)
    keywords_of_interest = [
        'auth', 'login', 'oauth', 'session',
        'database', 'drizzle', 'postgres', 'migration',
        'api', 'hono', 'endpoint', 'route',
        'billing', 'stripe', 'payment', 'subscription',
        'email', 'smtp', 'template',
        'storage', 's3', 'upload', 'file',
        'i18n', 'translation', 'locale',
        'admin', 'user', 'role', 'permission',
        'organization', 'team', 'member',
        'ai', 'openai', 'anthropic', 'chat',
        'deploy', 'vercel', 'docker',
        'test', 'vitest', 'playwright',
    ]

    for doc in docs:
        text = f"{doc['title']} {doc['description']}".lower()
        for kw in keywords_of_interest:
            if kw in text:
                keyword_map[kw].append(doc)

    # Output keyword table
    lines.append("| Keyword | Related Docs |")
    lines.append("|---------|--------------|")
    for kw in sorted(keyword_map.keys()):
        related = keyword_map[kw][:3]  # Max 3 per keyword
        links = ', '.join([f"[{d['title']}](sections/{d['path']}.md)" for d in related])
        if len(keyword_map[kw]) > 3:
            links += f" (+{len(keyword_map[kw])-3} more)"
        lines.append(f"| `{kw}` | {links} |")

    lines.append("")

    index_path = DOCS_DIR / "index.md"
    index_path.write_text('\n'.join(lines))
    print(f"Generated index at {index_path}")

def generate_claude_md():
    """Generate CLAUDE.md context file for the docs folder."""
    content = """# TurboStarter Framework Context

TurboStarter framework documentation for AI context loading.

## When to Read More

**Read `index.md`** if you need to:
- Find TurboStarter documentation on a specific topic
- Search by keyword (auth, database, billing, api, etc.)
- Understand what documentation is available

**Read `framework.md`** for:
- pnpm commands and workflows
- Monorepo structure
- Code conventions

## Quick Reference

| Need | Read |
|------|------|
| Commands & patterns | `framework.md` |
| Authentication | `sections/web/auth/` |
| Database/Drizzle | `sections/web/database/` |
| API/Hono | `sections/web/api/` |
| Billing/Stripe | `sections/web/billing/` |
| UI Components | `sections/web/ui/` |
| Organizations | `sections/web/organizations/` |
| i18n | `sections/web/i18n/` |
| Mobile | `sections/mobile/` |

## Refreshing

```bash
python .context/turbostarter-framework-context/refresh-docs.py
```

## Notes

- These docs are **subordinate** to `.context/CLAUDE.md`
- Adapt patterns to match existing codebase, don't copy verbatim
- When in doubt, check the actual code in `packages/` and `apps/`
"""

    claude_path = DOCS_DIR / "CLAUDE.md"
    claude_path.write_text(content)
    print(f"Generated CLAUDE.md at {claude_path}")

def main():
    print("=" * 60)
    print("TurboStarter Documentation Chunker")
    print("=" * 60)
    print()

    # Download latest docs
    content = download_llms_txt()

    # Parse and chunk
    print("Parsing documentation sections...")
    docs = chunk_docs(content)
    print(f"Found {len(docs)} documentation pages")

    # Save to filesystem
    print("Saving chunked files...")
    saved_files, categories = save_docs(docs)

    # Generate navigation files
    print("Generating navigation files...")
    generate_index(saved_files, categories, docs)
    generate_claude_md()

    print()
    print("=" * 60)
    print("Done! Documentation is ready in .context/turbostarter-framework-context/")
    print("=" * 60)

if __name__ == "__main__":
    main()