#!/usr/bin/env python3
"""
Markdown QA and Cleanup Script for Silence Isn't Distance
Performs comprehensive validation and fixes for EPUB/KDP export
"""

import os
import re
from pathlib import Path
from typing import Dict, List, Tuple

# Directory containing the manuscript Markdown files that are read, fixed in
# place, and concatenated into MASTER_BOOK.md.
EXPORT_READY_DIR = Path("export-ready")
# Directory that receives the generated QA report files.
DOCS_DIR = Path("docs")

# File order mapping from MASTER_INDEX
# Keys are filenames inside EXPORT_READY_DIR; values are the position of each
# file in the book. The value is used both as the `order` front matter field
# (when a file has none) and as the sort key for reports and the master book.
# Fractional values (2.5, 25.5) place a file between two integer positions.
FILE_ORDER = {
    "00-cover.md": 0,
    "01-title-page.md": 1,
    "02-dedication.md": 2,
    "02a-content-considerations.md": 2.5,
    "03-preface.md": 3,
    "04-how-to-use-this-book.md": 4,
    "05-introduction.md": 5,
    "06-chapter-01-overloaded-mind.md": 6,
    "07-chapter-02-emotional-architecture.md": 7,
    "08-chapter-03-mirror-misunderstanding.md": 8,
    "09-chapter-04-science-of-withdrawal.md": 9,
    "10-chapter-05-shutdown-triggers.md": 10,
    "11-chapter-06-survival-mode-love.md": 11,
    "12-chapter-07-overfunctioning.md": 12,
    "13-chapter-08-projection-mirror.md": 13,
    "14-chapter-09-burnout-breakdown-dissociation.md": 14,
    "15-chapter-10-loving-someone-who-retreats.md": 15,
    "16-chapter-11-what-people-get-wrong.md": 16,
    "17-chapter-12-repair-reconnection.md": 17,
    "18-chapter-13-emotional-bandwidth.md": 18,
    "19-chapter-14-healing-survival-patterns.md": 19,
    "20-chapter-15-coming-home-to-silence.md": 20,
    "21-epilogue.md": 21,
    "22-acknowledgements.md": 22,
    "23-about-the-author.md": 23,
    "24-back-of-book-summary.md": 24,
    "25-bibliography.md": 25,
    "25a-glossary.md": 25.5,
}

def extract_front_matter(content: str) -> Tuple[Dict, str]:
    """Split a Markdown document into its YAML front matter and body.

    The front matter is the text between a leading ``---`` and the next
    ``---`` that starts a line. Only flat ``key: value`` lines are parsed;
    anything else inside the block (lists, nested mappings, keys containing
    characters other than ``\\w``) is ignored.

    Args:
        content: Full text of the Markdown file.

    Returns:
        A ``(front_matter, body)`` tuple. ``front_matter`` maps each key to
        its value as a string with surrounding whitespace and quote
        characters removed. ``body`` is the remaining text, stripped of
        leading and trailing whitespace. When the document has no front
        matter, or the block is never closed, the result is
        ``({}, content)`` with ``content`` returned unchanged.
    """
    if not content.startswith("---"):
        return {}, content

    # The closing delimiter has to begin a line. Searching for a bare "---"
    # would stop inside a value such as `title: "A --- B"`, truncating the
    # front matter and leaking the remainder into the body.
    closing = content.find("\n---", 3)
    if closing == -1:
        return {}, content

    front_matter = {}
    for line in content[3:closing].strip().split("\n"):
        match = re.match(r"^(\w+):\s*(.+)$", line)
        if match:
            # Drop surrounding whitespace first, then any wrapping quotes.
            front_matter[match.group(1)] = match.group(2).strip().strip('"\'')

    # Skip the newline plus the three dashes of the closing delimiter.
    body = content[closing + 4:].strip()
    return front_matter, body

def fix_front_matter(filename: str, front_matter: Dict) -> Dict:
    """Return a copy of the front matter that is guaranteed to have ``order``.

    Args:
        filename: Name of the file, used to look up its position in
            ``FILE_ORDER`` (files not listed there get position 0).
        front_matter: Parsed front matter; it is not modified.

    Returns:
        A new dict. If ``order`` was already present the copy is returned
        unchanged. Otherwise ``order`` is added as a string (whole numbers
        without a decimal point) and any ``chapter`` key is removed.
    """
    updated = front_matter.copy()

    # An existing order value is kept as-is, and so is any chapter key.
    if "order" in updated:
        return updated

    position = FILE_ORDER.get(filename, 0)
    if position == int(position):
        updated["order"] = str(int(position))
    else:
        updated["order"] = str(position)

    # The chapter field is replaced by the order field that was just added.
    updated.pop("chapter", None)
    return updated

# Matches *text* emphasis while leaving **bold**, list bullets, spaced-out
# asterisks (e.g. "2 * 3") and backslash-escaped asterisks alone.
_ITALIC_RE = re.compile(
    r"""
    (?<![*\\])\*          # opening '*': not part of '**' and not escaped
    (?![\s*])             # followed directly by text (a bullet has a space here)
    (                     # the emphasised text:
      (?:
        [^*\n]            #   any character except '*' or a line break,
        | \n(?![ \t]*\n)  #   or a single line break, never a paragraph break
      )+?
    )
    (?<![\s\\])\*         # closing '*': directly after text and not escaped
    (?!\*)                # and not the start of '**'
    """,
    re.VERBOSE,
)


def fix_italics(content: str) -> str:
    """Convert ``*italics*`` to ``_italics_`` without touching other asterisks.

    An asterisk pair is converted only when the opening ``*`` is immediately
    followed by a non-space character and the closing ``*`` is immediately
    preceded by one. As a result ``**bold**``, ``* list items``, arithmetic
    such as ``2 * 3 * 4`` and escaped ``\\*`` are left unchanged. Emphasis
    may wrap across a single line break but never across a blank line.

    Code spans and fenced code blocks are not recognised, so asterisk pairs
    inside them are converted like any other text.

    Args:
        content: Markdown text to convert.

    Returns:
        The text with single-asterisk emphasis rewritten using underscores.
    """
    return _ITALIC_RE.sub(r"_\1_", content)

def fix_cover_path(content: str) -> str:
    """Rewrite image links that point at ``../assets/`` to use ``assets/``.

    Every image whose target starts with ``../assets/`` gets the alt text
    ``Silence Isn't Distance Cover`` and a target relative to ``assets/``.
    The rest of the link (file name and closing parenthesis) is untouched.

    Args:
        content: Markdown text to fix.

    Returns:
        The text with the matching image links rewritten.
    """
    return re.sub(
        # The alt text may not contain ']' or a line break, so a match can
        # never start at an earlier, unrelated image on the same line.
        r'!\[[^\]\n]*\]\(\.\./assets/',
        # No backslashes here: re.sub keeps an unknown escape such as \'
        # verbatim, which would put a literal backslash into the alt text.
        "![Silence Isn't Distance Cover](assets/",
        content,
    )

def check_heading_hierarchy(content: str) -> List[str]:
    """Report headings that are deeper than H3.

    A line counts as a heading of level N when it starts with exactly N
    ``#`` characters. Levels 4 to 6 are reported with their real level.
    Lines starting with seven or more ``#`` are not Markdown headings and
    are ignored.

    Args:
        content: Markdown text to inspect.

    Returns:
        One message per offending line, in document order, each naming the
        1-based line number and the heading level found.
    """
    issues = []
    for line_no, line in enumerate(content.split("\n"), 1):
        # Count the leading '#' run so H5/H6 are not misreported as H4.
        level = len(line) - len(line.lstrip("#"))
        if 4 <= level <= 6:
            issues.append(f"Line {line_no}: H{level} heading found (should be H3 max)")
    return issues

def check_spacing(content: str) -> List[str]:
    """Check that the YAML front matter is separated from the body.

    The text directly after the closing ``---`` is accepted when it is
    empty, a single newline, starts with a blank line, or starts with a
    newline followed by a ``#`` heading. Anything else is reported.

    Args:
        content: Full text of the Markdown file.

    Returns:
        A list with at most one message. It is empty when the spacing is
        fine or the document has no closed front matter block.
    """
    issues = []
    if not content.startswith("---"):
        return issues

    # The closing delimiter must start a line; a bare "---" search would
    # stop inside a value such as `title: "A --- B"` and raise a false alarm.
    closing = content.find("\n---", 3)
    if closing == -1:
        return issues

    after_front = content[closing + 4:]
    if not after_front:
        return issues

    if not after_front.startswith("\n"):
        # Body text sits on the same line as the closing delimiter.
        issues.append("Missing blank line after YAML front matter")
    elif len(after_front) > 1 and after_front[1] not in "\n#":
        # Only one newline before ordinary body text.
        issues.append("Missing blank line after YAML front matter")
    return issues

def process_file(filename: str) -> Dict:
    """Apply all fixes and checks to one file in ``EXPORT_READY_DIR``.

    The front matter is normalised, italics and the cover image path are
    fixed in the body, and the rebuilt document is checked for heading and
    spacing issues. The file is rewritten only when at least one fix was
    applied or at least one issue was found.

    Args:
        filename: Name of the file relative to ``EXPORT_READY_DIR``.

    Returns:
        ``{"error": ...}`` when the file does not exist; otherwise a dict
        with the keys ``fixes``, ``heading_issues``, ``spacing_issues`` and
        ``front_matter`` (the normalised front matter).
    """
    filepath = EXPORT_READY_DIR / filename
    if not filepath.exists():
        return {"error": f"File not found: {filename}"}

    original = filepath.read_text(encoding='utf-8')
    front_matter, body = extract_front_matter(original)
    fixes = []

    fixed_front_matter = fix_front_matter(filename, front_matter)
    if fixed_front_matter != front_matter:
        fixes.append("Fixed front matter (converted chapter to order or set order)")

    # Body fixes run in this order; a message is recorded only when the
    # fix actually changed the text.
    body_fixers = (
        (fix_italics, "Converted *italics* to _italics_"),
        (fix_cover_path, "Fixed cover image path"),
    )
    for fixer, message in body_fixers:
        candidate = fixer(body)
        if candidate != body:
            fixes.append(message)
            body = candidate

    # Serialise the front matter: every value is double-quoted, except a
    # numeric order, which is written bare (as an int when it is whole).
    yaml_lines = []
    for key, value in fixed_front_matter.items():
        rendered = f"\"{value}\""
        if key == "order":
            try:
                number = float(value)
                rendered = str(int(number)) if number == int(number) else str(number)
            except ValueError:
                # Non-numeric order: keep the quoted form.
                pass
        yaml_lines.append(f"{key}: {rendered}\n")

    # Exactly one blank line between the front matter and the body.
    new_content = "---\n" + "".join(yaml_lines) + "---\n\n" + body.lstrip()

    heading_issues = check_heading_hierarchy(new_content)
    spacing_issues = check_spacing(new_content)

    if fixes or heading_issues or spacing_issues:
        filepath.write_text(new_content, encoding='utf-8')

    return {
        "fixes": fixes,
        "heading_issues": heading_issues,
        "spacing_issues": spacing_issues,
        "front_matter": fixed_front_matter
    }

def main():
    """Process every file in ``FILE_ORDER`` and write the reports.

    Files are processed in alphabetical filename order. Afterwards the
    cleanup report, readiness report and front matter audit are written to
    ``DOCS_DIR`` and the concatenated master book to ``EXPORT_READY_DIR``.
    """
    print("🔍 Starting Markdown QA and Cleanup...")
    print("")

    all_results = {}
    all_front_matter = {}

    for filename in sorted(FILE_ORDER):
        print(f"Processing {filename}...")
        outcome = process_file(filename)
        all_results[filename] = outcome
        # Missing files return only an "error" key and have no front matter.
        if "front_matter" in outcome:
            all_front_matter[filename] = outcome["front_matter"]

    print("\n📊 Generating reports...")

    # Each output is built immediately before it is written, in this order.
    outputs = (
        (DOCS_DIR / "MARKDOWN_CLEANUP_REPORT.md",
         lambda: generate_cleanup_report(all_results)),
        (DOCS_DIR / "EXPORT_READINESS_REPORT.md",
         lambda: generate_readiness_report(all_results)),
        (DOCS_DIR / "FRONTMATTER_AUDIT.md",
         lambda: generate_front_matter_audit(all_front_matter)),
        (EXPORT_READY_DIR / "MASTER_BOOK.md",
         generate_master_book),
    )
    for destination, build in outputs:
        text = build()
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(text)

    print("\n✅ QA Complete! Reports generated in docs/")
    print("✅ Master book created: export-ready/MASTER_BOOK.md")

def generate_cleanup_report(results: Dict) -> str:
    """Build the Markdown cleanup report.

    Args:
        results: Mapping of filename to the dict returned by
            ``process_file``. Files are listed in the mapping's own order.

    Returns:
        The report text: a header, one section per file (error, applied
        fixes and issues, or "no issues"), and the total number of fixes.
    """
    parts = [
        "# Markdown Cleanup Report\n\n",
        "**Date:** 2025-12-06\n",
        "**Project:** Silence Isn't Distance — Markdown QA\n\n",
        "---\n\n",
    ]
    total_fixes = 0

    for filename, result in results.items():
        section_title = f"## {filename}\n\n"

        if "error" in result:
            parts.append(f"{section_title}❌ **Error:** {result['error']}\n\n")
            continue

        fixes = result["fixes"]
        heading_issues = result["heading_issues"]
        spacing_issues = result["spacing_issues"]

        if not (fixes or heading_issues or spacing_issues):
            parts.append(f"{section_title}✅ No issues found.\n\n")
            continue

        parts.append(section_title)

        if fixes:
            parts.append("**Fixes Applied:**\n")
            parts.extend(f"- ✅ {fix}\n" for fix in fixes)
            parts.append("\n")
            total_fixes += len(fixes)

        # Both issue lists share one layout; only the caption differs.
        for caption, entries in (
            ("**Heading Issues:**\n", heading_issues),
            ("**Spacing Issues:**\n", spacing_issues),
        ):
            if entries:
                parts.append(caption)
                parts.extend(f"- ⚠️ {entry}\n" for entry in entries)
                parts.append("\n")

    parts.append(f"\n---\n\n**Total Fixes Applied:** {total_fixes}\n")
    return "".join(parts)

def generate_readiness_report(results: Dict) -> str:
    """Build the export readiness report.

    Files are listed in ``FILE_ORDER`` order (unknown files sort as 0). A
    file is "ready" when it has neither heading nor spacing issues, has
    "warnings" when it has any, and is "not ready" when it is missing.

    The overall status is "Ready for Export" only when no file has
    warnings and no file is missing. A ``**Missing:**`` summary line is
    added when at least one file is missing.

    Args:
        results: Mapping of filename to the dict returned by
            ``process_file``.

    Returns:
        The report text.
    """
    parts = [
        "# Export Readiness Report\n\n",
        "**Date:** 2025-12-06\n",
        "**Target Formats:** EPUB, KDP HTML, Print PDF\n\n",
        "---\n\n",
    ]

    ready_count = 0
    warning_count = 0
    missing_count = 0

    for filename, result in sorted(results.items(), key=lambda x: FILE_ORDER.get(x[0], 0)):
        if "error" in result:
            parts.append(f"### {filename}\n\n❌ **NOT READY** — File missing\n\n")
            missing_count += 1
            continue

        issues = []
        if result["heading_issues"]:
            issues.extend(result["heading_issues"])
        if result["spacing_issues"]:
            issues.extend(result["spacing_issues"])

        if issues:
            parts.append(f"### {filename}\n\n⚠️ **WARNINGS:**\n")
            parts.extend(f"- {issue}\n" for issue in issues)
            parts.append("\n")
            warning_count += 1
        else:
            parts.append(f"### {filename}\n\n✅ **100% READY**\n\n")
            ready_count += 1

    # A missing file blocks the export just like a warning does; without
    # this the summary could claim readiness while files are absent.
    export_ready = warning_count == 0 and missing_count == 0

    parts.append("\n---\n\n")
    parts.append(f"**Ready:** {ready_count}/{len(results)} files\n")
    parts.append(f"**Warnings:** {warning_count} files\n")
    if missing_count:
        parts.append(f"**Missing:** {missing_count} files\n")
    parts.append(f"**Status:** {'✅ Ready for Export' if export_ready else '⚠️ Review Warnings'}\n")

    return "".join(parts)

def generate_front_matter_audit(front_matters: Dict) -> str:
    """Build the front matter audit report.

    Args:
        front_matters: Mapping of filename to its front matter dict.

    Returns:
        The report text: a header followed by one section per file in
        ``FILE_ORDER`` order (unknown files sort as 0). Each section shows
        every field as a quoted YAML line and the file's ``order`` value,
        or ``MISSING`` when it has none.
    """
    chunks = [
        "# Front Matter Audit\n\n",
        "**Date:** 2025-12-06\n\n",
        "---\n\n",
    ]

    ordered_names = sorted(front_matters, key=lambda name: FILE_ORDER.get(name, 0))
    for name in ordered_names:
        fields = front_matters[name]
        yaml_block = "".join(f"{key}: \"{value}\"\n" for key, value in fields.items())
        chunks.append(f"## {name}\n\n")
        chunks.append(f"```yaml\n{yaml_block}```\n\n")
        chunks.append(f"**Order Value:** {fields.get('order', 'MISSING')}\n\n")
        chunks.append("---\n\n")

    return "".join(chunks)

def generate_master_book() -> str:
    """Concatenate all manuscript files into one Markdown document.

    Files are read from ``EXPORT_READY_DIR`` in ``FILE_ORDER`` order;
    files that do not exist are skipped. The YAML front matter of each
    file is removed, and every file is followed by a
    ``<!-- Page Break -->`` marker.

    Returns:
        The combined document text (empty when no file exists).
    """
    sections = []

    for filename in sorted(FILE_ORDER, key=FILE_ORDER.get):
        filepath = EXPORT_READY_DIR / filename
        if not filepath.exists():
            continue

        content = filepath.read_text(encoding='utf-8')

        # Remove YAML front matter. The closing delimiter must start a
        # line; a bare "---" search would stop inside a value such as
        # `title: "A --- B"` and leak front matter into the book.
        if content.startswith("---"):
            closing = content.find("\n---", 3)
            if closing != -1:
                content = content[closing + 4:].strip()

        sections.append(content + "\n\n<!-- Page Break -->\n\n")

    return "".join(sections)

# Run the full QA pass only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()