#!/usr/bin/env python3
"""
Generate ElevenReader-optimized version of the book
Compliance fixes: Remove URLs, add disclaimer, optimize formatting
"""

import os
import re
from pathlib import Path

# Directory the per-section Markdown sources are read from. The path is
# relative, so it resolves against the current working directory.
EXPORT_READY_DIR = Path("export-ready")
# Directory the combined .md and .txt outputs are written to (also relative).
OUTPUT_DIR = Path("export")

# File order mapping: source filename -> sort key. main() processes the files
# in ascending key order. The fractional keys (2.5, 25.5) slot the
# "a"-suffixed files between their integer-numbered neighbours.
FILE_ORDER = {
    "00-cover.md": 0,
    "01-title-page.md": 1,
    "02-dedication.md": 2,
    "02a-content-considerations.md": 2.5,
    "03-preface.md": 3,
    "04-how-to-use-this-book.md": 4,
    "05-introduction.md": 5,
    "06-chapter-01-overloaded-mind.md": 6,
    "07-chapter-02-emotional-architecture.md": 7,
    "08-chapter-03-mirror-misunderstanding.md": 8,
    "09-chapter-04-science-of-withdrawal.md": 9,
    "10-chapter-05-shutdown-triggers.md": 10,
    "11-chapter-06-survival-mode-love.md": 11,
    "12-chapter-07-overfunctioning.md": 12,
    "13-chapter-08-projection-mirror.md": 13,
    "14-chapter-09-burnout-breakdown-dissociation.md": 14,
    "15-chapter-10-loving-someone-who-retreats.md": 15,
    "16-chapter-11-what-people-get-wrong.md": 16,
    "17-chapter-12-repair-reconnection.md": 17,
    "18-chapter-13-emotional-bandwidth.md": 18,
    "19-chapter-14-healing-survival-patterns.md": 19,
    "20-chapter-15-coming-home-to-silence.md": 20,
    "21-epilogue.md": 21,
    "22-acknowledgements.md": 22,
    "23-about-the-author.md": 23,
    "24-back-of-book-summary.md": 24,
    "25-bibliography.md": 25,
    "25a-glossary.md": 25.5,
}

# Mandatory disclaimer. main() inserts it once, after the title page and
# before the first chapter break. The leading and trailing "---" lines are
# Markdown horizontal rules that set the notice apart from the book text.
DISCLAIMER = """---

**Important Notice**

This book is for educational and reflective purposes only. It does not provide medical, psychological, or therapeutic advice. If you are struggling with mental health challenges, please seek support from a licensed professional.

---

"""

def remove_yaml_frontmatter(content: str) -> str:
    """Remove a leading YAML front matter block.

    Front matter is only recognised when the very first line is exactly
    ``---`` (trailing whitespace and a leading byte-order mark are tolerated)
    and a later line is again exactly ``---``. Matching whole lines keeps a
    ``---`` that appears inside a value or in body text from being mistaken
    for the closing delimiter, and keeps a document that merely opens with a
    horizontal rule from being truncated.

    Args:
        content: Full text of a Markdown file.

    Returns:
        The text after the closing delimiter with surrounding whitespace
        stripped, or ``content`` unchanged when no complete front matter
        block is present.
    """
    lines = content.split("\n")

    # ``rstrip`` also drops the "\r" left behind by CRLF line endings.
    if lines[0].lstrip("\ufeff").rstrip() != "---":
        return content

    for index in range(1, len(lines)):
        if lines[index].rstrip() == "---":
            return "\n".join(lines[index + 1:]).strip()

    # Opening delimiter without a closing one: not front matter, leave as is.
    return content

def remove_urls(content: str) -> str:
    """Remove URLs, keeping link text and turning URL citations into plain text.

    The steps run in this order:

    1. Markdown links ``[text](target)`` are reduced to ``text``.
    2. ``(Source: <url>)`` and ``(See: <url>)`` become fixed plain-text
       citations.
    3. Any remaining bare ``http(s)://`` or ``www.`` URLs are deleted.

    Step 2 must come before step 3: once the bare URL has been deleted the
    citation patterns can no longer match and only an empty ``(Source: )``
    would be left behind.

    Args:
        content: Markdown text.

    Returns:
        The text with URLs removed or replaced.
    """
    # Remove markdown links but keep text
    content = re.sub(r'\[([^\]]+)\]\(https?://[^\)]+\)', r'\1', content)
    content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)

    # Convert citation-style URLs to plain text while the URL is still there
    # to match against.
    content = re.sub(
        r'\(Source:\s*(?:https?://|www\.)[^\)]+\)',
        '(Source: Research cited)',
        content,
    )
    content = re.sub(
        r'\(See:\s*(?:https?://|www\.)[^\)]+\)',
        '(See: Referenced study)',
        content,
    )

    # Remove whatever bare URLs are left
    content = re.sub(r'https?://[^\s\)]+', '', content)
    content = re.sub(r'www\.[^\s\)]+', '', content)

    return content

def replace_one_truth(content: str) -> str:
    """Swap the 'One Truth' label for 'Lesson' so TTS reads it better.

    Headings at levels one to three are rewritten first, then any line that
    consists solely of 'One Truth' (as found after plaintext conversion).
    Matching ignores case throughout.
    """
    rewrite_rules = (
        # (pattern, replacement, flags) -- applied top to bottom.
        (r'###\s+One Truth', '### Lesson', re.IGNORECASE),
        (r'##\s+One Truth', '## Lesson', re.IGNORECASE),
        (r'#\s+One Truth', '# Lesson', re.IGNORECASE),
        (r'^One Truth$', 'Lesson', re.MULTILINE | re.IGNORECASE),
    )
    result = content
    for pattern, replacement, flags in rewrite_rules:
        result = re.sub(pattern, replacement, result, flags=flags)
    return result

def clean_punctuation(content: str) -> str:
    """Replace typographic (curly) quotes with their straight ASCII forms.

    The curly characters are written as Unicode escapes so the mapping stays
    correct even if this file passes through a tool that rewrites "smart"
    quotes in source code. Em dashes are left alone because TTS handles them
    fine.

    Args:
        content: Text that may contain curly quotes.

    Returns:
        The text with U+201C/U+201D replaced by ``"`` and U+2018/U+2019
        replaced by ``'``.
    """
    straight_quotes = str.maketrans({
        "\u201c": '"',  # left double quotation mark
        "\u201d": '"',  # right double quotation mark
        "\u2018": "'",  # left single quotation mark
        "\u2019": "'",  # right single quotation mark / apostrophe
    })
    return content.translate(straight_quotes)

def normalize_spacing(content: str) -> str:
    """Strip trailing whitespace and allow at most one blank line in a row.

    Trailing whitespace is removed from every line before runs of newlines
    are collapsed. Doing it in that order matters: a line holding only spaces
    or tabs becomes empty first, so it is counted as a blank line when three
    or more consecutive newlines are reduced to two.

    Args:
        content: Text to normalise.

    Returns:
        The text with no trailing whitespace on any line and no run of more
        than one blank line.
    """
    stripped = '\n'.join(line.rstrip() for line in content.split('\n'))
    return re.sub(r'\n{3,}', '\n\n', stripped)

def get_chapter_title(filename: str, content: str) -> str:
    """Return a clean chapter title for ElevenReader.

    The first level-one Markdown heading in ``content`` is used when there is
    one: bold and underscore emphasis markers are removed and curly quotes
    are converted to straight ones. Without such a heading the title is
    derived from ``filename`` by dropping ``.md``, turning hyphens and
    underscores into spaces, removing the leading sort prefix (digits plus an
    optional lowercase letter) and title-casing the rest.

    Args:
        filename: Name of the source file, e.g. ``"03-preface.md"``.
        content: Markdown text of that file.

    Returns:
        The cleaned title.
    """
    # Try to find H1 heading
    h1_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    if h1_match:
        title = h1_match.group(1).strip()
        # Remove markdown formatting
        title = re.sub(r'\*\*([^\*]+)\*\*', r'\1', title)
        title = re.sub(r'_(.+?)_', r'\1', title)
        # Curly quotes are spelled as escapes so the mapping cannot be
        # silently flattened into a no-op by smart-quote rewriting.
        straight_quotes = str.maketrans({
            "\u201c": '"',
            "\u201d": '"',
            "\u2018": "'",
            "\u2019": "'",
        })
        return title.translate(straight_quotes)

    # Fallback to filename
    title = filename.replace('.md', '').replace('-', ' ').replace('_', ' ')
    title = re.sub(r'^\d+[a-z]?\s*', '', title)
    return title.title()

def process_file(filename: str) -> "tuple[str | None, str | None]":
    """Read one source file and return its title and cleaned content.

    The file is looked up inside ``EXPORT_READY_DIR``. Front matter is
    removed first, the title is extracted from the still-unmodified body,
    and then the URL, 'One Truth', punctuation and spacing clean-ups are
    applied in that order.

    Args:
        filename: File name relative to ``EXPORT_READY_DIR``.

    Returns:
        ``(title, clean_content)``, or ``(None, None)`` when the file does
        not exist.
    """
    filepath = EXPORT_READY_DIR / filename
    try:
        content = filepath.read_text(encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        # A missing source is reported to the caller rather than raised.
        return None, None

    # Remove YAML front matter
    content = remove_yaml_frontmatter(content)

    # Get title before other processing
    title = get_chapter_title(filename, content)

    # Apply all transformations
    content = remove_urls(content)
    content = replace_one_truth(content)
    content = clean_punctuation(content)
    content = normalize_spacing(content)

    return title, content

def _build_markdown(chapters: "list[tuple[str, str, str]]") -> str:
    """Join processed sections into one Markdown document with chapter breaks."""
    output_lines = []

    # Title page goes first, without a chapter break in front of it.
    for _title, content, filename in chapters:
        if filename == "01-title-page.md":
            output_lines.extend([content, ""])
            break

    # Disclaimer sits before the first chapter.
    output_lines.extend([DISCLAIMER, ""])

    for title, content, filename in chapters:
        # Cover is never emitted; title page was already added above.
        if filename in ("00-cover.md", "01-title-page.md"):
            continue
        # NOTE(review): when the title was taken from the file's own H1, that
        # H1 is still present in `content`, so the heading may appear twice
        # in the output -- confirm this is intended.
        output_lines.extend([
            "<reader-chapter-break>",
            "",
            f"# {title}",
            "",
            content,
            "",
            "",
        ])

    return '\n'.join(output_lines)


def _markdown_to_plaintext(markdown: str) -> str:
    """Strip common Markdown syntax and swap chapter markers for a text banner."""
    # Applied top to bottom.
    # NOTE(review): the single-asterisk italic rule runs before the
    # list-marker rule, so "* " bullets are seen by the italic rule first.
    substitutions = (
        (r'^#{1,6}\s+(.+)$', r'\1', re.MULTILINE),      # headings
        (r'\*\*([^\*]+)\*\*', r'\1', 0),                # **bold**
        (r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', 0),       # *italic*
        (r'__(.+?)__', r'\1', 0),                       # __bold__
        (r'(?<!_)_([^_]+)_(?!_)', r'\1', 0),            # _italic_
        (r'`([^`]+)`', r'\1', 0),                       # `code`
        (r'^>\s+', '', re.MULTILINE),                   # blockquotes
        (r'^[-*+]\s+', '', re.MULTILINE),               # bullet markers
        (r'^\d+\.\s+', '', re.MULTILINE),               # numbered markers
    )
    for pattern, replacement, flags in substitutions:
        markdown = re.sub(pattern, replacement, markdown, flags=flags)
    return markdown.replace('<reader-chapter-break>', '=== CHAPTER BREAK ===')


def main():
    """Generate the ElevenReader-optimized .md and .txt versions of the book.

    Every file listed in ``FILE_ORDER`` is processed in ascending order of
    its sort key. Files for which ``process_file`` yields no title or no
    content are skipped and reported. The combined Markdown and a plaintext
    rendering of it are written into ``OUTPUT_DIR``, and progress plus a
    summary are printed to stdout.
    """
    print("🎙️ Generating ElevenReader-Optimized Version")
    print("=" * 60)
    print("")

    OUTPUT_DIR.mkdir(exist_ok=True)

    # Process all files
    chapters = []
    for filename in sorted(FILE_ORDER, key=FILE_ORDER.get):
        print(f"Processing {filename}...")
        title, content = process_file(filename)

        if title and content:
            chapters.append((title, content, filename))
        else:
            # Make a dropped section visible instead of losing it silently.
            print(f"   ⚠️ Skipped {filename} (missing or empty)")

    print(f"\n✅ Processed {len(chapters)} files")
    print("")

    # Build optimized output
    print("📝 Building ElevenReader-optimized version...")
    optimized_content = normalize_spacing(_build_markdown(chapters))

    # Save as .md
    md_output = OUTPUT_DIR / "Silence-Isnt-Distance-ELEVENREADER.md"
    md_output.write_text(optimized_content, encoding='utf-8')

    print(f"✅ Saved: {md_output}")

    # Also generate plaintext version
    print("📄 Generating plaintext version...")
    plaintext = normalize_spacing(_markdown_to_plaintext(optimized_content))

    txt_output = OUTPUT_DIR / "Silence-Isnt-Distance-ELEVENREADER.txt"
    txt_output.write_text(plaintext, encoding='utf-8')

    print(f"✅ Saved: {txt_output}")

    # Summary
    print("")
    print("=" * 60)
    print("✨ ElevenReader-Optimized Version Complete!")
    print("")
    print("📦 Files created:")
    print(f"   • {md_output}")
    print(f"   • {txt_output}")
    print("")
    print("✅ Compliance fixes applied:")
    print("   ✓ YAML front matter removed")
    print("   ✓ URLs removed/converted to citations")
    print("   ✓ 'One Truth' → 'Lesson'")
    print("   ✓ Disclaimer added")
    print("   ✓ Chapter break markers added")
    print("   ✓ Spacing normalized")
    print("   ✓ Punctuation cleaned")
    print("")
    print("🚀 Ready to upload to ElevenReader!")
    print("=" * 60)

# Run the generator only when this file is executed as a script, so importing
# the module does not trigger any file I/O.
if __name__ == "__main__":
    main()