#!/usr/bin/env python3
"""
Generate clean plaintext versions for ElevenLabs Reader
Strips all Markdown formatting and creates chaptered + continuous versions
"""

import os
import re
from pathlib import Path

# Directory the Markdown sources are read from (relative path, so it is
# resolved against the current working directory).
EXPORT_READY_DIR = Path("export-ready")
# Directory the generated .txt files are written to; created by main().
OUTPUT_DIR = Path("export")

# Maps each manuscript filename to its sort position in the finished book.
# main() iterates these keys ordered by value, so only files listed here are
# ever processed. Fractional values (2.5, 25.5) slot the "a"-suffixed files
# between their integer-numbered neighbours.
FILE_ORDER = {
    "00-cover.md": 0,
    "01-title-page.md": 1,
    "02-dedication.md": 2,
    "02a-content-considerations.md": 2.5,
    "03-preface.md": 3,
    "04-how-to-use-this-book.md": 4,
    "05-introduction.md": 5,
    "06-chapter-01-overloaded-mind.md": 6,
    "07-chapter-02-emotional-architecture.md": 7,
    "08-chapter-03-mirror-misunderstanding.md": 8,
    "09-chapter-04-science-of-withdrawal.md": 9,
    "10-chapter-05-shutdown-triggers.md": 10,
    "11-chapter-06-survival-mode-love.md": 11,
    "12-chapter-07-overfunctioning.md": 12,
    "13-chapter-08-projection-mirror.md": 13,
    "14-chapter-09-burnout-breakdown-dissociation.md": 14,
    "15-chapter-10-loving-someone-who-retreats.md": 15,
    "16-chapter-11-what-people-get-wrong.md": 16,
    "17-chapter-12-repair-reconnection.md": 17,
    "18-chapter-13-emotional-bandwidth.md": 18,
    "19-chapter-14-healing-survival-patterns.md": 19,
    "20-chapter-15-coming-home-to-silence.md": 20,
    "21-epilogue.md": 21,
    "22-acknowledgements.md": 22,
    "23-about-the-author.md": 23,
    "24-back-of-book-summary.md": 24,
    "25-bibliography.md": 25,
    "25a-glossary.md": 25.5,
}

def strip_markdown(content: str) -> str:
    """Convert Markdown source into plain text, preserving paragraph breaks.

    Processing order matters: block-level markers (blockquotes, headings,
    horizontal rules, list bullets) are removed before inline emphasis so
    that a ``*`` bullet is never mistaken for the start of ``*italic*``
    text, and whitespace is normalised last so that lines emptied by
    earlier steps cannot leave runs of blank lines behind.

    Args:
        content: Raw Markdown text, optionally starting with a YAML
            front-matter block delimited by ``---``.

    Returns:
        The text with Markdown syntax removed, runs of spaces/tabs collapsed
        to one space, at most one blank line between paragraphs, and no
        leading or trailing whitespace.
    """
    # Drop a leading YAML front-matter block (everything up to the next "---").
    if content.startswith("---"):
        end_index = content.find("---", 3)
        if end_index != -1:
            content = content[end_index + 3:].strip()

    # Fenced code blocks are removed entirely; inline code keeps its text.
    content = re.sub(r'```[\s\S]*?```', '', content)
    content = re.sub(r'`([^`]+)`', r'\1', content)

    # Images -> alt text. Must run before links, whose pattern would
    # otherwise match the "[alt](url)" tail and leave the "!" behind.
    content = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', r'\1', content)

    # Links -> link text.
    content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)

    # --- Block-level markers -------------------------------------------
    # These patterns use [ \t] rather than \s: under re.MULTILINE, \s can
    # match "\n" and would swallow the blank line separating a paragraph
    # from the list/quote that follows it.

    # Blockquote markers (including nested ">>" and bare ">" lines).
    content = re.sub(r'^[ \t]*(?:>[ \t]?)+', '', content, flags=re.MULTILINE)

    # Headings: keep the text, drop the leading #'s.
    content = re.sub(r'^#{1,6}[ \t]+(.+)$', r'\1', content, flags=re.MULTILINE)

    # Horizontal rules: three or more of the same -, * or _ (spaces allowed).
    content = re.sub(
        r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$', '', content, flags=re.MULTILINE
    )

    # List markers (bulleted, then numbered).
    content = re.sub(r'^[ \t]*[-*+][ \t]+', '', content, flags=re.MULTILINE)
    content = re.sub(r'^[ \t]*\d+\.[ \t]+', '', content, flags=re.MULTILINE)

    # --- Inline emphasis -----------------------------------------------
    # Bold (**text** or __text__).
    content = re.sub(r'\*\*([^\*]+)\*\*', r'\1', content)
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # Italics (*text* or _text_). Underscores only count when they are not
    # inside a word, so snake_case identifiers and URLs are left intact.
    content = re.sub(r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', content)
    content = re.sub(r'(?<!\w)_([^_]+)_(?!\w)', r'\1', content)

    # Sweep up leftover runs of marker characters (e.g. "~~", unmatched "**").
    # NOTE: this also deletes "--" when it is used as a dash in prose.
    content = re.sub(r'[#>*_`~\-]{2,}', '', content)

    # --- Whitespace normalisation (last, so earlier removals are covered) ---
    content = re.sub(r'[ \t]+', ' ', content)
    content = re.sub(r'[ \t]+$', '', content, flags=re.MULTILINE)
    content = re.sub(r'\n{3,}', '\n\n', content)

    return content.strip()

def get_chapter_title(filename: str, content: str) -> str:
    """Return an upper-cased title for a manuscript file.

    The first level-1 Markdown heading in *content* is used when one exists
    and is non-empty after inline formatting is removed; otherwise the title
    is derived from *filename*.

    Args:
        filename: Bare file name such as ``"02a-content-considerations.md"``.
        content: Raw Markdown text of the file.

    Returns:
        The title in upper case.
    """
    # Ignore a leading YAML front-matter block so that a "# comment" line
    # inside it is not mistaken for the document's H1.
    body = content
    if body.startswith("---"):
        end_index = body.find("---", 3)
        if end_index != -1:
            body = body[end_index + 3:]

    # [ \t]+ (not \s+) keeps the match on the heading's own line.
    h1_match = re.search(r'^#[ \t]+(.+)$', body, re.MULTILINE)
    if h1_match:
        title = h1_match.group(1).strip()
        # Strip inline formatting: bold first, then italics, then code.
        title = re.sub(r'\*\*([^*]+)\*\*', r'\1', title)
        title = re.sub(r'__([^_]+)__', r'\1', title)
        title = re.sub(r'(?<!\*)\*([^*]+)\*(?!\*)', r'\1', title)
        title = re.sub(r'(?<!\w)_([^_]+)_(?!\w)', r'\1', title)
        title = re.sub(r'`([^`]+)`', r'\1', title)
        title = title.strip()
        # An empty heading falls through to the filename-based title.
        if title:
            return title.upper()

    # Fallback: derive the title from the file name.
    title = filename.removesuffix('.md').replace('-', ' ').replace('_', ' ')
    # Drop the leading ordering prefix: digits plus an optional single
    # sub-index letter ("02a content ..." -> "content ...").
    title = re.sub(r'^\d+(?:[A-Za-z](?=\s))?\s*', '', title)
    return title.upper()

# The return annotation is a string so that the "| None" syntax is not
# evaluated at definition time on interpreters older than 3.10.
def process_file(filename: str) -> "tuple[str | None, str | None]":
    """Read one manuscript file and return ``(title, clean_content)``.

    The file is looked up inside ``EXPORT_READY_DIR`` and decoded as UTF-8.
    The title is extracted from the raw Markdown before the formatting is
    stripped.

    Args:
        filename: Name of the Markdown file inside ``EXPORT_READY_DIR``.

    Returns:
        ``(title, clean_content)`` on success, or ``(None, None)`` when the
        file does not exist.
    """
    filepath = EXPORT_READY_DIR / filename

    # Attempt the read directly instead of checking exists() first, so the
    # file cannot disappear between the check and the open.
    try:
        content = filepath.read_text(encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        return None, None

    # Get title before stripping, while the "# " heading marker is present.
    title = get_chapter_title(filename, content)
    clean_content = strip_markdown(content)

    return title, clean_content

def format_text_for_reading(text: str, max_width: int = 80) -> str:
    """Greedily word-wrap *text* so lines are at most *max_width* characters.

    Paragraphs are the chunks of *text* separated by a blank line. Inside a
    paragraph all whitespace (including single newlines) is collapsed and the
    words are re-flowed. A word longer than *max_width* is never split; it is
    placed on a line of its own.

    Args:
        text: Plain text to wrap.
        max_width: Maximum line length in characters.

    Returns:
        The wrapped text. Every non-empty paragraph is followed by one empty
        line, so the result ends with a newline when the last paragraph is
        non-empty.
    """
    lines = []

    for para in text.split('\n\n'):
        words = para.split()
        if not words:
            # Whitespace-only paragraph: keep it as a single empty line.
            lines.append('')
            continue

        current_line = []
        current_length = 0  # exact length of ' '.join(current_line)

        for word in words:
            # Length of the line if this word were appended: the separating
            # space is counted only when a word is already on the line.
            if current_line:
                candidate = current_length + 1 + len(word)
            else:
                candidate = len(word)

            if current_line and candidate > max_width:
                lines.append(' '.join(current_line))
                current_line = [word]
                current_length = len(word)
            else:
                current_line.append(word)
                current_length = candidate

        lines.append(' '.join(current_line))
        lines.append('')  # Blank line after paragraph

    return '\n'.join(lines)

def main():
    """Build both plaintext editions of the book and report what was written.

    Every file named in ``FILE_ORDER`` is processed in ascending order of its
    mapped value; files for which ``process_file`` yields an empty title or
    empty content are left out. Two files are then written into
    ``OUTPUT_DIR``: a chaptered edition with a banner before each section and
    a continuous edition with only the title between sections. Progress and
    summary statistics are printed to stdout.
    """
    print("📝 Generating plaintext versions for ElevenLabs Reader...")
    print("")

    # Make sure the destination folder exists before anything is written.
    OUTPUT_DIR.mkdir(exist_ok=True)

    # Collect (title, body) pairs in book order.
    chapters = []
    for filename in sorted(FILE_ORDER, key=FILE_ORDER.get):
        print(f"Processing {filename}...")
        title, content = process_file(filename)
        if not (title and content):
            continue
        chapters.append((title, content))

    print(f"\n✅ Processed {len(chapters)} files")
    print("")

    # --- Chaptered edition: banner, numbered heading, banner, body ---
    print("📄 Creating chaptered version...")
    banner = "=" * 80
    chaptered_lines = []
    for number, (title, body) in enumerate(chapters, start=1):
        chaptered_lines.extend(
            [banner, f"CHAPTER {number}: {title}", banner, "", body, "", ""]
        )
    chaptered_text = '\n'.join(chaptered_lines)

    chaptered_path = OUTPUT_DIR / "Silence-Isnt-Distance-PLAINTEXT.txt"
    chaptered_path.write_text(chaptered_text, encoding='utf-8')
    print(f"✅ Saved: {chaptered_path}")

    # --- Continuous edition: two blank lines, title, blank line, body ---
    print("📄 Creating continuous reading version...")
    continuous_lines = []
    for title, body in chapters:
        continuous_lines.extend(["", "", title, "", body, ""])
    continuous_text = '\n'.join(continuous_lines)

    continuous_path = OUTPUT_DIR / "Silence-Isnt-Distance-READING-MODE.txt"
    continuous_path.write_text(continuous_text, encoding='utf-8')
    print(f"✅ Saved: {continuous_path}")

    # --- Summary ---
    chapter_count = len(chapters)
    print("")
    print("📊 File Statistics:")
    print(f"   Chaptered version: {len(chaptered_text):,} characters")
    print(f"   Continuous version: {len(continuous_text):,} characters")
    print(f"   Total chapters: {chapter_count}")

    print("")
    print("✨ Plaintext generation complete!")
    print("")
    print("📦 Files ready for ElevenLabs Reader:")
    for saved_path in (chaptered_path, continuous_path):
        print(f"   • {saved_path}")

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()