import os
import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse

# FastAPI application instance; the route decorators further down this file
# register their handlers on it.
app = FastAPI(title="TTS Arena Audio Server")

# --- CONFIGURATION ---
# Directory under which every audio file is looked up.
AUDIO_BASE_PATH = "audios"

# Speaker names grouped by voice gender.
# The grouping was pre-filled from the default ElevenLabs voice genders;
# adjust it here if your voice set differs.
GENDER_TO_SPEAKERS = {
    "male": [
        "Adam",
        "Bill",
        "Brian",
        "Callum",
        "Charlie",
        "Chris",
        "Daniel",
        "Eric",
        "George",
        "Harry",
        "Liam",
        "Roger",
        "Will",
    ],
    "female": [
        "Alice",
        "Jessica",
        "Laura",
        "Lily",
        "Matilda",
        "River",
        "Sarah",
    ],
}

# In-memory tables, one per TTS source. Each starts as an empty DataFrame
# and is replaced once the matching CSV has been read.
db = {source: pd.DataFrame() for source in ("elevenlabs", "parler")}

def _load_source(key, label):
    """Read ``<key>.csv`` into ``db[key]`` with a normalised ``text_clean`` column.

    Args:
        key: Entry of ``db`` to fill; also the CSV base name (``<key>.csv``).
        label: Human-readable source name used in the status messages.

    The frame is stored in ``db`` only after it has been read *and*
    normalised, so a malformed CSV never leaves a half-prepared table behind.
    """
    csv_name = f"{key}.csv"
    if not os.path.exists(csv_name):
        print(f"⚠️ '{csv_name}' not found.")
        return

    try:
        # Expects a 'text' column; 'filename' (and optionally 'voice') are
        # read later by the request handlers.
        frame = pd.read_csv(csv_name)
        # Normalize text for easier matching
        frame['text_clean'] = frame['text'].astype(str).str.strip()
    except Exception as e:
        # Startup is deliberately best-effort: report the problem and keep
        # the previous (empty) table so the server can still come up.
        print(f"❌ Error loading data from '{csv_name}': {e}")
        return

    db[key] = frame
    print(f"✅ Loaded {len(frame)} {label} records.")


def load_data():
    """Load the ElevenLabs and Parler CSVs into the module-level ``db``.

    Each source is loaded independently, so a missing or malformed file for
    one source does not prevent the other from being loaded. Errors are
    printed rather than raised.
    """
    for key, label in (("elevenlabs", "ElevenLabs"), ("parler", "Parler")):
        _load_source(key, label)

# Populate `db` once, at import time, so the route handlers below can serve
# lookups as soon as the application starts.
load_data()

@app.get("/")
def health_check():
    """Report that the service is up, with the row count of each loaded table."""
    record_counts = {}
    for source, frame in db.items():
        record_counts[source] = len(frame)
    return {"status": "running", "records": record_counts}

@app.get("/elevenlabs/q")
def get_elevenlabs_audio(
    sentence: str = Query(..., description="The exact sentence text"),
    gender: str = Query(..., regex="^(male|female)$")
):
    """Serve the ElevenLabs recording of ``sentence`` by a speaker of ``gender``.

    The sentence is matched exactly, after stripping surrounding whitespace,
    against the ``text_clean`` column. The speaker comes from the ``voice``
    column when the table has one; otherwise it is inferred from speaker
    names appearing in ``filename`` (case-insensitive). The first matching
    row is used.

    Args:
        sentence: Sentence text to look up.
        gender: ``"male"`` or ``"female"``; selects the allowed speaker list.

    Returns:
        A ``FileResponse`` streaming the WAV file.

    Raises:
        HTTPException: 503 when the ElevenLabs table is not usable; 404 when
            the sentence is unknown, no speaker of the requested gender has
            it, or the audio file is not on disk.
    """
    # NOTE(review): newer FastAPI releases deprecate `regex=` in favour of
    # `pattern=`; left unchanged because the installed version is not
    # visible from this file - confirm before switching.
    df = db["elevenlabs"]
    # Also require the columns read below, so a table that was only partly
    # prepared yields a 503 instead of an unhandled KeyError.
    if df.empty or not {'text_clean', 'filename'}.issubset(df.columns):
        raise HTTPException(status_code=503, detail="Database not loaded")

    # 1. Filter by sentence (exact match on the whitespace-stripped text).
    clean_sentence = sentence.strip()
    matches = df[df['text_clean'] == clean_sentence]

    if matches.empty:
        raise HTTPException(status_code=404, detail="Sentence not found in database")

    # 2. Filter by gender.
    allowed_speakers = GENDER_TO_SPEAKERS.get(gender.lower(), [])
    has_voice_column = 'voice' in matches.columns

    if has_voice_column:
        # Case A: the CSV names the speaker explicitly.
        gender_matches = matches[matches['voice'].isin(allowed_speakers)]
    elif allowed_speakers:
        # Case B: guess the speaker from the filename (e.g. "row_0_Adam.wav")
        # using an alternation such as "Adam|Bill|Brian".
        pattern = '|'.join(allowed_speakers)
        filenames = matches['filename'].astype(str)
        gender_matches = matches[filenames.str.contains(pattern, case=False, na=False)]
    else:
        # An empty pattern would match every filename; select nothing instead.
        gender_matches = matches.iloc[0:0]

    # Rows without a filename cannot be served; skipping them avoids a
    # TypeError when the path is built.
    gender_matches = gender_matches[gender_matches['filename'].notna()]

    if gender_matches.empty:
        raise HTTPException(
            status_code=404,
            detail=f"No '{gender}' speaker found for this sentence."
        )

    # 3. Pick the first match.
    selected_row = gender_matches.iloc[0]
    filename = str(selected_row['filename'])

    # Files may be stored flat (audios/elevenlabs/row_0_Adam.wav) or nested
    # per speaker (audios/elevenlabs/Adam/row_0_Adam.wav); try flat first.
    base_dir = os.path.join(AUDIO_BASE_PATH, "elevenlabs")
    candidate_paths = [os.path.join(base_dir, filename)]

    speaker_folders = []
    if has_voice_column:
        # The voice column is the authoritative speaker name when present.
        speaker_folders.append(str(selected_row['voice']))
    # Same case-insensitive rule as the gender filter above, so a row that
    # passed the filter can also have its speaker folder resolved.
    lowered_filename = filename.lower()
    speaker_folders.extend(
        s for s in allowed_speakers
        if s.lower() in lowered_filename and s not in speaker_folders
    )
    candidate_paths.extend(
        os.path.join(base_dir, speaker, filename) for speaker in speaker_folders
    )

    # isfile (not exists) so a directory with the same name is never served.
    file_path = next((p for p in candidate_paths if os.path.isfile(p)), None)
    if file_path is None:
        raise HTTPException(status_code=404, detail=f"Audio file missing on disk: {filename}")

    return FileResponse(file_path, media_type="audio/wav")

# Parler endpoint: looks a sentence up in the table loaded from parler.csv.
@app.get("/parler/q")
def get_parler_audio(sentence: str):
    """Serve the Parler recording whose text exactly matches ``sentence``.

    The sentence is stripped of surrounding whitespace and compared with the
    ``text_clean`` column; the first matching row is used.

    Args:
        sentence: Sentence text to look up.

    Returns:
        A ``FileResponse`` streaming the WAV file from ``audios/parler``.

    Raises:
        HTTPException: 503 when the Parler table is not usable; 404 when the
            sentence is unknown or its audio file is not on disk.
    """
    df = db["parler"]
    # Same guard as the ElevenLabs endpoint: an empty table has no
    # 'text_clean' column, which would otherwise surface as a KeyError.
    if df.empty or not {'text_clean', 'filename'}.issubset(df.columns):
        raise HTTPException(status_code=503, detail="Database not loaded")

    clean_sentence = sentence.strip()
    match = df[df['text_clean'] == clean_sentence]

    if match.empty:
        raise HTTPException(status_code=404, detail="Sentence not found")

    filename = match.iloc[0]['filename']
    # A blank filename cell is read as NaN and cannot be joined into a path.
    if pd.isna(filename):
        raise HTTPException(status_code=404, detail="File missing")

    file_path = os.path.join(AUDIO_BASE_PATH, "parler", str(filename))

    # isfile (not exists) so a directory with the same name is never served.
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File missing")

    return FileResponse(file_path, media_type="audio/wav")