import json
import uuid

import argparse
from collections import defaultdict
def generate_sentence_annotations(data):
    """Build one Label Studio task with paired text/audio sentence regions.

    For every sentence, emits a character-span region over the transcript,
    a time-span region over the audio, and a relation linking the two.
    All three share the "Sentence" label.

    Args:
        data: dict with keys "transcript" (str), "audio_url" (str), and
            "sentences" (list of dicts with "text", "start_time", "end_time").

    Returns:
        A single-element list containing the Label Studio task dict.
    """
    transcript = data["transcript"]
    regions = []
    cursor = 0

    for sent in data["sentences"]:
        text = sent["text"]

        # Locate the sentence in the transcript, scanning forward from the
        # end of the previous match so repeated sentences resolve in order.
        offset = transcript.find(text, cursor)
        if offset == -1:
            # Sentence text not present past the cursor — skip it silently.
            continue

        span_end = offset + len(text)
        cursor = span_end

        txt_id = f"text_{uuid.uuid4().hex[:8]}"
        aud_id = f"audio_{uuid.uuid4().hex[:8]}"

        # 1. Character-span region on the transcript.
        regions.append({
            "id": txt_id,
            "from_name": "text_labels",
            "to_name": "transcript",
            "type": "labels",
            "value": {
                "start": offset,
                "end": span_end,
                "text": text,
                "labels": ["Sentence"],
            },
        })

        # 2. Time-span region on the audio.
        regions.append({
            "id": aud_id,
            "from_name": "labels",
            "to_name": "audio",
            "type": "labels",
            "value": {
                "start": sent["start_time"],
                "end": sent["end_time"],
                "labels": ["Sentence"],
            },
        })

        # 3. Relation tying the text region to its audio region.
        regions.append({
            "from_id": txt_id,
            "to_id": aud_id,
            "type": "relation",
            "direction": "right",
        })

    return [{
        "data": {
            "audio": data["audio_url"],
            "transcript": transcript,
        },
        "predictions": [
            {
                "model_version": "auto_aligner_v1",
                "result": regions,
            }
        ],
    }]

def process_manifest(manifest_path):
    """Read a JSONL alignment manifest and build Label Studio tasks per audio file.

    Each manifest line describes one sentence chunk ("source_audio_filepath",
    "text", "offset", "duration"). Chunks are grouped by source audio, sorted
    by start time, joined into a full transcript, and converted to tasks via
    generate_sentence_annotations.

    Args:
        manifest_path: path to the JSONL manifest file.

    Returns:
        List of Label Studio task dicts (one per source audio file).

    Raises:
        json.JSONDecodeError: if a non-empty line is not valid JSON.
        KeyError: if a line is missing a required field.
    """
    audio_data = defaultdict(lambda: {"transcript": "", "sentences": [], "audio_url": ""})

    with open(manifest_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Fix: tolerate blank lines (JSONL files commonly end with a
                # trailing newline); json.loads("") would raise otherwise.
                continue
            item = json.loads(line)
            source_audio = item["source_audio_filepath"]

            # Using local root path
            audio_data[source_audio]["audio_url"] = f"/root/{source_audio}"

            audio_data[source_audio]["sentences"].append({
                "text": item["text"],
                "start_time": item["offset"],
                "end_time": item["offset"] + item["duration"],
            })

    all_tasks = []
    for data in audio_data.values():
        # Sort sentences by start time so transcript order matches audio order.
        data["sentences"].sort(key=lambda s: s["start_time"])
        # Construct full transcript from the time-ordered sentence texts.
        data["transcript"] = " ".join(s["text"] for s in data["sentences"])

        all_tasks.extend(generate_sentence_annotations(data))

    return all_tasks

if __name__ == "__main__":
    manifest_file = "/root/aivanta_chunks/manifest_final.jsonl"
    ls_tasks = process_manifest(manifest_file)
    
    output_file = "pre_annotations_sentences.json"
    with open(output_file, "w") as f:
        json.dump(ls_tasks, f, indent=2)
    print(f"Successfully generated {output_file} with {len(ls_tasks)} tasks")