
import os,json
from bs4 import BeautifulSoup
from collections import defaultdict

def group_speaking_intervals(data):
    # Group entries by name and role
    participant_logs = defaultdict(lambda: {"role": None, "logs": []})
    for entry in sorted(data, key=lambda x:float( x['speaking_time_sec'])):
        name = entry['name']
        participant_logs[name]["logs"].append((entry['speaking_time_sec'], entry['speaking_state']))
        participant_logs[name]["role"] = entry.get("role", "user")

    result = []
    for name, data in participant_logs.items():
        timestamps = []
        current_state = None
        start_time = None

        for i, (time, state) in enumerate(data["logs"]):
            if state != current_state:
                if current_state is not None:
                    timestamps.append({'from': start_time, 'to': round(float(time), 3), 'status': current_state})
                start_time = round(float(time), 3)
                current_state = state

        if current_state is not None and start_time is not None:
            timestamps.append({'from': start_time, 'to': round(float(time), 3), 'status': current_state})

        result.append({
            'participant': name,
            'role': data["role"],
            'speaking_timestamps': timestamps
        })
    #Need to handle last scenerio


    return {p["participant"]: p for p in result}

def gmeet_particpants_info_exctract(directory, filename, normalize_username, meet_config_data, log_info=None, meeting_id=None):

    filepath = os.path.join(directory, filename)

    # Speaking class list
    
    name_tag_selector=meet_config_data.get("name_selector",'.OFfHfd.urlhDe.iPFm3e span.notranslate')
    # name_tag_selector=meet_config_data.get("name_selector",'.ne2Ple-oshW8e-V67aGc')
    # speaking_tag_selector=meet_config_data.get("speaking_span_selector",".JHK7jb.Nep7Ue.iPFm3e.eLNT1d , .tC2Wod.ACcyyc.eQJ1qd.t9yCsb") # Unmute
    speaking_tag_selector = meet_config_data.get(
    "speaking_span_selector",
    ".JHK7jb.Nep7Ue.iPFm3e.eLNT1d, "
    ".JHK7jb.Nep7Ue.FTMc0c.iPFm3e, "
    ".tC2Wod.ACcyyc.eQJ1qd.t9yCsb, "
    ".IisKdb.GF8M7d.gjg47c.YFyDbd.iPFm3e.VeFZv, "
    ".IisKdb.GF8M7d.HX2H7.YFyDbd.iPFm3e.VeFZv, "
    ".IisKdb.GF8M7d.Oaajhc.YFyDbd.iPFm3e.VeFZv, "
    ".IisKdb.GF8M7d.OgVli.YFyDbd.iPFm3e.VeFZv, "
    ".JHK7jb.Nep7Ue.iPFm3e "
)

    # speaking_state_selector=".tC2Wo.ACcyyc.eQJ1qd.t9yCsb.kssMZb"
    speaking_state_selector=".kssMZb"

    # Load html snapshots map
    with open(filepath) as f:
        html_dict = json.load(f)
    # start_time=meet_config_data["start_time"]
    # print(html_dict,meet_config_data["start_time"])
    # Cache of: html_filename => [participant_info, ...]
    result_cache = {}

    all_participant_info = []

    for time_stamp, html_filename in html_dict.items():
        # second=(int(time_stamp)-start_time)/1000
        second=int(time_stamp)

        full_path = os.path.join(directory, f"{html_filename}.html")


        # Reuse if this file already processed
        if html_filename in result_cache:
            cached_participants = result_cache[html_filename]
        else:
            try:
                with open(full_path, "r", encoding="utf-8") as file:
                    content = file.read()
                soup = BeautifulSoup(content, "lxml")
            except Exception as e:
                print(f"Error loading {full_path}: {str(e)}")
                if log_info:
                    log_info(meeting_id, f"Failed to load {full_path}: {str(e)}")
                continue

            participant_tags = soup.select("div.participant")
            if not participant_tags:
                if log_info:
                    log_info(meeting_id, f"Full Page Found {second}, No data available >> {full_path}")
                continue
            # print(participant_tags)
            cached_participants = []
            for participant in participant_tags:
                # print(participant)
                name_tag = participant.select_one(name_tag_selector
                    
                )
                name = name_tag.text.strip() if name_tag else ""
                if not name :
                    continue
                # print(name)
                normalized_name = normalize_username(name)
                role = "user"
                speaking_tag = participant.select_one(
                    speaking_tag_selector
                )

                speaking_state = "unmute" if speaking_tag else "mute"
                if speaking_tag:
                    is_speaking_tag = participant.select_one(
                        speaking_state_selector
                    )
                    if is_speaking_tag:
                        speaking_state = "speaking"

                cached_participants.append({
                    "name": normalized_name,
                    "role": role,
                    "speaking_state": speaking_state
                })

                # print(cached_participants)

            # Cache the processed result
            result_cache[html_filename] = cached_participants

        # Add speaking time to each participant
        for p in cached_participants:
            all_participant_info.append({
                **p,
                "speaking_time_sec": float(second)
            })

    return group_speaking_intervals(all_participant_info)


