import json
import os
import re
from datetime import datetime, timedelta, timezone

import pandas as pd

import wandb
from eliciting_contexts.utils.constants import WANDB_ENTITY

# Client for the W&B public API; every query below goes through it.
api = wandb.Api()

# Project the runs are read from, and the local directory that receives the
# downloaded table files and the CSVs derived from them.
entity_project = f"{WANDB_ENTITY}/fluent-dreaming"
download_folder = "data/sandbagging/train-sandbagging-f1dpbban-step11264/epo/steering/"
os.makedirs(download_folder, exist_ok=True)

# Display names of interest; applied client-side after the runs are fetched.
pattern = re.compile(r"cosine_mean_activations_layer_.*_0\.1_1000")

# Cutoff: 48 hours before now, as a timezone-aware UTC timestamp in ISO 8601.
since_time = datetime.now(tz=timezone.utc) - timedelta(hours=48)
since_iso = since_time.isoformat()

# Filter passed to the API: keep runs whose createdAt is at or after the cutoff.
filters = {"createdAt": {"$gte": since_iso}}

# Fetch the matching runs eagerly into a list, newest first.
runs_by_time = list(
    api.runs(
        path=entity_project,
        filters=filters,
        order="-createdAt",  # descending createdAt
        per_page=200,  # page size for the paginated query; adjust as needed
    )
)

# Keep only runs that have a display name containing the pattern.
filtered_runs = [
    candidate
    for candidate in runs_by_time
    if candidate.displayName and pattern.search(candidate.displayName)
]


# For every selected run, download each logged table file (*.table.json) into
# download_folder and write a flat CSV copy of it next to the download.
for run in filtered_runs:
    print(f"Checking run: {run.name}")
    for file in run.files():
        # Guard clause: only W&B table exports are of interest.
        if not file.name.endswith(".table.json"):
            continue
        print(f"  Found table file: {file.name}")

        # download() hands back an open file object. Only its path is needed
        # here, so close it right away instead of leaking one handle per table.
        with file.download(root=download_folder, replace=True) as downloaded_file:
            local_path = downloaded_file.name

        # Read with an explicit encoding so parsing does not depend on the
        # platform's default locale.
        with open(local_path, "r", encoding="utf-8") as f:
            table_json = json.load(f)

        # Convert the JSON to a DataFrame; missing keys yield an empty frame.
        columns = table_json.get("columns", [])
        data = table_json.get("data", [])
        df = pd.DataFrame(data=data, columns=columns)

        # Strip directory parts and the ".table.json" suffix from file.name,
        # e.g. "pareto_frontier_302_bb8b71071527ca475fe8.table.json"
        #   -> "pareto_frontier_302_bb8b71071527ca475fe8".
        # The suffix is guaranteed present by the guard above.
        base_filename = os.path.basename(file.name)[: -len(".table.json")]

        # Construct the CSV path without any subfolders.
        csv_filename = f"{run.name}_{base_filename}.csv"
        csv_path = os.path.join(download_folder, csv_filename)

        df.to_csv(csv_path, index=False)
        print(f"  -> Saved CSV: {csv_path}")
