#!/usr/bin/env python3
"""Replace hardcoded directory paths with ANONYMOUS_DIR in YAML files."""

from pathlib import Path

# YAML keys whose values are directory paths. A line's key (the text before
# the first ':', with surrounding whitespace removed) must equal one of these
# entries exactly for the line to be considered.
DIR_KEYS = {
    # Bare key names.
    'dir',
    'directory',
    'data_dir',
    'output_dir',
    'project_root_dir',
    'load_from',
    # Dotted key names, matched only when written in this dotted form.
    'logging.dir',
    'logging.checkpointing.directory',
    'checkpointing.directory',
    'data.data_set_kwargs.data_dir',
    'initial_model_params.load_from',
    'pretraining.load_from',
    'evaluation_checkpoints.root_dir',
}


def _strip_inline_comment(text: str) -> str:
    """Return *text* (already whitespace-stripped) without a trailing YAML comment.

    A ``#`` starts a comment only at the very beginning of the value or when
    it follows whitespace. For a value that opens with a quote, the search
    starts after the matching closing quote so a ``#`` inside the quoted
    scalar is kept as data.
    """
    search_from = 0
    if text[:1] in ('"', "'"):
        closing = text.find(text[0], 1)
        if closing == -1:
            # Unterminated quote: leave the value alone rather than guess.
            return text
        search_from = closing + 1
    for idx in range(search_from, len(text)):
        if text[idx] == '#' and (idx == 0 or text[idx - 1].isspace()):
            return text[:idx].rstrip()
    return text


def is_path_value(val: str) -> bool:
    """Return True if *val* (the text after a YAML key's colon) looks like a path.

    The value is examined after stripping surrounding whitespace, any trailing
    ``# comment`` and surrounding quotes. It is treated as a path when it
    contains ``/`` or starts with ``.``.

    Args:
        val: Raw text following the ``:`` on a YAML line; may include the
            line terminator, quotes and an inline comment.

    Returns:
        False for empty values, comment-only values and the null spellings
        ``null``, ``none`` and ``~`` (case-insensitive); otherwise whether the
        unquoted value contains ``/`` or begins with ``.``.
    """
    if not val:
        return False
    # Drop the comment first so "null  # e.g. /some/path" is still seen as null.
    text = _strip_inline_comment(val.strip())
    unquoted = text.strip('\'"')
    if not unquoted or unquoted.lower() in ('null', 'none', '~'):
        return False
    # Test the unquoted text so quoted relative paths like '.cache' count too.
    return '/' in unquoted or unquoted.startswith('.')


def process_line(line: str) -> str:
    """Anonymize one YAML line if it assigns a path to a known directory key.

    Args:
        line: A single line of YAML text, with or without its line terminator.

    Returns:
        ``line`` unchanged unless the text before the first ``:`` (ignoring
        surrounding whitespace) is in ``DIR_KEYS`` and the text after it is
        accepted by ``is_path_value``. In that case the value is replaced
        with ``ANONYMOUS_DIR``; the indentation, the key and the line's own
        terminator are kept, and everything else after the colon (including
        any inline comment) is dropped.
    """
    key_part, colon, value_part = line.partition(':')
    if not colon:
        return line
    if key_part.strip() not in DIR_KEYS or not is_path_value(value_part):
        return line
    # Keep whatever terminator the line already had (possibly none, for the
    # last line of a file) instead of forcing '\n'. The characters below are
    # the line boundaries recognised by str.splitlines().
    line_boundaries = '\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029'
    terminator = line[len(line.rstrip(line_boundaries)):]
    return key_part + ': ANONYMOUS_DIR' + terminator


def process_file(path: Path) -> bool:
    """Anonymize directory values in the YAML file at *path*, in place.

    Each line of the file is passed through ``process_line``; the file is
    rewritten only when at least one line changed.

    Args:
        path: File to process.

    Returns:
        True if the file was rewritten, False if its content was already
        unchanged by processing.
    """
    # Read and write as UTF-8 explicitly: the locale default encoding may
    # fail on, or silently corrupt, non-ASCII characters in the file.
    original = path.read_text(encoding='utf-8')
    updated = ''.join(
        process_line(line) for line in original.splitlines(keepends=True)
    )
    if updated == original:
        return False
    path.write_text(updated, encoding='utf-8')
    return True


def main() -> None:
    """Anonymize every ``*.yaml`` file under the directory two levels above this script.

    Prints the root-relative path of each file that was rewritten, followed
    by a summary count. Paths whose root-relative form contains
    ``anonymize_dirs`` are skipped.
    """
    root = Path(__file__).resolve().parent.parent
    count = 0
    # sorted() gives a stable output order and finishes the directory scan
    # before any file is rewritten.
    for path in sorted(root.rglob('*.yaml')):
        relative = path.relative_to(root)
        # Test the marker against the root-relative path only, so a checkout
        # located under a directory with this name does not skip everything.
        if 'anonymize_dirs' in str(relative):
            continue
        # rglob also yields directories whose name ends in '.yaml'.
        if not path.is_file():
            continue
        if process_file(path):
            count += 1
            print(relative)
    print(f'Updated {count} YAML files.')


if __name__ == '__main__':
    main()
