import json

# Input: the raw task file that gets split by site.
path_to_raw_json = './config_files/wa-raw/test_webarena.raw.json'
# Output: directory that receives the per-site files.
path_to_save = './config_files/wa-raw'

# Sites named here never get an output file of their own.
excluded_sites = ['wikipedia', 'calculator']

def split_json_by_sites(input_file, excluded_sites, output_dir=None):
    """Split a JSON list of items into one JSON file per site.

    The input file must contain an iterable of mappings, each with a
    ``'sites'`` entry listing the sites the item belongs to. Every item is
    copied into the group of each of its non-excluded sites, and each group
    is written to ``<output_dir>/test_<site>_raw.json`` with 2-space
    indentation. An item whose sites are all excluded is written nowhere.

    Args:
        input_file: Path of the JSON file to read.
        excluded_sites: Collection of site names that must not get an
            output file. Items are still written for their other sites.
        output_dir: Directory that receives the per-site files; it is
            created if it does not exist. When ``None`` (the default), the
            module-level ``path_to_save`` is used.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
        KeyError: If an item has no ``'sites'`` entry.
    """
    import os  # function-scope import: the file's import block only has json

    if output_dir is None:
        output_dir = path_to_save

    # Explicit encoding so the result does not depend on the platform locale.
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # site name -> items that list that site, in input order
    separated_data = {}
    for item in data:
        # A site repeated inside one item must not duplicate the item in
        # that site's output file.
        seen = set()
        for site in item['sites']:
            if site in excluded_sites or site in seen:
                continue
            seen.add(site)
            separated_data.setdefault(site, []).append(item)

    os.makedirs(output_dir, exist_ok=True)

    # Save each site's data into a separate JSON file
    for site, items in separated_data.items():
        out_path = os.path.join(output_dir, f'test_{site}_raw.json')
        with open(out_path, 'w', encoding='utf-8') as file:
            json.dump(items, file, indent=2)

# Perform the split using the module-level configuration. This call is not
# behind a __main__ guard, so it also runs when the module is imported.
split_json_by_sites(
    input_file=path_to_raw_json,
    excluded_sites=excluded_sites,
)
