import json
import argparse

def load_jsonl(file_path):
    """Load a JSONL file and return its records as a list.

    Args:
        file_path: Path to a UTF-8 encoded text file holding one JSON
            document per line.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order
        (JSON objects are returned as dictionaries). An empty file yields
        an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale-default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank or whitespace-only lines (e.g. a trailing empty line) carry
        # no record; skip them instead of letting json.loads fail on them.
        return [json.loads(line) for line in file if line.strip()]

def save_jsonl(file_path, data):
    """Save an iterable of JSON-serializable items as a JSONL file.

    Each item is written as one JSON document on its own line. Non-ASCII
    characters are written literally (not as ``\\uXXXX`` escapes), so the
    file is always encoded as UTF-8.

    Args:
        file_path: Destination path; an existing file is overwritten.
        data: Iterable of JSON-serializable items (typically dictionaries).

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If an item is not JSON-serializable.
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned to UTF-8 rather than left to the platform's locale default,
    # which may be unable to represent them.
    with open(file_path, 'w', encoding='utf-8') as file:
        for item in data:
            file.write(json.dumps(item, ensure_ascii=False) + '\n')

def update_dataset(data1, data2):
    """Copy 'sql' and 'evidence' from data2 records onto matching data1 records.

    Records match when they share the same (question_id, db_id) pair. The
    records of data1 are modified in place, and data1 itself is returned.

    Notes:
        - If data2 contains several records with the same pair, the last
          one is the one used.
        - A matched data1 record always receives both keys; a field absent
          from the data2 record is stored as None, replacing any value the
          data1 record already had under that key.
        - data1 records without a match are left untouched.

    Raises:
        KeyError: If any record in either dataset lacks 'question_id' or
            'db_id'.
    """
    # Index data2 by its identifying pair; later duplicates overwrite earlier ones.
    lookup = {}
    for record in data2:
        pair = (record['question_id'], record['db_id'])
        lookup[pair] = {
            "sql": record.get('sql'),
            "evidence": record.get('evidence'),
        }

    # Merge the indexed fields into every data1 record that has a counterpart.
    for target in data1:
        found = lookup.get((target['question_id'], target['db_id']))
        if found is not None:
            target.update(found)

    return data1

def main():
    """Command-line entry point: merge two JSONL datasets and write the result.

    Reads the three required options from the command line, loads both
    input files, copies 'sql' and 'evidence' from dataset 2 onto the matching
    records of dataset 1, and saves the updated dataset 1 to the output path.
    """
    cli = argparse.ArgumentParser(
        description="Merge 'sql' and 'evidence' from dataset2 into dataset1 based on question_id and db_id."
    )

    # All three options are mandatory; register them in display order.
    required_options = (
        ('--data_1_path', "Path to the first JSONL file (dataset 1)"),
        ('--data_2_path', "Path to the second JSONL file (dataset 2)"),
        ('--output_path', "Path to save the updated dataset 1 JSONL file"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    options = cli.parse_args()

    # Read the inputs: the dataset to enrich first, then the source of the fields.
    primary = load_jsonl(options.data_1_path)
    secondary = load_jsonl(options.data_2_path)

    # Merge and persist the enriched first dataset.
    save_jsonl(options.output_path, update_dataset(primary, secondary))

# Run the merge only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
