import json
import sys
import os
from tqdm import tqdm
import time

# Append the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils.keys import GOOGLE_SEARCH_KEY, GOOGLE_CX

import requests

with open("../../data/full_dataset.json", 'r') as f:
    data = json.load(f)
facts = [fact.split('Additional Info:')[0].strip() for fact in data.keys()]

results = {}

for fact in facts:
    url = f"https://www.googleapis.com/customsearch/v1?key={GOOGLE_SEARCH_KEY}&cx={GOOGLE_CX}&q={fact}"
    response = requests.get(url)
    data = response.json()
    # totalResults is a string, so you might want to convert it to an int if needed
    total_results = int(data.get("searchInformation", {}).get("totalResults", "0"))
    results[fact] = total_results

for query, count in results.items():
    print(f"{query}: {count} results")
    
with open("fact_frequencies_google.json", 'w') as f:
    json.dump(results, f, indent=4)