import argparse
import hashlib
import json
import os
import time
from datetime import datetime
from io import BytesIO
from urllib.parse import urljoin

import requests
from dotenv import load_dotenv
from PIL import Image
from tqdm import tqdm


def generate_hash(text: str, algorithm: str = "sha256") -> str:
    """Return the hexadecimal digest of *text* under the named hash algorithm.

    The text is encoded as UTF-8 before hashing.

    :param text: String to hash.
    :param algorithm: Any algorithm name accepted by ``hashlib.new``
        (for example ``'sha256'`` or ``'md5'``).
    :return: Digest of the encoded text as a hexadecimal string.
    """
    hasher = hashlib.new(algorithm)
    payload = text.encode("utf-8")
    hasher.update(payload)
    digest_hex = hasher.hexdigest()
    return digest_hex


def google_search(
    api_key,
    cse_id,
    sample,
    num_images=10,
    start=1,
    country="Qatar",
    output_dir="./output/",
    search_url="https://www.googleapis.com/customsearch/v1",
    max_retries=3,
    timeout=10,
):
    """Collect image URLs for one query through the Google Custom Search API.

    Raw API responses are cached in
    ``<output_dir>/<sample['country']>/responses/<sample['q_id']>.json``.
    When that file already exists, no request is made and the URLs are read
    back from it.

    Otherwise the query is issued with ``imgSize`` set to ``xxlarge``, then
    ``xlarge``, then ``large``, paging ten results at a time, until at least
    ``num_images`` URLs are collected or every size is exhausted.

    :param api_key: Google API key, sent as ``key``.
    :param cse_id: Custom search engine id, sent as ``cx``.
    :param sample: Query record; must contain ``country``, ``q_id`` and
        ``query``. On a successful fresh search the key ``responses`` is
        added to this dict in place before it is written to the cache file.
    :param num_images: Stop once this many URLs have been collected.
    :param start: 1-based index of the first result to request for each
        image size.
    :param country: Country name translated by ``country_to_code`` into the
        ``gl`` parameter.
    :param output_dir: Root directory for cached responses.
    :param search_url: API endpoint.
    :param max_retries: Consecutive failed requests tolerated (per image
        size) before the search is abandoned.
    :param timeout: Per-request timeout in seconds.
    :return: List of image URLs (possibly empty), or ``None`` when
        ``country`` has no known country code.
    """
    final_output_dir = os.path.join(output_dir, sample["country"], "responses")
    if not os.path.isdir(final_output_dir):
        os.makedirs(final_output_dir, exist_ok=True)
        print(f"Directry created at {final_output_dir}")
    response_file = os.path.join(final_output_dir, f"{sample['q_id']}.json")

    if os.path.exists(response_file):
        # Cached by an earlier run: rebuild the URL list without hitting the
        # API. The file is written as UTF-8 below, so read it the same way.
        with open(response_file, encoding="utf-8") as f:
            cached = json.load(f)
        print(f"Skipping query: {sample['query']}")
        return [
            item["link"]
            for response_object in cached["responses"]
            for item in response_object.get("items", [])
        ]

    ccode = country_to_code(country)
    if not ccode:
        return None

    image_urls = []
    full_response = []
    gave_up = False
    for img_size in ("xxlarge", "xlarge", "large"):
        # Every size filter has its own result list, so paging restarts at
        # the caller's `start` instead of continuing from the previous size.
        page_start = start
        failures = 0
        while len(image_urls) < num_images:
            params = {
                "q": sample["query"],
                "cx": cse_id,
                "key": api_key,
                "gl": ccode,
                "searchType": "image",
                "safe": "active",
                "num": 10,
                "start": page_start,
                "imgSize": img_size,
                "rights": "cc_publicdomain,cc_attribute,cc_sharealike",
            }
            time.sleep(0.5)  # pause between consecutive API calls

            error_status = None
            try:
                response = requests.get(search_url, params=params, timeout=timeout)
            except requests.RequestException as exc:
                print(f"Error fetching images: {exc}")
                failed = True
            else:
                failed = response.status_code != 200
                if failed:
                    error_body = _read_error_body(response)
                    print(f"Error fetching images: {error_body}")
                    error_status = _error_status(error_body)

            if failed:
                if error_status == "INVALID_ARGUMENT":
                    # The request itself is rejected (e.g. paging too far);
                    # retrying cannot help, move on to the next size.
                    break
                failures += 1
                if failures > max_retries:
                    # Bounded retry: the same request would otherwise be
                    # repeated forever on persistent errors such as an
                    # exhausted quota.
                    gave_up = True
                    break
                continue

            failures = 0
            response_object = response.json()
            full_response.append(response_object)
            image_urls.extend(
                item["link"] for item in response_object.get("items", [])
            )

            page_start += 10
            if "nextPage" not in response_object.get("queries", {}):
                break
        if gave_up or len(image_urls) >= num_images:
            break

    if gave_up:
        # Do not write the cache file: a later run should retry this query
        # rather than treat the incomplete result as final.
        print(f"Giving up on query after repeated errors: {sample['query']}")
        return image_urls

    sample["responses"] = full_response
    with open(response_file, "w", encoding="utf-8") as f:
        json.dump(sample, f, indent=4, ensure_ascii=False)

    return image_urls


def _read_error_body(response):
    """Return the decoded JSON body of an error response, or its raw text."""
    try:
        return response.json()
    except ValueError:
        return response.text


def _error_status(error_body):
    """Return ``error_body['error']['status']`` when present, else ``None``."""
    if not isinstance(error_body, dict):
        return None
    error = error_body.get("error")
    if not isinstance(error, dict):
        return None
    return error.get("status")


def download_image(url, save_path):
    """Fetch an image from *url* and store it on disk.

    The image is rejected when its width or height is below 200 pixels.
    The output file name is *save_path* with the detected image format
    (lower-cased) appended as the extension.

    :param url: Address of the image to fetch.
    :param save_path: Destination path without the format extension.
    :return: Path of the written file, or ``None`` when the image is too
        small or any step of the download/decoding/saving fails.
    """
    width_floor = 200
    height_floor = 200
    try:
        reply = requests.get(url, stream=True, timeout=10)
        reply.raise_for_status()

        picture = Image.open(BytesIO(reply.content))

        large_enough = (
            picture.width >= width_floor and picture.height >= height_floor
        )
        if not large_enough:
            print(f"Skipping {url} (too small: {picture.width}x{picture.height})")
            return None

        extension = picture.format.lower()
        destination = f"{save_path}.{extension}"
        picture.save(destination)
        print(f"Saved: {destination}")
    except Exception as e:
        # Best effort: any failure is reported and the image is skipped.
        print(f"Failed to download {url}: {e}")
        return None
    return destination


def read_subcategory_priority(fpath, high_rank_cutoff=21):
    """Read a tab-separated ranking file into a subcategory -> priority map.

    The first line is treated as a header and skipped. Every following
    non-blank line must be ``<rank>\\t<subcategory>``. A subcategory is
    labelled ``"High"`` when its integer rank is at most
    ``high_rank_cutoff`` and ``"Medium"`` otherwise.

    :param fpath: Path to the ranking file (read as UTF-8).
    :param high_rank_cutoff: Largest rank still labelled ``"High"``.
    :return: Dict mapping subcategory name to ``"High"``/``"Medium"``;
        an empty dict when the file cannot be read or a line is malformed.
    """
    try:
        with open(fpath, encoding="utf-8") as f:
            lines = f.read().strip().split("\n")

        ranking = {}
        for line in lines[1:]:
            # A stray blank line must not invalidate the whole file.
            if not line.strip():
                continue
            rank, cat = line.split("\t")
            ranking[cat] = "High" if int(rank) <= high_rank_cutoff else "Medium"
        return ranking
    except (OSError, ValueError) as e:
        # OSError: unreadable file. ValueError: undecodable bytes, wrong
        # number of columns, or a non-integer rank.
        print(f"Error in category priority file: {e}")
        return {}


def country_to_code(country):
    """Look up the lowercase two-letter code for a supported country name.

    :param country: Country name exactly as spelled in the table below
        (for example ``"Saudi Arabia"`` or ``"UAE"``).
    :return: The two-letter code, or ``None`` when the name is not listed.
    """
    name_code_pairs = (
        ("Qatar", "qa"),
        ("Kuwait", "kw"),
        ("UAE", "ae"),
        ("Saudi Arabia", "sa"),
        ("Bahrain", "bh"),
        ("Egypt", "eg"),
        ("Jordan", "jo"),
        ("Lebanon", "lb"),
        ("Palestine", "ps"),
        ("Libya", "ly"),
        ("Oman", "om"),
        ("Iraq", "iq"),
        ("Algeria", "dz"),
        ("Somalia", "so"),
        ("Sudan", "sd"),
        ("Syria", "sy"),
        ("Tunisia", "tn"),
        ("Yemen", "ye"),
        ("Djibouti", "dj"),
        ("Morocco", "ma"),
        ("Mauritania", "mr"),
        ("Comoros", "km"),
    )
    # dict.get yields None for unlisted names.
    return dict(name_code_pairs).get(country)


def read_jsonl_file(filepath, country, ranking):
    """Build the list of search queries for one country from a JSONL file.

    Each non-blank line must be a JSON object with the keys ``country``,
    ``category``, ``subcategory``, ``topic`` and ``filterd_queries`` (a list
    of query strings). Only objects whose ``country`` equals *country* and
    whose ``subcategory`` is a key of *ranking* contribute queries.

    :param filepath: Path to the JSONL file (read as UTF-8).
    :param country: Country whose queries are returned.
    :param ranking: Mapping of subcategory name to priority label; the label
        is copied into each returned record as ``topic_rank``.
    :return: List of dicts with the keys ``q_id``, ``country``,
        ``category``, ``subcategory``, ``topic``, ``query`` and
        ``topic_rank``. Empty when the country does not occur in the file
        or the file cannot be read or parsed.
    """
    queries = []
    country_seen = False
    try:
        with open(filepath, encoding="utf-8") as f:
            for line in f:
                # Blank lines are not records; skipping them keeps one stray
                # newline from discarding the whole file.
                if not line.strip():
                    continue
                obj = json.loads(line)
                if obj["country"] != country:
                    continue
                country_seen = True

                subcategory = obj["subcategory"]
                if subcategory not in ranking:
                    continue
                for query in obj["filterd_queries"]:
                    queries.append(
                        {
                            # Stable id derived from country, topic and query.
                            "q_id": generate_hash(
                                f"{obj['country']}_{obj['topic']}_{query}"
                            ),
                            "country": obj["country"],
                            "category": obj["category"],
                            "subcategory": subcategory,
                            "topic": obj["topic"],
                            "query": query,
                            "topic_rank": ranking[subcategory],
                        }
                    )
    except (OSError, ValueError, KeyError, TypeError) as e:
        # OSError: unreadable file. ValueError: bad encoding or invalid JSON.
        # KeyError/TypeError: a record without the expected structure.
        print(f"Error reading query file: {e}")
        return []

    if not country_seen:
        print(f"The country {country} does not exists in the data file!")
    return queries


def main():
    """Command-line entry point: search image URLs for one country's queries.

    Steps:

    1. Parse the arguments and load ``GOOGLE_API_KEY`` / ``CSE_ID`` from the
       given ``.env`` file.
    2. Read the subcategory ranking and the country's queries.
    3. Run ``google_search`` for every query, requesting 30 URLs for
       ``"High"`` ranked subcategories and 20 for ``"Medium"`` ones.
    4. Write ``<output_dir>/<country>/results.json`` with one entry per
       query that returned URLs.

    Images are not downloaded here; each ``file_path`` in the results is the
    destination name derived from the query id and a hash of the URL.

    Prints an error and returns early when a required input is missing or
    empty.
    """
    parser = argparse.ArgumentParser(description="Google Image Search and Download")
    parser.add_argument(
        "-q", "--query_file", type=str, required=True, help="Path to query file (JSON)"
    )
    parser.add_argument(
        "-c", "--country", type=str, required=True, help="Country to search for"
    )
    parser.add_argument(
        "-r", "--rank_file", type=str, required=True, help="Subcategory ranking file"
    )
    parser.add_argument(
        "-o", "--output_dir", type=str, required=True, help="Directory to save images"
    )
    parser.add_argument(
        "-e", "--env_file", type=str, required=True, help="Path to .env file"
    )

    args = parser.parse_args()

    if not os.path.exists(args.env_file):
        print(f"Error: .env file not found at {args.env_file}!")
        return

    load_dotenv(args.env_file, override=True)

    api_key = os.getenv("GOOGLE_API_KEY")
    cse_id = os.getenv("CSE_ID")
    search_url = "https://www.googleapis.com/customsearch/v1"

    if not api_key or not cse_id:
        print("Error: Missing GOOGLE_API_KEY or CSE_ID in .env file!")
        return

    results_dir = args.output_dir
    os.makedirs(results_dir, exist_ok=True)

    rankings = read_subcategory_priority(args.rank_file)
    if not rankings:
        print("No ranking for subcategory found or invalid file format.")
        return

    queries = read_jsonl_file(args.query_file, args.country, rankings)
    if not queries:
        print("No queries found or invalid file format.")
        return

    # Every query belongs to args.country, so the per-country paths are
    # computed once here instead of on each loop iteration.
    country_dir = os.path.join(results_dir, args.country)
    images_dir = os.path.join(country_dir, "img")
    os.makedirs(images_dir, exist_ok=True)
    results_file = os.path.join(country_dir, "results.json")

    # Number of image URLs requested per query, by subcategory priority.
    num_images = {"High": 30, "Medium": 20}

    # Step 1: collect image URLs for every query.
    results = []
    for item in tqdm(queries, desc="Searching on Google:"):
        query = item["query"]

        image_urls = google_search(
            api_key,
            cse_id,
            item,
            num_images[item["topic_rank"]],
            start=1,
            country=item["country"],
            output_dir=results_dir,
            search_url=search_url,
        )

        if not image_urls:
            print(f"No suitable images found for query: {query}")
            continue

        # Work on a copy without "responses": google_search attaches the raw
        # API responses to `item` only when the query was freshly fetched,
        # which would make results.json differ between first and later runs.
        query_results = {
            key: value for key, value in item.items() if key != "responses"
        }
        query_results["images"] = [
            {
                "url": url,
                "file_path": os.path.join(
                    images_dir, f"{item['q_id']}_{generate_hash(f'{url}')}.jpeg"
                ),
            }
            for url in image_urls
        ]
        results.append(query_results)

    # Step 2: write the manifest once all queries have been processed.
    with open(results_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4, ensure_ascii=False)

    print(f"Image collection completed! Results saved in {results_file}")


# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
