import os
import numpy as np
from glob import glob
from sklearn.neighbors import NearestNeighbors

# Dataset root and path configuration.
# DATASET_ROOT is the base directory of the source dataset; the database and
# query image folders are resolved relative to it.
DATASET_ROOT = '/database/dkim/VPR_datasets/Nordland_GSV/'
DATABASE_FOLDER = os.path.join(DATASET_ROOT, 'images/test/database')
QUERIES_FOLDER = os.path.join(DATASET_ROOT, 'images/test/queries')
# Directory that receives the generated .npy files.
OUTPUT_ROOT = '/database/dkim/gsv_cities/datasets/Nordland_GSV/'
# Filename prefix shared by every generated .npy file.
SAVE_name = 'Nordland_GSV'
# Create the output directory up front; exist_ok=True tolerates an existing one.
os.makedirs(OUTPUT_ROOT, exist_ok=True)


def read_images_paths(dataset_folder):
    """Return the paths of the images that belong to ``dataset_folder``.

    If the listing file ``dataset_folder + "_images_paths.txt"`` exists, it is
    read as one path per line, each taken relative to ``dataset_folder``
    (blank lines are ignored). Otherwise the folder is searched recursively
    for files whose extension is ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) and the matches are returned in sorted order.

    Args:
        dataset_folder: Directory containing the images, as a string.

    Returns:
        A non-empty list of image paths, each prefixed with ``dataset_folder``.

    Raises:
        FileNotFoundError: If ``dataset_folder`` does not exist, if the
            listing file contains no paths, if the first listed image does
            not exist, or if the recursive search finds no images.
    """
    if not os.path.exists(dataset_folder):
        raise FileNotFoundError(f"Folder {dataset_folder} does not exist")

    file_with_paths = dataset_folder + "_images_paths.txt"
    if os.path.exists(file_with_paths):
        print(f"Reading paths of images within {dataset_folder} from {file_with_paths}")
        with open(file_with_paths, "r") as file:
            # Drop blank lines: joining "" onto the folder would yield the
            # folder itself rather than an image path.
            relative_paths = [line for line in file.read().splitlines() if line.strip()]
        if not relative_paths:
            # Report an empty listing with the same exception type as the
            # other failure modes instead of an IndexError further down.
            raise FileNotFoundError(f"File {file_with_paths} does not list any images")
        images_paths = [os.path.join(dataset_folder, path) for path in relative_paths]
        # Only the first entry is checked, as a cheap sanity test of the listing.
        if not os.path.exists(images_paths[0]):
            raise FileNotFoundError(
                f"Image with path {images_paths[0]} does not exist within {dataset_folder}."
            )
    else:
        # Imported locally because the file only binds the glob() function.
        from glob import escape

        print(f"Searching test images in {dataset_folder} with glob()")
        # Escape the folder so glob metacharacters in its name ("[", "*", "?")
        # are matched literally; only the trailing "**/*" acts as a pattern.
        pattern = f"{escape(dataset_folder)}/**/*"
        image_extensions = (".jpg", ".jpeg", ".png")
        images_paths = sorted(
            p
            for p in glob(pattern, recursive=True)
            if os.path.isfile(p) and os.path.splitext(p)[1].lower() in image_extensions
        )
        if not images_paths:
            raise FileNotFoundError(f"Directory {dataset_folder} does not contain any images")

    return images_paths

def _utms_from_paths(paths):
    """Parse one (easting, northing) float row per image path.

    Filenames are assumed to follow ``@utm_easting@utm_northing@...@.jpg``.
    Only the filename is split on "@", so an "@" in a parent directory name
    cannot shift the field positions.

    Raises:
        IndexError: If a filename has fewer than three "@"-separated fields.
        ValueError: If a coordinate field cannot be converted to float.
    """
    coordinates = []
    for path in paths:
        fields = os.path.basename(path).split("@")
        coordinates.append((fields[1], fields[2]))
    return np.array(coordinates).astype(float)


# Read the database and query image paths.
database_paths = read_images_paths(DATABASE_FOLDER)
queries_paths = read_images_paths(QUERIES_FOLDER)

# Extract the UTM coordinates embedded in the image filenames
# (assumed format: @utm_easting@utm_northing@...@.jpg).
try:
    database_utms = _utms_from_paths(database_paths)
    queries_utms = _utms_from_paths(queries_paths)
except (IndexError, ValueError) as e:
    raise ValueError("이미지 경로에 UTM 좌표가 포함되어 있지 않거나 형식이 올바르지 않습니다.") from e

# Compute the ground truth: for every query, the indices of the database
# images whose coordinates lie within positive_dist_threshold (25 meters).
positive_dist_threshold = 25
knn = NearestNeighbors(n_jobs=-1)
knn.fit(database_utms)
ground_truth = knn.radius_neighbors(queries_utms, radius=positive_dist_threshold, return_distance=False)

# Save database image paths, query image paths and ground truth as .npy files.
# The ground truth is stored with dtype=object because each query can have a
# different number of positives.
np.save(os.path.join(OUTPUT_ROOT, SAVE_name + '_dbImages.npy'), np.array(database_paths))
np.save(os.path.join(OUTPUT_ROOT, SAVE_name +'_qImages.npy'), np.array(queries_paths))
np.save(os.path.join(OUTPUT_ROOT, SAVE_name + '_gt.npy'), np.array(ground_truth, dtype=object))

print(SAVE_name+"의 npy 파일 생성 완료")
