import chromadb
from config.config import COLLECTION_PREFIX
# --- Configuration ---

# Registry of known models, keyed by short name.
# NOTE(review): each tuple looks like (backbone identifier, embedding dimension,
# input image size) — confirm against the code that consumes MODELS.
MODELS = {
    "dinov1_s": ("vit_small_patch16_224_dino", 384, 224),
    "dinov1_b": ("vit_base_patch16_224_dino", 768, 224),
    "dinov2_s": ("vit_small_patch14_dinov2.lvd142m", 384, 518),
    "dinov2_b": ("vit_base_patch14_dinov2.lvd142m", 768, 518),
    "mobilenet_v2": ("mobilenetv2_100", 960, 224),
}

# Short names of the models whose collections get scanned, in scan order.
# Each name appears exactly once so no collection is counted and reported twice.
MODEL_NAMES = ["dinov1_s", "dinov2_s", "dinov2_b", "dinov1_b", "mobilenet_v2"]

# --- Safe chunked counting: pages through IDs only (no embeddings, documents, or metadata) ---
def count_collection_ids_by_chunks(collection, chunk_size=10000):
    """Count the records in a collection by paging through its IDs.

    Repeatedly calls ``collection.get(limit=..., offset=..., include=[])`` and
    sums the number of IDs in each page until an empty page comes back.

    Args:
        collection: Object exposing ``get(limit, offset, include)`` that
            returns a mapping with an ``"ids"`` sequence.
        chunk_size: Maximum number of IDs requested per page.

    Returns:
        The number of IDs seen. This is best-effort: if a page query fails,
        the error is printed and the count gathered so far is returned, so
        the result may be a partial total.
    """
    total = 0
    offset = 0
    while True:
        try:
            # include=[] requests no optional fields; only "ids" is read.
            page_ids = collection.get(
                limit=chunk_size, offset=offset, include=[]
            )["ids"]
            count = len(page_ids)
        except Exception as e:
            print(f" Error during paged query at offset {offset}: {e}")
            break
        if count == 0:
            break
        total += count
        # Advance by what was actually returned, not by what was requested:
        # a short (but non-empty) page must not cause records to be skipped.
        offset += count
    return total

# --- Connect to Chroma HTTP server (ClickHouse backend) ---
client = chromadb.HttpClient(host="localhost", port=8010)
print(" Connected to Chroma HTTP server (ClickHouse backend)")

# List every collection the server currently knows about.
print("📂 Available collections:")
for coll in client.list_collections():
    # NOTE(review): some chromadb releases return plain name strings here
    # rather than Collection objects; fall back to the item itself if so.
    print(" -", getattr(coll, "name", coll))

# Report the record count of each expected per-model collection.
print("\n🔍 Scanning known model collections:")
for model_name in MODEL_NAMES:
    collection_name = f"{COLLECTION_PREFIX}_{model_name}"
    try:
        # Use get_collection, not get_or_create_collection: this is a
        # read-only scan and must not create empty collections on the server
        # as a side effect. A missing collection is reported via the except
        # branch below.
        collection = client.get_collection(name=collection_name)
        count = count_collection_ids_by_chunks(collection)
        print(f" Collection '{collection_name}': {count} records")
    except Exception as e:
        print(f" Error accessing collection '{collection_name}': {e}")
