import tensorflow as tf
import numpy as np
from tensorflow import keras
import tensorflow_datasets as tfds

# Dataset split to extract features from: "train" or "validation".
# The same value selects the TFDS split and names the output .npz file.
mode = "validation"


def preprocess_data(data, num_classes=80, image_size=(224, 224)):
    """Convert one dataset example into a model-ready image and multi-hot labels.

    Args:
        data: A single example dict. Only ``data['image']`` and
            ``data['objects']['label']`` are read.
        num_classes: Length of the multi-hot label vector; every label id
            must lie in ``[0, num_classes)``.
        image_size: ``(height, width)`` the image is resized to.

    Returns:
        A tuple ``(image, labels)``. ``image`` is a float32 tensor resized to
        ``image_size`` and passed through the ResNet50 input preprocessing.
        ``labels`` is a float32 vector of length ``num_classes`` holding 1.0
        at every class id that occurs in the example and 0.0 elsewhere (all
        zeros when the example has no objects).
    """
    # Several objects may share a class; keep each id once so the sparse
    # tensor below has no duplicate indices.
    label_ids, _ = tf.unique(data['objects']['label'])
    sparse_labels = tf.sparse.SparseTensor(
        indices=label_ids[:, None],
        values=tf.ones(tf.size(label_ids), dtype=tf.float32),
        dense_shape=(num_classes,),
    )
    # tf.unique preserves first-occurrence order; reorder into canonical
    # index order before densifying.
    labels = tf.sparse.to_dense(tf.sparse.reorder(sparse_labels))

    # The ImageNet ResNet50 weights expect inputs produced by the matching
    # `preprocess_input` (0-255 pixel range in, RGB->BGR, per-channel mean
    # subtraction), not images rescaled to [0, 1].
    # NOTE(review): assumes data['image'] holds 0-255 RGB pixel values, as
    # TFDS image features normally do -- confirm for other data sources.
    image = tf.cast(data['image'], tf.float32)
    image = tf.image.resize(image, image_size)
    image = keras.applications.resnet50.preprocess_input(image)
    return image, labels


# Load the requested split. `split` is given as a one-element list, so
# `raw_data` is a list holding a single dataset; `with_info=True` also returns
# the dataset metadata used below for the example count.
raw_data, ds_info = tfds.load(
    'coco',
    split=[mode],
    shuffle_files=True,
    with_info=True,
)

# ResNet50 without the classification head; global max pooling yields one
# feature vector per image.
model = keras.applications.resnet50.ResNet50(include_top=False, pooling='max')

ds = raw_data[0].map(preprocess_data)
num_samples = ds_info.splits[mode].num_examples
batch_size = 32
pos = 0
# Output buffers, pre-sized from the split metadata: one row per example.
all_features = np.zeros((num_samples, 2048))
all_labels = np.zeros((num_samples, 80))
for x, y in ds.batch(batch_size):
    # Run the network explicitly in inference mode.
    features = model(x, training=False).numpy()
    # The final batch may hold fewer than `batch_size` examples, so advance
    # by the actual batch length instead of relying on slice clipping.
    end = pos + features.shape[0]
    all_features[pos:end, :] = features
    all_labels[pos:end, :] = y.numpy()
    pos = end
    print(pos, "/", num_samples)

# Save only the rows that were actually filled, so a dataset that yields fewer
# examples than the metadata reports does not leave all-zero rows in the file.
np.savez_compressed(f"coco-{mode}.npz", features=all_features[:pos], labels=all_labels[:pos])
