import clip  # OpenAI CLIP model library
import torch  # PyTorch deep learning framework
import numpy as np  # Numerical computing library
# Encodes per-class text descriptions with the CLIP text encoder and saves
# the resulting features and labels as .npy files.

def process_text_line(line):
    """Parse one ``label text1/text2/...`` line into a label and its texts.

    The first whitespace-separated token is the integer class label. The
    rest of the line is split on ``/`` into individual descriptions; each
    piece is stripped of surrounding whitespace and empty pieces are dropped.

    Args:
        line: A single raw line of the input text file.

    Returns:
        A ``(label, texts)`` tuple, where ``label`` is an ``int`` and
        ``texts`` is a list of non-empty strings (it may be empty when the
        text part contains only ``/`` separators). ``None`` is returned when
        the line is blank, has no text part, or its label is not an integer.
    """
    line = line.strip()
    if not line:
        return None
    try:
        # Both a missing text part (unpacking fails) and a non-integer label
        # (int() fails) are format errors and raise ValueError; treat them
        # the same way so one bad line cannot abort the whole run.
        label_str, texts_str = line.split(maxsplit=1)
        label = int(label_str)
    except ValueError:
        return None
    # Strip each piece once, then keep only the non-empty ones.
    pieces = (piece.strip() for piece in texts_str.split('/'))
    texts = [piece for piece in pieces if piece]
    return label, texts

def extract_features(file_path, model_name="ViT-B/32", device=None):
    """Encode every text description in a label file with CLIP's text encoder.

    Each line of ``file_path`` is parsed by ``process_text_line`` (expected
    format: ``label text1/text2/...``). All descriptions on a line are
    tokenized and encoded as one batch, and every feature vector is
    L2-normalized before being collected.

    Args:
        file_path: Path of the UTF-8 text file to read.
        model_name: Model name passed to ``clip.load``.
        device: Torch device to run on. When ``None``, ``"cuda"`` is used if
            available, otherwise ``"cpu"``.

    Returns:
        A ``(labels, features)`` tuple of NumPy arrays: ``labels`` has one
        entry per description and ``features`` has one normalized feature
        row per description, both in file order.

    Raises:
        ValueError: If the file yields no usable line, so there is nothing
            to stack.
    """
    # Auto-select device (prefer GPU)
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Only the text encoder is needed; the image preprocessing transform
    # returned alongside the model is not used here.
    model, _ = clip.load(model_name, device=device)
    all_features = []  # One (num_texts, dim) array per input line
    all_labels = []    # One label per encoded description

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            result = process_text_line(line)
            if result is None:  # Blank or malformed line
                continue
            label, text_list = result
            if not text_list:  # Label without any description: nothing to encode
                continue

            text_tokens = clip.tokenize(text_list).to(device)
            with torch.no_grad():  # Inference only, no gradients needed
                features = model.encode_text(text_tokens)
                # L2-normalize each row so every vector has unit length
                features = features / features.norm(dim=-1, keepdim=True)

            # Move to CPU before converting to NumPy
            all_features.append(features.cpu().numpy())
            all_labels.extend([label] * features.shape[0])

    if not all_features:
        # np.vstack([]) would fail with an opaque message; say what is wrong.
        raise ValueError(
            f"No valid 'label text1/text2/...' lines found in {file_path!r}"
        )

    # Merge per-line batches into single arrays
    all_features = np.vstack(all_features)  # (N, dim); dim depends on the model
    all_labels = np.array(all_labels)       # (N,)
    return all_labels, all_features

def save_to_npy(labels, features, prefix="clip_output"):
    """Write features and labels to ``<prefix>_features.npy`` / ``<prefix>_labels.npy``.

    Args:
        labels: Array of labels to store.
        features: Array of features to store.
        prefix: Common path prefix of the two output files.
    """
    # Features are written first, then labels, each in NumPy's binary format.
    for suffix, array in (("features", features), ("labels", labels)):
        np.save(f"{prefix}_{suffix}.npy", array)
    print(f"Saved to {prefix}_features.npy and {prefix}_labels.npy")

# Script entry point: encode the descriptions in "feature.txt" and save the
# labels and features under the default output prefix.
if __name__ == "__main__":
    source_path = "feature.txt"  # Input file, one "label text1/text2/..." per line
    class_labels, text_features = extract_features(source_path)
    save_to_npy(class_labels, text_features)
