import csv
import os

# Lookup table from single-letter emotion code to lowercase emotion name.
# The codes "O" and "X" are deliberately left out of this table.
emotion_mapping = dict(
    A="angry",
    S="sad",
    H="happy",
    U="surprise",
    F="fear",
    D="disgust",
    C="contempt",
    N="neutral",
)


def generate_msp_podcast_csv():
    """Convert ``labels_consensus.csv`` into ``msp-podcast.csv``.

    Both files are resolved relative to the current working directory.
    The first input row is treated as a header and discarded. For every
    remaining row with at least eight columns, column 0 (file name),
    column 1 (emotion code), column 5 (speaker id) and column 6 (gender)
    are read. Rows whose emotion code is ``"O"`` or ``"X"`` are dropped
    silently; rows whose code is missing from ``emotion_mapping`` or that
    have fewer than eight columns are reported with a warning and skipped.

    Each accepted row is written as
    ``dataset_name, wav_filename, emotion_label, gender, speaker_id`` with
    the dataset name fixed to ``"MSP-PODCAST-Publish-1.12"`` and the gender
    and speaker id lower-cased.

    Returns:
        None. Progress, warnings and errors are reported via ``print``;
        no exception is propagated to the caller.
    """
    input_file = "labels_consensus.csv"
    output_file = "msp-podcast.csv"
    skipped_codes = ("O", "X")

    # Bail out before the output file is created/truncated.
    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found")
        return

    try:
        # newline="" on both handles lets the csv module do its own line
        # handling, so quoted fields containing line breaks survive intact.
        with open(input_file, "r", newline="", encoding="utf-8") as infile, open(
            output_file, "w", newline="", encoding="utf-8"
        ) as outfile:
            reader = csv.reader(infile)
            writer = csv.writer(outfile)

            writer.writerow(
                [
                    "dataset_name",
                    "wav_filename",
                    "emotion_label",
                    "gender",
                    "speaker_id",
                ]
            )

            # Discard the input header. The default keeps an empty input
            # file from raising StopIteration, which previously surfaced
            # as a blank "Error processing file:" message.
            next(reader, None)

            for row in reader:
                if len(row) < 8:
                    print(f"Warning: Invalid row format: {row}")
                    continue

                filename = row[0]
                emotion_code = row[1]
                speaker_id = row[5]
                gender = row[6]

                # These codes are excluded without a warning.
                if emotion_code in skipped_codes:
                    continue

                emotion = emotion_mapping.get(emotion_code)
                if emotion is None:
                    print(
                        f"Warning: Unknown emotion code '{emotion_code}' for file {filename}"
                    )
                    continue

                writer.writerow(
                    [
                        "MSP-PODCAST-Publish-1.12",
                        filename,
                        emotion,
                        gender.lower(),
                        speaker_id.lower(),
                    ]
                )
    except Exception as e:
        # Deliberate best-effort boundary: this function reports failures
        # instead of raising, so any I/O, decoding or CSV error ends here.
        print(f"Error processing file: {e}")
    else:
        print(f"Successfully generated {output_file}")


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate_msp_podcast_csv()
