import random
from pathlib import Path

def generate_musicgen_dataset(count=500, seed=None):
    """Generate unique text prompts describing pieces of music.

    Each prompt is built by picking a theme at random and then one genre,
    one instrument, one mood and one production descriptor from that theme,
    formatted as ``"<genre>, <instrument>, <mood>, <production>."``.

    Args:
        count: Number of distinct prompts to produce. Values of zero or less
            yield an empty list.
        seed: Optional seed for a private ``random.Random`` instance, giving
            a reproducible dataset. When ``None`` (the default) the
            module-level ``random`` generator is used, so callers that seed
            it globally keep working.

    Returns:
        A list of ``count`` distinct prompt strings, in the order they were
        generated.

    Raises:
        ValueError: If ``count`` is larger than the number of distinct
            prompts the built-in vocabulary can form; the sampling loop
            could otherwise never terminate.
    """
    # Expanded thematic libraries
    themes = {
        "electronic": {
            "genres": ["Deep House", "Minimal Techno", "Glitch Hop", "Synthwave", "Future Bass", "Hardstyle", "Acid House", "Industrial", "IDM", "Liquid DnB", "Trance"],
            "instruments": ["analog sawtooth lead", "FM bell synth", "pulsing sub-bass", "gated reverb snare", "modular blips", "wavetable pads", "bitcrushed drums"],
            "moods": ["hypnotic", "neon-lit", "cybernetic", "euphoric", "dark", "rhythmic", "driving"],
            "production": ["digital clarity", "sidechained", "wide stereo image", "glitchy", "pristine", "heavily compressed"]
        },
        "acoustic_folk": {
            "genres": ["Indie Folk", "Bluegrass", "Appalachian Folk", "Celtic Traditional", "Americana", "Acoustic Singer-Songwriter"],
            "instruments": ["fingerpicked acoustic guitar", "banjo", "mandolin", "upright bass", "fiddle", "tin whistle", "lap steel guitar"],
            "moods": ["rustic", "earthy", "heartfelt", "nostalgic", "melancholic", "spirited", "organic"],
            "production": ["lo-fi grit", "room reverb", "intimate mic setup", "analog warmth", "dusty", "raw"]
        },
        "cinematic": {
            "genres": ["Orchestral Score", "Dark Ambient", "Drone", "Post-Rock", "Trailer Music", "Neo-Classical", "Cyberpunk Soundtrack"],
            "instruments": ["soaring violins", "staccato cellos", "brass fanfares", "taiko drums", "tremolo electric guitar", "church organ", "dissonant piano"],
            "moods": ["epic", "tense", "heroic", "mysterious", "ominous", "ethereal", "cinematic"],
            "production": ["massive hall reverb", "high dynamic range", "layered textures", "deep atmosphere", "wall of sound"]
        },
        "jazz_funk": {
            "genres": ["Bop Jazz", "Acid Jazz", "Soul Jazz", "Funk", "Afrobeat", "Neo-Soul", "Bossa Nova"],
            "instruments": ["Rhodes piano", "slap bass", "wah-wah guitar", "brushed drums", "tenor saxophone", "hammond organ", "congas"],
            "moods": ["groovy", "sophisticated", "vibrant", "chill", "soulful", "upbeat", "syncopated"],
            "production": ["dry studio sound", "vintage saturation", "punchy drums", "warm mids", "tape hiss"]
        },
        "world": {
            "genres": ["Saharan Blues", "Flamenco", "Indian Classical", "Highlife", "Reggae", "Dub", "Middle Eastern Fusion"],
            "instruments": ["sitar", "oud", "djembe", "koto", "tabla", "nylon guitar", "heavy dub bass"],
            "moods": ["hypnotic", "passionate", "meditative", "rhythmic", "cultural", "trance-like"],
            "production": ["natural acoustics", "dub delays", "world-beat textures", "percussive-heavy"]
        }
    }

    # Count how many distinct prompts exist. Genre names do not repeat
    # between themes, so the per-theme combination counts simply add up.
    capacity = 0
    for category in themes.values():
        combinations = 1
        for options in category.values():
            combinations *= len(set(options))
        capacity += combinations

    # Rejection sampling below can only finish if enough distinct prompts
    # exist; fail loudly instead of spinning forever.
    if count > capacity:
        raise ValueError(
            f"count={count} exceeds the {capacity} distinct prompts available"
        )

    # A dedicated generator keeps seeded runs independent of global state.
    rng = random if seed is None else random.Random(seed)

    theme_keys = list(themes)
    prompts = []   # keeps generation order so seeded runs are reproducible
    seen = set()   # O(1) duplicate detection

    while len(prompts) < count:
        # Pick a theme, then one descriptor of each kind from that theme.
        category = themes[rng.choice(theme_keys)]
        genre = rng.choice(category["genres"])
        inst = rng.choice(category["instruments"])
        mood = rng.choice(category["moods"])
        prod = rng.choice(category["production"])

        # Format the caption
        line = f"{genre}, {inst}, {mood}, {prod}."

        # Skip repeats so every returned prompt is unique.
        if line not in seen:
            seen.add(line)
            prompts.append(line)

    return prompts

# Execution: build the dataset and write it out, one prompt per line.
results = generate_musicgen_dataset(500)

output_path = Path("/home/wmar/wmar_audio/outputs/musicgen_prompts.txt")
# Create the output directory first so opening the file does not fail with
# FileNotFoundError when the directory is missing.
output_path.parent.mkdir(parents=True, exist_ok=True)

# An explicit encoding keeps the file independent of the platform locale.
with output_path.open("w", encoding="utf-8") as f:
    f.writelines(f"{line}\n" for line in results)

print(f"Successfully generated {len(results)} prompts in musicgen_prompts.txt")