import json
import os
import random




def generate_slo(model_size, dataset_path):
    """Attach a randomly chosen (TTFT, TPOT) SLO pair to each JSONL record.

    Every non-blank line of ``dataset_path`` is parsed as a JSON object,
    gets ``ttft`` and ``tpot`` keys set from one randomly picked SLO pair,
    and is written to ``<dataset_path without extension>_with_slo.jsonl``.
    The picks come from a private RNG seeded with 42, so repeated runs on
    the same input produce identical output and the process-wide ``random``
    state is left untouched.

    Args:
        model_size: ``"8b"``, ``"70b"`` or ``"27b"``. ``"27b"`` shares the
            ``"70b"`` SLO table.
        dataset_path: Path to the input JSONL file (one JSON object per
            line).

    Raises:
        ValueError: If ``model_size`` is not one of the supported sizes.
    """
    # Each entry is (TTFT, TPOT). The mixes are meant to cover:
    #   tight ttft, loose tpot: human-read response
    #   tight tpot, loose ttft: tool-calling
    #   loose ttft, loose tpot: summary
    small_model_slos = [
        (500, 30), (3000, 30), (2000, 30),
        (1000, 50), (500, 50), (7500, 50),
    ]
    large_model_slos = [
        (1000, 60), (6000, 60), (4000, 60),
        (2000, 100), (1000, 100), (15000, 100),
    ]
    slo_table = {
        "8b": small_model_slos,
        "70b": large_model_slos,
        "27b": large_model_slos,
    }
    # Explicit lookup: the previous `== "70b" or "27b"` test was always
    # truthy, so any unknown size silently received the 70b table.
    if model_size not in slo_table:
        raise ValueError(
            f"unsupported model_size {model_size!r}; "
            f"expected one of {sorted(slo_table)}"
        )
    slo_options = slo_table[model_size]

    # A dedicated generator yields the same sequence as seeding the global
    # one with 42, without clobbering other users of the `random` module.
    rng = random.Random(42)

    dataset = []
    with open(dataset_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            data = json.loads(line)
            ttft, tpot = rng.choice(slo_options)
            data['ttft'] = ttft
            data['tpot'] = tpot
            dataset.append(data)

    # Written only after the whole input parsed, so a malformed input never
    # leaves a truncated output file behind.
    output_path = f"{os.path.splitext(dataset_path)[0]}_with_slo.jsonl"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(data) + '\n' for data in dataset)
    


if __name__ == "__main__":
    # A suite made only of comments is a syntax error, so this no-op keeps
    # the guard valid while every example invocation below is commented out.
    pass
    # Example invocations -- uncomment the ones to run:
    # generate_slo("8b", "datasets/llama8b-sharegpt/test.jsonl")
    # generate_slo("8b", "datasets/llama8b-lmsys/test.jsonl")
    # generate_slo("8b", "datasets/llama8b-sharegpt/train.jsonl")
    # generate_slo("8b", "datasets/llama8b-lmsys/train.jsonl")
    # generate_slo("27b", "datasets/gemma27b-sharegpt/test.jsonl")
    # generate_slo("27b", "datasets/gemma27b-lmsys/test.jsonl")
    # generate_slo("27b", "datasets/gemma27b-sharegpt/train.jsonl")
    # generate_slo("27b", "datasets/gemma27b-lmsys/train.jsonl")