import pandas as pd
import os
import json
import functools
from datasets import load_dataset


def load_data(path):
    """Load a single data file and return its ``"train"`` split.

    The text after the last ``.`` in ``path`` is used as the builder name
    passed to ``load_dataset``; ``txt`` is mapped to ``text`` and ``jsonl``
    to ``json``, any other suffix is passed through unchanged.

    Args:
        path: String path of the data file; forwarded as ``data_files``.

    Returns:
        The ``"train"`` entry of whatever ``load_dataset`` returns.
    """
    # Suffixes whose builder name differs from the file extension.
    builder_aliases = {"txt": "text", "jsonl": "json"}

    suffix = path.rsplit(".", 1)[-1]
    builder = builder_aliases.get(suffix, suffix)

    loaded = load_dataset(builder, data_files=path)
    return loaded["train"]



def get_gpu_memory():
    """Return the used GPU memory reported by ``nvidia-smi`` as a float.

    Runs ``nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader``
    and parses only the first line of its output, so when several lines are
    printed (presumably one per GPU) only the first one is reported.

    Returns:
        float: The value on the first output line (nvidia-smi documents
        ``memory.used`` in MiB; units are stripped by ``nounits``).

    Raises:
        ValueError: If the command produced no output (e.g. ``nvidia-smi`` is
            not installed) or the first line is not a number.
    """
    command = "nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader"
    # The context manager closes the pipe and waits for the child process,
    # instead of leaving both to be cleaned up by the garbage collector.
    with os.popen(command) as pipe:
        first_line = pipe.readline().strip()
    if not first_line:
        raise ValueError(
            "nvidia-smi produced no output; is it installed and is a GPU visible?"
        )
    return float(first_line)


def reshape_sequences(sequences, n):
    """
    Split a flat sequence of length m*n into m consecutive chunks of length n.

    Args:
        sequences: Sized, sliceable sequence whose length is a multiple of n.
        n: Chunk length. Must be a positive integer, or a float with an
            integral value (e.g. ``2.0``), which is converted to ``int``.

    Returns:
        A list of m slices of ``sequences``, each of length n, in order.
        An empty input yields an empty list.

    Raises:
        AssertionError: If n is not integral, n is not positive, or
            ``len(sequences)`` is not a multiple of n.
    """
    assert n == int(n), f"n should be an integer, but {n:.2f} is given"
    n = int(n)
    # Reject zero (would divide by zero) and negatives (would silently
    # produce an empty result).
    assert n > 0, f"n should be a positive integer, but {n} is given"
    total = len(sequences)
    assert total % n == 0, (
        f"length of sequences should be a multiple of {n}, "
        f"but the length of given sequences is {total}"
    )
    # Step through the input n items at a time; integer arithmetic only.
    return [sequences[start:start + n] for start in range(0, total, n)]

# Translation table built once at import time instead of on every call.
# Single-quote look-alikes map to an ASCII apostrophe, curly double quotes
# map to an ASCII double quote.
_QUOTE_TRANSLATION = str.maketrans({
    "\u2018": "'",   # ‘ left single quotation mark
    "\u2019": "'",   # ’ right single quotation mark
    "\u2032": "'",   # ′ prime
    "`": "'",        # backtick
    "\u201c": "\"",  # “ left double quotation mark
    "\u201d": "\"",  # ” right double quotation mark
})


def normalize_quotes(s: str) -> str:
    """Return ``s`` with typographic quote characters replaced by ASCII ones.

    U+2018, U+2019, U+2032 and the backtick become ``'``; U+201C and U+201D
    become ``"``. All other characters are left unchanged.
    """
    return s.translate(_QUOTE_TRANSLATION)


