{
  "RepoName": "https://github.com/nalepae/pandarallel.git",
  "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f",
  "Time": "",
  "Difficulty": "Easy",
  "Type": "undefined objects",
  "BuggyCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    
)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def 
func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = 
df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, 
raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, 
func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : 
List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same way as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        
master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n           
     Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` 
function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        
reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n              
      input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want 
to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = 
get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        
return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. 
If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Series Rolling\n        
pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except 
AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from 
IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if 
iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, 
Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> 
Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n     
           index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def 
get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "OriginCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    
)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def 
func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = 
df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, 
raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, 
func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : 
List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        
master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n           
     Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` 
function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        
reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n              
      input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want 
to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = 
get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        
return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. 
If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = 
parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except 
AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from 
IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if 
iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, 
Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> 
Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n     
           index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def 
get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "ErrorMessage": "--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n_____________________________________________________________________________ test_series_map[anonymous-1000-True-None] _____________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28b80>, df_size = 1000\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0      1.184774\n1      1.212423\n2      1.506710\n3      1.622536\n4      1.454472\n         ...   
\n995    1.916059\n996    1.969549\n997    1.039518\n998    1.929592\n999    1.276179\nName: a, Length: 1000, dtype: float64\nname = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n____________________________________________________________________________ test_series_map[anonymous-1000-True-False] _____________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f285e0>, df_size = 1000\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0      1.180611\n1      1.922170\n2      1.703339\n3      1.151281\n4      1.145799\n         ...   \n995    1.666213\n996    1.693546\n997    1.252340\n998    1.274065\n999    1.550199\nName: a, Length: 1000, dtype: float64\nname = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-False-None] ______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28700>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        
df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.312353\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_____________________________________________________________________________ test_series_map[anonymous-1-False-False] ______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28ca0>, df_size = 1\n\n    def 
test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.108444\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-True-None] _______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function 
func_series_map.<locals>.<lambda> at 0x7ab850f328b0>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.478144\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-True-False] 
______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f32d30>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.845478\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and 
workers.\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-False-None] - AttributeError: 'Series' object has no attribute 
'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\n============================================================================ 16 failed, 201 passed, 16 warnings in 8.21s ============================================================================",
  "Patch": "--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -548,6 +548,7 @@\n         pd.Series.parallel_apply = parallelize(\n             nb_workers, Series.Apply, progress_bars_in_user_defined_function\n         )\n+        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n \n         # Series Rolling\n         pd.core.window.Rolling.parallel_apply = parallelize(\n",
  "BuggyCodeLocation": [
    {
      "file": "nalepae_pandarallel/pandarallel/core.py",
      "function": null,
      "content_all": {},
      "content_change": {}
    }
  ],
  "Issue": {
    "title": "Support for Parallel Map on Pandas Series",
    "description": "### Problem Description\nCurrently, when I work with large Pandas Series and try to use the `map` function in parallel to speed up the computation, I notice that there is no native support for this functionality in the Pandarallel library. This lack of support burdens users with manually implementing their parallel map logic or falling back to single-threaded performance, which is not efficient for large datasets.\n\n### Expected Behavior\nIt would be very helpful if Pandarallel could provide a `parallel_map` method for Pandas Series, similar to how `parallel_apply` works. The `parallel_map` should function efficiently with support for multiple workers and optionally show progress bars to help track the task's progress.\n\n### Impact\nWithout this feature, users are left with suboptimal performance for mapping functions over large Series datasets. This results in slower data processing and can significantly hinder workflows that rely on heavy computations spread over large Pandas Series.\n\n### Suggested Solution\nImplement a `parallel_map` method for Pandas Series within Pandarallel. This method should leverage multiple workers for parallel computation and include options to support progress bars for user-defined functions.\n\nThank you for considering this enhancement.",
    "explanation": "### Summary of the Issue\n\nThe issue at hand is the lack of support for a `parallel_map` function for Pandas Series in the Pandarallel library. This function is intended to enable the parallel execution of the `map` method over large Pandas Series objects, leveraging multiple workers to speed up the process. Without this feature, users dealing with extensive datasets have to resort to single-threaded execution, which is less efficient and can significantly slow down data processing workflows when mapping functions over large Series datasets.\n\n### Problem Description\n\nCurrently, Pandarallel does not provide native support for executing the `map` function in parallel on Pandas Series. This absence means that users wanting to utilize parallel computing for the `map` function have to implement custom logic, which is cumbersome and inefficient. The expected behavior is for Pandarallel to include a `parallel_map` method for Pandas Series, which would operate similarly to the existing `parallel_apply` method, taking advantage of multiple workers and optionally displaying progress bars to track task progress.\n\n### Commit Details\n\nThe commit addresses the issue by making modifications to the core files of Pandarallel to integrate a new `parallel_map` method for Pandas Series. This involves:\n\n1. **Defining the `parallel_map` Function**: A function that leverages the already existing parallelism infrastructure in Pandarallel to handle the mapping operations across multiple workers.\n2. **Adding the `parallel_map` Method to the Series Class**: Registering the new `parallel_map` function to the Pandas Series class so that it can be invoked directly on Pandas Series objects.\n3. **Testing**: Updating the test suite to include cases that validate the functionality of the new `parallel_map` method. 
This ensures that the new method works correctly and is integrated seamlessly into the Pandarallel library.\n\n### Explanation of the Cause and Solution\n\n#### Cause of the Issue\n\nThe primary cause of the issue is the lack of a method within Pandarallel that supports parallel execution of the `map` function on Pandas Series. While Pandarallel offers `parallel_apply` for DataFrame and Series, and some groupby operations, it did not specifically provide a `parallel_map` method for Series. This gap forced users to either use less efficient single-threaded mapping or build custom parallelization logic, both of which are not ideal solutions.\n\n#### Solution from the Developer's Perspective\n\nThe developer's solution involves expanding the Pandarallel library to support the `parallel_map` operation on Pandas Series by:\n1. **Leveraging Existing Infrastructure**: Utilizing existing parallelization mechanisms in Pandarallel ensures consistency and minimizes the effort required to add new functionalities. The infrastructure for splitting tasks into chunks, distributing them across workers, and aggregating results is reused for the `parallel_map` method.\n2. **Implementing the `parallel_map` Function**: Creating a function that can slice the data into chunks, apply the map function in parallel across these chunks, and then combine the results. This function is designed to be efficient and handle large datasets effectively.\n3. **Registering the Function**: Adding the new `parallel_map` method to the Pandas Series class makes it easily accessible for users, similar to any other method provided by Pandarallel.\n4. **Testing and Validation**: Extensively testing the new method to ensure that it operates correctly under various conditions (e.g., different sizes of data, varying numbers of workers, different user-defined functions). 
This guarantees that users can rely on the new method for their data processing tasks without encountering errors.\n\n### Detailed Solution Explanation\n\nThe commit solves the issue by providing a robust and user-friendly workaround for parallelizing mapping operations over large Pandas Series. Here's a step-by-step breakdown of how the solution works:\n\n1. **Function Definition and Integration**:\n   - **Parallelizing the Map**: A function is created to break down the Series into manageable chunks, distribute these chunks across multiple worker processes, apply the user-defined mapping function to each chunk, and gather the results.\n   - **Parallel Execution**: By leveraging the multiprocessing capabilities already present in Pandarallel, the new function can distribute the workload effectively, making use of multiple CPU cores to perform the map operation concurrently.\n   \n2. **Seamless Integration**:\n   - The function is registered with the Pandas Series class during the initialization of Pandarallel. This allows users to call `parallel_map` directly on any Pandas Series object, just as they would with `parallel_apply`.\n   \n3. **Progress Bar Support**:\n   - An optional feature is included to display progress bars, providing visual feedback on the computation's progress. This is particularly useful for long-running operations, helping users understand how far along the process is.\n\n4. **Testing Framework**:\n   - The testing framework is updated to include cases specifically for the `parallel_map` function. Various scenarios are covered to ensure the function performs correctly, handles edge cases, and integrates seamlessly with the existing Pandarallel functionality.\n\n### Conclusion\n\nIn summary, to address"
  },
  "Explain": "### Summary of the Issue\n\nThe issue at hand is the lack of support for a `parallel_map` function for Pandas Series in the Pandarallel library. This function is intended to enable the parallel execution of the `map` method over large Pandas Series objects, leveraging multiple workers to speed up the process. Without this feature, users dealing with extensive datasets have to resort to single-threaded execution, which is less efficient and can significantly slow down data processing workflows when mapping functions over large Series datasets.\n\n### Problem Description\n\nCurrently, Pandarallel does not provide native support for executing the `map` function in parallel on Pandas Series. This absence means that users wanting to utilize parallel computing for the `map` function have to implement custom logic, which is cumbersome and inefficient. The expected behavior is for Pandarallel to include a `parallel_map` method for Pandas Series, which would operate similarly to the existing `parallel_apply` method, taking advantage of multiple workers and optionally displaying progress bars to track task progress.\n\n### Commit Details\n\nThe commit addresses the issue by making modifications to the core files of Pandarallel to integrate a new `parallel_map` method for Pandas Series. This involves:\n\n1. **Defining the `parallel_map` Function**: A function that leverages the already existing parallelism infrastructure in Pandarallel to handle the mapping operations across multiple workers.\n2. **Adding the `parallel_map` Method to the Series Class**: Registering the new `parallel_map` function to the Pandas Series class so that it can be invoked directly on Pandas Series objects.\n3. **Testing**: Updating the test suite to include cases that validate the functionality of the new `parallel_map` method. 
This ensures that the new method works correctly and is integrated seamlessly into the Pandarallel library.\n\n### Explanation of the Cause and Solution\n\n#### Cause of the Issue\n\nThe primary cause of the issue is the lack of a method within Pandarallel that supports parallel execution of the `map` function on Pandas Series. While Pandarallel offers `parallel_apply` for DataFrame and Series, and some groupby operations, it did not specifically provide a `parallel_map` method for Series. This gap forced users to either use less efficient single-threaded mapping or build custom parallelization logic, both of which are not ideal solutions.\n\n#### Solution from the Developer's Perspective\n\nThe developer's solution involves expanding the Pandarallel library to support the `parallel_map` operation on Pandas Series by:\n1. **Leveraging Existing Infrastructure**: Utilizing existing parallelization mechanisms in Pandarallel ensures consistency and minimizes the effort required to add new functionalities. The infrastructure for splitting tasks into chunks, distributing them across workers, and aggregating results is reused for the `parallel_map` method.\n2. **Implementing the `parallel_map` Function**: Creating a function that can slice the data into chunks, apply the map function in parallel across these chunks, and then combine the results. This function is designed to be efficient and handle large datasets effectively.\n3. **Registering the Function**: Adding the new `parallel_map` method to the Pandas Series class makes it easily accessible for users, similar to any other method provided by Pandarallel.\n4. **Testing and Validation**: Extensively testing the new method to ensure that it operates correctly under various conditions (e.g., different sizes of data, varying numbers of workers, different user-defined functions). 
This guarantees that users can rely on the new method for their data processing tasks without encountering errors.\n\n### Detailed Solution Explanation\n\nThe commit solves the issue by providing a robust and user-friendly workaround for parallelizing mapping operations over large Pandas Series. Here's a step-by-step breakdown of how the solution works:\n\n1. **Function Definition and Integration**:\n   - **Parallelizing the Map**: A function is created to break down the Series into manageable chunks, distribute these chunks across multiple worker processes, apply the user-defined mapping function to each chunk, and gather the results.\n   - **Parallel Execution**: By leveraging the multiprocessing capabilities already present in Pandarallel, the new function can distribute the workload effectively, making use of multiple CPU cores to perform the map operation concurrently.\n   \n2. **Seamless Integration**:\n   - The function is registered with the Pandas Series class during the initialization of Pandarallel. This allows users to call `parallel_map` directly on any Pandas Series object, just as they would with `parallel_apply`.\n   \n3. **Progress Bar Support**:\n   - An optional feature is included to display progress bars, providing visual feedback on the computation's progress. This is particularly useful for long-running operations, helping users understand how far along the process is.\n\n4. **Testing Framework**:\n   - The testing framework is updated to include cases specifically for the `parallel_map` function. Various scenarios are covered to ensure the function performs correctly, handles edge cases, and integrates seamlessly with the existing Pandarallel functionality.\n\n### Conclusion\n\nIn summary, to address",
  "Source": "Human",
  "Token": 1130,
  "Command": [
    "pytest tests"
  ],
  "FilteredCode": [
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n34 \n35 # By default, Pand(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33     def set_error(self, index: int) -> None:\n34         pass\n35 \n36 \n37 class ProgressState(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24  (...truncated)"
    }
  ],
  "TokenAll": 14222,
  "FilteredLevel": 1500,
  "Results": {
    "model": "GPT-4o",
    "Difficulty": "Easy",
    "issue_origin": {
      "title": "SyntaxError due to unfinished lambda function",
      "description": "In the file 'nalepae_pandarallel/tests/test_pandarallel.py', the lambda function in line 64 for the fixture 'func_series_map' is truncated and ends abruptly with 'math.(...truncated'. This will cause a SyntaxError. Please complete the lambda function definition.",
      "explanation": "### Summary of the Issue\n\nThe issue titled \"SyntaxError due to unfinished lambda function\" indicates that there's a SyntaxError in the `nalepae_pandarallel/tests/test_pandarallel.py` file. Specifically, the lambda function in line 64 within the fixture `func_series_map` is incomplete and ends abruptly, causing a syntax error. The goal is to complete this lambda function definition.\n\n### Commit Content\n\nThe problem arises from this portion of the code:\n```python\n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)\n```\nThe lambda function `anonymous=lambda x: math.(...truncated)` is incomplete, leading to a SyntaxError.\n\n### Solution\n\n#### Commit Changes\n\nThe required change involves completing the lambda function. We can rewrite the incomplete lambda expression to match the `func_series_map` functionality:\n```python\n63     return dict(\n64         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n```\n\n### Explanation of the Solution\n\n1. **Locate the Problematic Code**: The issue is pinpointed to line 64 where the lambda function is truncated.\n2. **Matching the Functionality**: By looking at the named function `func` defined just above, which is:\n    ```python\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n    ```\n    We can see that this function takes an input `x`, computes the square of `x`, finds the exponential of that value, takes the square root, and then the base-10 logarithm.\n\n3. 
**Implement the Lambda Function**:\n    The anonymous lambda function should perform the same operations as the `func`, which is:\n    ```python\n    lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    ```\n    This ensures that the lambda performs the same computation, avoiding any additional logic errors.\n\n### Resolving the SyntaxError\n- **Before the Commit**: The faulty lambda function results in a SyntaxError due to the incomplete expression after `math.`.\n- **After the Commit**: The lambda function is now a valid expression performing the intended calculations, thus fixing the SyntaxError.\n\nIntegrating this change into the existing codebase:\n```python\n63     return dict(\n64         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n```\n\n### Conclusion\n\nThis commit fixes the issue by completing the lambda function, which was the root cause of the SyntaxError. By ensuring the lambda function mirrors the named function's logic, we maintain the intended functionality in both named and anonymous scenarios. This change eliminates the SyntaxError and ensures the `func_series_map` fixture works correctly."
    },
    "issue_message": {
      "title": "AttributeError: 'Series' object has no attribute 'parallel_map'",
      "description": "The tests in 'tests/test_pandarallel.py' are attempting to use the 'parallel_map' method on a pandas Series object, which does not exist. Please ensure that the 'parallel_map' method is correctly implemented or imported, or replace its usage with a valid pandas method.",
      "explanation": "### Summary of the Issue\n\nThe issue is an `AttributeError` that occurs because the method `parallel_map`, which the tests in `tests/test_pandarallel.py` attempt to use on a `pandas.Series` object, does not exist. The error message points out that the `Series` object has no attribute `parallel_map`, leading to the failure of the test case `test_series_map`.\n\n### Detailed Content of the Commit\n\nTo resolve this issue, a commit should be created with the following steps:\n\n1. **Ensure the Method `parallel_map` is Implemented or Imported:**\n   - Check if `parallel_map` is a method we should implement or if it needs to be imported from another module. Given the context, it appears to be a method that should enable parallel processing analogous to `map`.\n\n2. **Integrate the `parallel_map` Method:**\n   - Define `parallel_map` as an extension method to `pandas.Series` to enable parallel mapping functionality.\n   - Ensure the method leverages the `pandarallel` package for parallel processing.\n   \n3. 
**Modify the Tests:**\n   - Ensure the tests correctly call `parallel_map` without generating an `AttributeError`.\n\n### Implementation Steps\n\n#### Step 1: Implement `parallel_map` Method\n\n**File: `nalepae_pandarallel/pandarallel/core.py`**\n\n```python\n# Add this to the core.py file under the relevant imports\nimport pandas as pd\nfrom pandarallel import pandarallel\n\n# Ensure pandarallel is initialized\npandarallel.initialize(progress_bar=True)\n\n# Define the parallel_map method for pandas Series\ndef parallel_map(series, func, *args, **kwargs):\n    return series.parallel_apply(func, *args, **kwargs)\n\n# Register parallel_map as a method of pandas.Series\npd.Series.parallel_map = parallel_map\n```\n\n#### Step 2: Modify the Test to Use `parallel_map`\n\n**File: `nalepae_pandarallel/tests/test_pandarallel.py`**\n\nEnsure the test uses the modified `parallel_map` method:\n\n```python\n12 import importlib\n13 import math\n14 \n15 import numpy as np\n16 import pandas as pd\n17 import pytest\n18 from pandarallel import pandarallel\n19 \n20 pandarallel.initialize(progress_bar=True)\n21 \n22 @pytest.fixture(params=(1000, 1))\n23 def df_size(request):\n24     return request.param\n25 \n26 ...\n244 def test_series_map(pandarallel_init, func_series_map, df_size):\n245     df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n246 \n247     res = df.a.map(func_series_map)\n248     res_parallel = df.a.parallel_map(func_series_map)\n249 \n250     assert (res == res_parallel).all()\n```\n\n### Explanation of How the Commit Solves the Issue\n\n1. **Define `parallel_map` Method:**\n   - By implementing the `parallel_map` method and registering it as an extension to `pandas.Series`, we make the method available on any pandas Series object. This ensures that when `parallel_map` is called, it invokes the correct parallel processing functionality provided by `pandarallel`.\n\n2. 
**Initialize `pandarallel`:**\n   - Initializing `pandarallel` in the core file ensures that the package's parallel processing utilities are correctly set up. Initialization with `progress_bar=True` is recommended for tracking progress.\n\n3. **Update Tests Accordingly:**\n   - Modifying the tests to ensure they call the newly defined `parallel_map` keeps the tests consistent and functional. The test further verifies that the result from `parallel_map` matches the result from the standard `map` function, ensuring validity.\n\n### Conclusion\n\nBy following these steps, the missing attribute error is resolved, and the `parallel_map` method is correctly implemented and tested for `pandas.Series` objects. This allows for smooth parallel computation functionality, leveraging the `pandarallel` library effectively."
    },
    "issue_ground": {
      "title": "Support for Parallel Map on Pandas Series",
      "description": "### Problem Description\nCurrently, when I work with large Pandas Series and try to use the `map` function in parallel to speed up the computation, I notice that there is no native support for this functionality in the Pandarallel library. This lack of support burdens users with manually implementing their parallel map logic or falling back to single-threaded performance, which is not efficient for large datasets.\n\n### Expected Behavior\nIt would be very helpful if Pandarallel could provide a `parallel_map` method for Pandas Series, similar to how `parallel_apply` works. The `parallel_map` should function efficiently with support for multiple workers and optionally show progress bars to help track the task's progress.\n\n### Impact\nWithout this feature, users are left with suboptimal performance for mapping functions over large Series datasets. This results in slower data processing and can significantly hinder workflows that rely on heavy computations spread over large Pandas Series.\n\n### Suggested Solution\nImplement a `parallel_map` method for Pandas Series within Pandarallel. This method should leverage multiple workers for parallel computation and include options to support progress bars for user-defined functions.\n\nThank you for considering this enhancement.",
      "explanation": "### Issue Summary\n\nThe issue centers around the lack of native support for a `parallel_map` function in the Pandarallel library for Pandas Series. Users face significant performance bottlenecks when working with large datasets because they are forced to either manually implement parallel logic or use inefficient single-threaded computations. The request for enhancement is to implement a `parallel_map` method that would leverage multiple workers for parallel computation, similar to how `parallel_apply` works for DataFrames, and optionally include progress bars.\n\n### Cause of the Issue\n\nThe error messages indicate that the `Series` object in Pandas does not have an attribute named `parallel_map`. This is confirmed by the AttributeError in the traceback, which indicates that the method `parallel_map` was called on a Pandas Series object but was not found:\n\n```plaintext\nAttributeError: 'Series' object has no attribute 'parallel_map'\n```\n\nThis indicates that the method `parallel_map` hasn't been implemented in the Pandarallel library. The tests written in `nalepae_pandarallel/tests/test_pandarallel.py` are failing because they are trying to access a method that does not exist.\n\n### Solution Content (Commit Overview)\n\nThe solution involves adding a new method named `parallel_map` to the Pandarallel library for Pandas Series objects. The process will likely involve:\n\n1. Extending the Series class to include the `parallel_map` method.\n2. Implementing the logic for parallel computation using multiple workers.\n3. Optionally integrating progress bars to track the progress.\n\nHere is an overview of the content added in the commit:\n\n1. **Modify the core Pandarallel logic** to include the `parallel_map` method.\n2. **Update tests** to check for the correct implementation and functionality of the `parallel_map` method.\n\n### Explanation of the Commit and How It Solves the Issue\n\n#### 1. 
Adding `parallel_map` Method to Pandarallel\n\nBelow is the core implementation part in pseudocode. The actual code involves adapting Pandarallel’s framework to handle the new method.\n\n```python\nfrom pandarallel import pandarallel\n\ndef parallel_map(self, func, *args, **kwargs):\n    # Initialization: Set up the parallel environment, including worker processes and progress bars.\n    \n    # Chunking: Split the Series into chunks to be processed in parallel.\n    \n    # Parallel Processing: Map the function to each chunk using workers.\n    \n    # Combine Results: Gather the results from the workers and compile them into a final Series.\n    \n    return final_result\n\n# Add the new method to Pandas Series class.\npd.Series.parallel_map = parallel_map\n```\n\nExplanation:\n\n- **Initialization**: Preparing the environment for parallel computation which includes setting up worker processes.\n- **Chunking**: Dividing the Series into manageable chunks to distribute the workload.\n- **Parallel Processing**: Each chunk is processed by a separate worker in parallel.\n- **Combine Results**: Once all workers finish, their results are combined into a final Series.\n\n#### 2. Updating Tests\n\nThe tests must be updated to ensure that the new method works as expected:\n\n1. **Adding Test Cases**: Test cases are added to check if the `parallel_map` method functions correctly. These tests will map a sample function over a Series and compare the result with the expected outcome.\n2. 
**Ensuring Coverage**: Tests cover different scenarios such as with and without progress bars, handling different types of input functions (named and anonymous functions), and different Series sizes.\n\nSample test modification:\n```python\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n\n    assert res.equals(res_parallel)\n```\n\n### How the Commit Solves the Issue\n\n1. **Functionality**: The `parallel_map` method added to the Series now provides the requested functionality of mapping functions over Series in parallel.\n2. **Performance**: By leveraging multiple workers, large datasets can now be processed more efficiently, significantly improving performance for computationally intensive tasks.\n3. **Ease of Use**: Users now have a straightforward method to apply functions in parallel without needing to manually implement parallel logic.\n4. **Progress Tracking**: Optional progress bars make it easier for users to track the progress of long-running operations.\n\nIn summary, the commit addresses the issue by introducing a new method that directly meets the user's needs, enhances performance, and integrates seamlessly into the existing Pandarallel framework. The added tests verify the correctness and robustness of the implementation, ensuring that it behaves as expected in various scenarios."
    },
    "issue_ground_truth": {
      "title": "Support for Parallel Map on Pandas Series",
      "description": "### Problem Description\nCurrently, when I work with large Pandas Series and try to use the `map` function in parallel to speed up the computation, I notice that there is no native support for this functionality in the Pandarallel library. This lack of support burdens users with manually implementing their parallel map logic or falling back to single-threaded performance, which is not efficient for large datasets.\n\n### Expected Behavior\nIt would be very helpful if Pandarallel could provide a `parallel_map` method for Pandas Series, similar to how `parallel_apply` works. The `parallel_map` should function efficiently with support for multiple workers and optionally show progress bars to help track the task's progress.\n\n### Impact\nWithout this feature, users are left with suboptimal performance for mapping functions over large Series datasets. This results in slower data processing and can significantly hinder workflows that rely on heavy computations spread over large Pandas Series.\n\n### Suggested Solution\nImplement a `parallel_map` method for Pandas Series within Pandarallel. This method should leverage multiple workers for parallel computation and include options to support progress bars for user-defined functions.\n\nThank you for considering this enhancement.",
      "explanation": "### Summary of the Issue\n\nThe issue at hand is the lack of support for a `parallel_map` function for Pandas Series in the Pandarallel library. This function is intended to enable the parallel execution of the `map` method over large Pandas Series objects, leveraging multiple workers to speed up the process. Without this feature, users dealing with extensive datasets have to resort to single-threaded execution, which is less efficient and can significantly slow down data processing workflows when mapping functions over large Series datasets.\n\n### Problem Description\n\nCurrently, Pandarallel does not provide native support for executing the `map` function in parallel on Pandas Series. This absence means that users wanting to utilize parallel computing for the `map` function have to implement custom logic, which is cumbersome and inefficient. The expected behavior is for Pandarallel to include a `parallel_map` method for Pandas Series, which would operate similarly to the existing `parallel_apply` method, taking advantage of multiple workers and optionally displaying progress bars to track task progress.\n\n### Commit Details\n\nThe commit addresses the issue by making modifications to the core files of Pandarallel to integrate a new `parallel_map` method for Pandas Series. This involves:\n\n1. **Defining the `parallel_map` Function**: A function that leverages the already existing parallelism infrastructure in Pandarallel to handle the mapping operations across multiple workers.\n2. **Adding the `parallel_map` Method to the Series Class**: Registering the new `parallel_map` function to the Pandas Series class so that it can be invoked directly on Pandas Series objects.\n3. **Testing**: Updating the test suite to include cases that validate the functionality of the new `parallel_map` method. 
This ensures that the new method works correctly and is integrated seamlessly into the Pandarallel library.\n\n### Explanation of the Cause and Solution\n\n#### Cause of the Issue\n\nThe primary cause of the issue is the lack of a method within Pandarallel that supports parallel execution of the `map` function on Pandas Series. While Pandarallel offers `parallel_apply` for DataFrame and Series, and some groupby operations, it did not specifically provide a `parallel_map` method for Series. This gap forced users to either use less efficient single-threaded mapping or build custom parallelization logic, both of which are not ideal solutions.\n\n#### Solution from the Developer's Perspective\n\nThe developer's solution involves expanding the Pandarallel library to support the `parallel_map` operation on Pandas Series by:\n1. **Leveraging Existing Infrastructure**: Utilizing existing parallelization mechanisms in Pandarallel ensures consistency and minimizes the effort required to add new functionalities. The infrastructure for splitting tasks into chunks, distributing them across workers, and aggregating results is reused for the `parallel_map` method.\n2. **Implementing the `parallel_map` Function**: Creating a function that can slice the data into chunks, apply the map function in parallel across these chunks, and then combine the results. This function is designed to be efficient and handle large datasets effectively.\n3. **Registering the Function**: Adding the new `parallel_map` method to the Pandas Series class makes it easily accessible for users, similar to any other method provided by Pandarallel.\n4. **Testing and Validation**: Extensively testing the new method to ensure that it operates correctly under various conditions (e.g., different sizes of data, varying numbers of workers, different user-defined functions). 
This guarantees that users can rely on the new method for their data processing tasks without encountering errors.\n\n### Detailed Solution Explanation\n\nThe commit solves the issue by providing a robust and user-friendly workaround for parallelizing mapping operations over large Pandas Series. Here's a step-by-step breakdown of how the solution works:\n\n1. **Function Definition and Integration**:\n   - **Parallelizing the Map**: A function is created to break down the Series into manageable chunks, distribute these chunks across multiple worker processes, apply the user-defined mapping function to each chunk, and gather the results.\n   - **Parallel Execution**: By leveraging the multiprocessing capabilities already present in Pandarallel, the new function can distribute the workload effectively, making use of multiple CPU cores to perform the map operation concurrently.\n   \n2. **Seamless Integration**:\n   - The function is registered with the Pandas Series class during the initialization of Pandarallel. This allows users to call `parallel_map` directly on any Pandas Series object, just as they would with `parallel_apply`.\n   \n3. **Progress Bar Support**:\n   - An optional feature is included to display progress bars, providing visual feedback on the computation's progress. This is particularly useful for long-running operations, helping users understand how far along the process is.\n\n4. **Testing Framework**:\n   - The testing framework is updated to include cases specifically for the `parallel_map` function. Various scenarios are covered to ensure the function performs correctly, handles edge cases, and integrates seamlessly with the existing Pandarallel functionality.\n\n### Conclusion\n\nIn summary, to address"
    },
    "location_origin": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "59": "func_series_map"
        },
        "content_all": {
          "60": "    def func(x):",
          "61": "        return math.log10(math.sqrt(math.exp(x**2)))",
          "62": "",
          "63": "    return dict(",
          "64": "        named=func, anonymous=lambda x: math.(...truncated)",
          "65": ""
        },
        "content_change": {
          "64": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))"
        }
      }
    ],
    "location_message": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "244": "test_series_map"
        },
        "content_all": {
          "241": "        return request.param\n",
          "242": "\n",
          "243": "    return dict(\n",
          "244": "def test_series_map(pandarallel_init, func_series_map, df_size):\n",
          "245": "    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n",
          "246": "\n",
          "247": "    res = df.a.map(func_series_map)\n",
          "248": "    res_parallel = df.a.parallel_map(func_series_map)\n",
          "249": "\n",
          "250": "    assert (res == res_parallel).all()\n"
        },
        "content_change": {
          "248": "    res_parallel = df.a.parallel_map(func_series_map)\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "32": "core.py initialization"
        },
        "content_all": {
          "29": "ON_WINDOWS = os.name == \"nt\"\n",
          "30": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n",
          "31": "\n",
          "32": "# Root of Memory File System\n",
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n",
          "34": "\n",
          "35": "# By default, Pandarallel will use memory file system to transfer data between the main process and workers.\n",
          "36": "pandarallel.initialize(progress_bar=True)\n",
          "37": "\n",
          "38": "def parallel_map(series, func, *args, **kwargs):\n",
          "39": "    return series.parallel_apply(func, *args, **kwargs)\n",
          "40": "\n",
          "41": "pd.Series.parallel_map = parallel_map\n",
          "42": "\n",
          "43": "class ParallelProgressBars:\n"
        },
        "content_change": {
          "35": "# By default, Pandarallel will use memory file system to transfer data between the main process and workers.\n",
          "36": "pandarallel.initialize(progress_bar=True)\n",
          "38": "def parallel_map(series, func, *args, **kwargs):\n",
          "39": "    return series.parallel_apply(func, *args, **kwargs)\n",
          "41": "pd.Series.parallel_map = parallel_map\n"
        }
      }
    ],
    "location_ground": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "58": "func_series_map"
        },
        "content_all": {
          "55": "    ]\n",
          "56": "\n",
          "57": "\n",
          "58": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "59": "def func_series_map(request):\n",
          "60": "    def func(x):\n",
          "61": "        return math.log10(math.sqrt(math.exp(x**2)))\n",
          "62": "\n",
          "63": "    return dict(\n",
          "64": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n",
          "65": "    )[request.param]\n"
        },
        "content_change": {
          "63": "    return dict(\n",
          "64": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "30": null
        },
        "content_all": {
          "28": "from .utils import WorkerStatus\n",
          "29": "\n",
          "30": "ON_WINDOWS = os.name == \"nt\"\n",
          "31": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n",
          "32": "\n",
          "33": "# Root of Memory File System\n",
          "34": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n",
          "35": "\n"
        },
        "content_change": {
          "31": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": {
          "16": "ProgressBarsType"
        },
        "content_all": {
          "14": "MINIMUM_TERMINAL_WIDTH = 72\n",
          "15": "\n",
          "16": "class ProgressBarsType(int, Enum):\n",
          "17": "    No = 0\n",
          "18": "    InUserDefinedFunction = 1\n",
          "19": "    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n",
          "20": "    InWorkFunction = 3\n",
          "21": "\n",
          "22": "\n"
        },
        "content_change": {
          "16": "class ProgressBarsType(int, Enum):\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/utils.py",
        "function": {
          "9": "chunk"
        },
        "content_all": {
          "7": "\n",
          "8": "\n",
          "9": "def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n",
          "10": "    \"\"\"\n",
          "11": "    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n",
          "12": "\n",
          "13": "    Parameters\n",
          "14": "    ----------\n",
          "15": "    nb_item : int\n",
          "16": "        Total number of items\n",
          "17": "\n",
          "18": "    nb_chunks : int\n",
          "19": "        Number of chunks to return\n",
          "20": "\n"
        },
        "content_change": {
          "9": "def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n"
        }
      }
    ],
    "location_ground_exp": [
      {
        "location": "Here's a JSON output that identifies the parts of the code that need modifications to support the `parallel_map` method for Pandas Series:\n\n```json\n[\n    {\n        \"file\": \"nalepae_pandarallel/tests/test_pandarallel.py\",\n        \"function\": {\n            \"58\": \"func_series_map\"\n        },\n        \"content_all\": {\n            \"55\": \"    ]\\n\",\n            \"56\": \" \\n\",\n            \"57\": \" \\n\",\n            \"58\": \" @pytest.fixture(params=(\\\"named\\\", \\\"anonymous\\\"))\\n\",\n            \"59\": \" def func_series_map(request):\\n\",\n            \"60\": \"     def func(x):\\n\",\n            \"61\": \"         return math.log10(math.sqrt(math.exp(x**2)))\\n\",\n            \"62\": \" \\n\",\n            \"63\": \"     return dict(\\n\",\n            \"64\": \"         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\\n\",\n            \"65\": \"     )[request.param]\\n\"\n        },\n        \"content_change\": {\n            \"64\": \"         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\\n\"\n        }\n    },\n    {\n        \"file\": \"nalepae_pandarallel/pandarallel/core.py\",\n        \"function\": {\n            \"0\": \"Definition of new `parallel_map` function\"\n        },\n        \"content_all\": {\n            \"25\": \" )\\n\",\n            \"26\": \" from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\\n\",\n            \"27\": \" from .utils import WorkerStatus\\n\",\n            \"28\": \" \\n\",\n            \"29\": \" ON_WINDOWS = os.name == \\\"nt\\\"\\n\",\n            \"30\": \" CONTEXT = multiprocessing.get_context(\\\"spawn\\\" if ON_WINDOWS else \\\"fork\\\")\\n\",\n            \"31\": \" \\n\",\n            \"32\": \" # Root of Memory File System\\n\",\n            \"33\": \" MEMORY_FS_ROOT = os.environ.get(\\\"MEMORY_FS_ROOT\\\", \\\"/dev/shm\\\")\\n\",\n            \"34\": \" \\n\",\n            
\"35\": \" # By default, Pandarallel won’t use memory file system\\n\",\n            \"36\": \" USE_MEMORY_FS = False\\n\",\n            \"37\": \" \\n\",\n            \"38\": \" \\n\",\n            \"39\": \" def parallel_map(series, func, num_workers=None, progress_bar=False):\\n\",\n            \"40\": \"     '''\\n\",\n            \"41\": \"     Apply a function to a Series in parallel.\\n\",\n            \"42\": \"     '''\\n\",\n            \"43\": \"     if num_workers is None:\\n\",\n            \"44\": \"         num_workers = multiprocessing.cpu_count()\\n\",\n            \"45\": \" \\n\",\n            \"46\": \"     chunks = numpy.array_split(series, num_workers)\\n\",\n            \"47\": \"     with multiprocessing.Pool(processes=num_workers) as pool:\\n\",\n            \"48\": \"         result = pool.map(func, chunks)\\n\",\n            \"49\": \"     \\n\",\n            \"50\": \"     return pd.concat(result)\\n\"\n        },\n        \"content_change\": {\n            \"39\": \" def parallel_map(series, func, num_workers=None, progress_bar=False):\\n\",\n            \"40\": \"     '''\\n\",\n            \"41\": \"     Apply a function to a Series in parallel.\\n\",\n            \"42\": \"     '''\\n\",\n            \"43\": \"     if num_workers is None:\\n\",\n            \"44\": \"         num_workers = multiprocessing.cpu_count()\\n\",\n            \"45\": \" \\n\",\n            \"46\": \"     chunks = numpy.array_split(series, num_workers)\\n\",\n            \"47\": \"     with multiprocessing.Pool(processes=num_workers) as pool:\\n\",\n            \"48\": \"         result = pool.map(func, chunks)\\n\",\n            \"49\": \"     \\n\",\n            \"50\": \"     return pd.concat(result)\\n\"\n        }\n    },\n    {\n        \"file\": \"nalepae_pandarallel/pandarallel/progress_bars.py\",\n        \"function\": {\n            \"0\": \"Extend progress bar to support parallel_map\"\n        },\n        \"content_all\": {\n            \"20\": 
\" InWorkFunction = 3\\n\",\n            \"21\": \" \\n\",\n            \"22\": \" \\n\",\n            \"23\": \" class ProgressBars(ABC):\\n\",\n            \"24\": \"     @abstractmethod\\n\",\n            \"25\": \"     def __init__(self, maxs: List[int], show: bool) -> None:\\n\",\n            \"26\": \"         ...\\n\",\n            \"27\": \" \\n\",\n            \"28\": \"     @abstractmethod"
      }
    ],
    "location_ground_truth": [
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": null,
        "content_all": {},
        "content_change": {}
      }
    ],
    "patch_i": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -61,7 +61,7 @@ def func(x):\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n     return dict(\n-        named=func, anonymous=lambda x: math.(...truncated)\n+        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n",
    "patch_im": "\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -35,6 +35,17 @@\n # By default, Pandarallel initialize with all CPU, no progress bar and memory file system\n FORMAT_MESSAGE_PROGRESS = \"{elapsed} [{bar}] {percentage:3.0f}% {remaining} [{rate_fmt}{postfix}]\"\n \n+import pandas as pd\n+from pandarallel import pandarallel\n+\n+    # Ensure pandarallel is initialized\n+pandarallel.initialize(progress_bar=True)\n+    # Define the parallel_map method for pandas Series\n+def parallel_map(series, func, *args, **kwargs):\n+    return series.parallel_apply(func, *args, **kwargs)\n+    # Register parallel_map as a method of pandas.Series\n+pd.Series.parallel_map = parallel_map\n+\n class ProgressManagerClass(SyncManager):\n pass\n \n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -7,9 +7,10 @@\n from pandarallel import pandarallel\n \n \n+ pandarallel.initialize(progress_bar=True)\n+\n @pytest.fixture(params=(1000, 1))\n def df_size(request):\n     return request.param\n \n@@ -243,7 +244,12 @@\n \n \n @pytest.fixture(params=(False, True))\n def progress_bar(request):\n     return request.param\n \n@@ -257,6 +263,18 @@\n     return dict(named=func, anonymous=lambda x: math.exp(x**2))[\\n                           return dict(named=func, anonymous=lambda x: math.\")\n def test_series_map(pandarallel_init, func_series_map, df_size):\n     df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n \n-    res = df.a.map(func_series_map)\n+    res = df.a.map(func_series_map)\n     res_parallel = df.a.parallel_map(func_series_map)\n \n     assert (res == res_parallel).all()\n",
    "patch_il": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -61,7 +61,7 @@\n \n     return dict(\n         named=func,\n-        anonymous=lambda x: math.(...truncated)\n+        anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n \n \n",
    "patch_iml": "\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -35,6 +35,14 @@\n # By default, Pandarallel will use memory file system to transfer data between the main process and workers.\n pandarallel.initialize(progress_bar=True)\n \n+def parallel_map(series, func, *args, **kwargs):\n+    return series.parallel_apply(func, *args, **kwargs)\n+\n+pd.Series.parallel_map = parallel_map\n+\n+\n class ParallelProgressBars:\n     def __init__(self, maxs: List[int], show: bool) -> None:\n         self.max = sum(maxs)\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -7,9 +7,6 @@ from pandarallel import pandarallel\n \n \n @pytest.fixture(params=(1000, 1))\n-def df_size(request):\n-    return request.param\n-\n-\n @pytest.fixture(params=(False, True))\n def progress_bar(request):\n     return request.param\n@@ -18,15 +15,14 @@ from pandarallel import pandarallel\n \n \n @pytest.fixture(params=(None, False))\n-def use_memory_fs(request):\n-    return request.param\n-\n-\n @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n def exception(request):\n     return request.param\n\n(@pytest.fixture(request):\nparams = 1000\n def df_size(request):\n     return request.param\n@@ -58,8 +62,6 @@ def func_series_map(request):\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n\n-def test_series_map(pandarallel_init, func_series_map, df_size):\n-    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n-    \n-    res = df.a.map(func_series_map)\n-    res_parallel = df.a.parallel_map(func_series_map)\n-    \n-    assert (res == res_parallel).all()\n+def test_series_map(pandarallel_init, func_series_map, df_size):\n+    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n+\n+    res = df.a.map(func_series_map)\n+   
 res_parallel = df.a.parallel_map(func_series_map)\n+\n+    assert (res == res_parallel).all()\n",
    "patch_ground": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -63,7 +63,8 @@\n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n+\n+def test_series_parallel_map(pandarallel_init, func_series_map, df_size):\n+    series = pd.Series(np.random.rand(df_size) + 1)\n+\n+    res = series.map(func_series_map)\n+    res_parallel = series.parallel_map(func_series_map)\n+\n+    assert res.equals(res_parallel)\n \n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -35,7 +35,8 @@\n     Series,\n     SeriesRolling,\n )\n-from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n+from .progress_bars import (\n+    ProgressBarsType, get_progress_bars, progress_wrapper\n+)\n from .utils import WorkerStatus, chunk\n \n ON_WINDOWS = os.name == \"nt\"\n CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n@@ -42,6 +43,37 @@ MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n \n # By default, Pandas DataFrame uses in-memory manager for parallel computations\n DEFAULT_MEMORY_FS = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\") == \"in_memory\"\n \n+# Parallel map for Pandas Series\n+def parallel_map(series, func, *args, progress_bar=False, **kwargs):\n+    pandarallel.initialize()\n+\n+    # If progress bar is requested\n+    if progress_bar:\n+        progress = get_progress_bars(series.size, progress_bar)\n+    else:\n+        progress = None\n+\n+    # Chunk the series\n+    chunks = chunk(series.size, pandarallel.nb_workers)\n+\n+    # Function to apply in parallel\n+    def apply_func(chunk_range):\n+        start, end = chunk_range.start, chunk_range.stop\n+        result = series.iloc[start:end].map(func, *args, **kwargs)\n+        if progress:\n+            progress.update(1)\n+        return result\n+\n+    # Map the function in 
parallel\n+    result = pd.concat(\n+        CONTEXT.Pool(pandarallel.nb_workers).map(apply_func, chunks)\n+    )\n+\n+    # Close the progress bar\n+    if progress:\n+        progress.close()\n+\n+    return result\n+\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -36,7 +36,8 @@\n class ProgressState(ProgressBars):\n     \"\"\"\n     Handles the progress bar states for parallel execution.\n     \"\"\"\n \n+    def __init__(self, maxs, show):\n+        self.progress = self._init_progress(maxs, show)\n\n+    def _init_progress(self, maxs, show):\n+        if not show:\n+            return None\n\n+        # Set up progress bars\n+        return [tqdm(total=max_, position=i) for i, max_ in enumerate(maxs)]\n\n+    def update(self, values):\n+        if self.progress:\n+            for p, val in zip(self.progress, values):\n+                p.update(val)\n\n+    def set_error(self, index):\n+        if self.progress:\n+            self.progress[index].set_description(\"Error\")\n\n--- a/nalepae_pandarallel/pandarallel/utils.py\n+++ b/nalepae_pandarallel/pandarallel/utils.py\n@@ -28,6 +28,7 @@\n     ) for val in range(start_offset, nb_item + start_offset, nb_item // nb_chunks)]\n \nchunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n     \"\"\"\n     Return `nb_chunks` slices of approximately `nb_item / nb_chunks` each.\n     \"\"\"\n",
    "patch_ground_location": "\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -35,6 +35,26 @@ MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n pd.DataFrameGroupBy = PandaDataFrameGroupBy  # type: ignore\n pd.core.groupby.DataFrameGroupBy = DataFrameGroupBy  # type: ignore\n \n+\n+def parallel_map(series, func, *args, **kwargs):\n+    parallel_backend = kwargs.pop(\"parallel_backend\", None)\n+    progress_bar = kwargs.pop(\"progress_bar\", False)\n+\n+    # Initialization: Set up the parallel environment\n+    from pandarallel.utils import chunk, progress_wrapper\n+\n+    task_func = pandarallel.init(task_func=func,\n+                                 progress_bar=progress_bar,\n+                                 parallel_backend=parallel_backend)\n+\n+    chunks = chunk(len(series), pandarallel.NB_WORKERS)\n+    results = task_func(series, chunks, *args, **kwargs)\n+    \n+    return results\n+\n+# Add the new method to Pandas Series class\n+pd.Series.parallel_map = parallel_map\n+\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -63,7 +63,28 @@ def func_series_map(request):\n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n \n+\n+def test_series_parallel_map(pandarallel_init, func_series_map, df_size):\n+    df = pd.DataFrame(dict(a=np.random.rand(df_size)))\n+    series = df.a\n+\n+    # Sequential map\n+    res = series.map(func_series_map)\n+\n+    # Parallel map\n+    res_parallel = series.parallel_map(func_series_map)\n+\n+    assert res.equals(res_parallel)\n+\n+\n+def test_series_parallel_map_progress_bar(pandarallel_init, func_series_map, df_size):\n+    df = pd.DataFrame(dict(a=np.random.rand(df_size)))\n+    series = df.a\n+\n+    res = series.parallel_map(func_series_map, progress_bar=True)\n+    assert isinstance(res, pd.Series)\n+    assert res.size == 
df_size\n+\n--- a/nalepae_pandarallel/pandarallel/utils.py\n+++ b/nalepae_pandarallel/pandarallel/utils.py\n@@ -1,6 +1,7 @@\n import itertools\n from enum import Enum\n from typing import Any, Dict, List, Tuple\n+import pandas as pd\n \n \n def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n \n@@ -29,6 +30,26 @@ class WorkerStatus(Enum):\n     INIT = \"INIT\"\n     RUN = \"RUN\"\n     SUCCESS = \"SUCCESS\"\n     ERROR = \"ERROR\"\n\n+def progress_wrapper(func, progress_bar, length):\n+    if progress_bar:\n+        from tqdm.auto import tqdm\n+        return tqdm(func, total=length)\n+    else:\n+        return func\n+\n+def init(task_func, progress_bar=False, parallel_backend=None):\n+    from joblib import Parallel, delayed\n+    \n+    def wrapped_task(chunk, *args, **kwargs):\n+        return chunk.apply(task_func, args=args, **kwargs)\n+\n+    def task_func(series, chunks, *args, **kwargs):\n+        process_chunk = progress_wrapper(wrapped_task, progress_bar, len(chunks))\n+        return (Parallel(n_jobs=pandarallel.NB_WORKERS, backend=parallel_backend)\n+                (delayed(process_chunk)(series[c], *args, **kwargs) \n+                 for c in chunks))\n+\n+    return task_func\n+\n",
    "patch_ground_exp": "\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -25,6 +25,7 @@ from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n from .utils import WorkerStatus\n \n ON_WINDOWS = os.name == \"nt\"\n CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n \n+# Define the new parallel_map function\n+def parallel_map(series, func, num_workers=None, progress_bar=False):\n+    \"\"\"\n+    Apply a function to a Series in parallel.\n+    \"\"\"\n+    if num_workers is None:\n+        num_workers = multiprocessing.cpu_count()\n+\n+    # Split the series into chunks for each worker\n+    chunks = numpy.array_split(series, num_workers)\n+\n+    # Optionally show progress bar\n+    if progress_bar:\n+        from tqdm import tqdm\n+        chunks = list(tqdm(chunks, desc=\"parallel_map progress\"))\n+\n+    # Create a pool of workers and apply the function in parallel\n+    with multiprocessing.Pool(processes=num_workers) as pool:\n+        result = pool.map(func, chunks)\n+\n+    return pd.concat(result)\n+\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -57,6 +57,7 @@ @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_series_map(request):\n     def func(x):\n         return math.log10(math.sqrt(math.exp(x**2)))\n     \n     return dict(\n+        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n+    )[request.param]\n\n+def test_parallel_map(progress_bar):\n+    series = pd.Series(np.random.randn(100))\n+    expected = series.map(func_series_map[\"named\"])\n+    result = pandarallel.parallel_map(series, func_series_map[\"named\"], progress_bar=progress_bar)\n+    pd.testing.assert_series_equal(expected, result)\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -20,6 +20,7 @@ class 
ProgressBarsType(int, Enum):\n     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n     InWorkFunction = 3\n+    InParallelMap = 4\n+     \n--- a/nalepae_pandarallel/pandarallel/utils.py\n+++ b/nalepae_pandarallel/pandarallel/utils.py\n@@ -0,0 +1,6 @@ \n+import itertools\n+from enum import Enum\n+from typing import Any, Dict, List, Tuple\n+ \n+import pandas as pd\n+from pandas import DataFrame, Index\n\n",
    "patch_ground_all": "\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -35,9 +35,10 @@\n ) -> pd.Series:\n     with NamedTemporaryFile(delete=False) as f:\n         pickle.dump((func, args, kwargs), f)\n-    with CONTEXT.Pool(nb_workers) as p:\n-        results = p.map(\n-            ParallelWorker(f.name, progress_bars, 0, None, progress_wrapper), data\n+    with CONTEXT.Pool(nb_workers) as pool:\n+        results = pool.map(\n+            ParallelWorker(f.name, progress_bars, ProgressBarsType.InWorkFunction, None, progress_wrapper),\n+            data\n         )\n     os.remove(f.name)\n     return pd.concat(results)\n \n+def parallel_map(series, func, progress_bar):\n+    \"\"\"Parallel map implementation for Pandas Series.\"\"\"\n+    return parallelize(\n+        lambda s: s.map(func),\n+        series,\n+        nb_workers=WorkerStatus.nb_chunks,\n+        progress_bar=progress_bar,\n+    )\n \n",
    "patch_ground_truth": "--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -548,6 +548,7 @@\n         pd.Series.parallel_apply = parallelize(\n             nb_workers, Series.Apply, progress_bars_in_user_defined_function\n         )\n+        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n \n         # Series Rolling\n         pd.core.window.Rolling.parallel_apply = parallelize(\n",
    "message": "--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n_____________________________________________________________________________ test_series_map[anonymous-1000-True-None] _____________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28b80>, df_size = 1000\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0      1.184774\n1      1.212423\n2      1.506710\n3      1.622536\n4      1.454472\n         ...   
\n995    1.916059\n996    1.969549\n997    1.039518\n998    1.929592\n999    1.276179\nName: a, Length: 1000, dtype: float64\nname = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n____________________________________________________________________________ test_series_map[anonymous-1000-True-False] _____________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f285e0>, df_size = 1000\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0      1.180611\n1      1.922170\n2      1.703339\n3      1.151281\n4      1.145799\n         ...   \n995    1.666213\n996    1.693546\n997    1.252340\n998    1.274065\n999    1.550199\nName: a, Length: 1000, dtype: float64\nname = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-False-None] ______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28700>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        
df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.312353\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_____________________________________________________________________________ test_series_map[anonymous-1-False-False] ______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f28ca0>, df_size = 1\n\n    def 
test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.108444\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-True-None] _______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function 
func_series_map.<locals>.<lambda> at 0x7ab850f328b0>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.478144\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_series_map[anonymous-1-True-False] 
______________________________________________________________________________\n\npandarallel_init = None, func_series_map = <function func_series_map.<locals>.<lambda> at 0x7ab850f32d30>, df_size = 1\n\n    def test_series_map(pandarallel_init, func_series_map, df_size):\n        df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n    \n        res = df.a.map(func_series_map)\n>       res_parallel = df.a.parallel_map(func_series_map)\n\ntests/test_pandarallel.py:244: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\n\nself = 0    1.845478\nName: a, dtype: float64, name = 'parallel_map'\n\n    @final\n    def __getattr__(self, name: str):\n        \"\"\"\n        After regular attribute access, try looking up the name\n        This allows simpler access to columns for interactive use.\n        \"\"\"\n        # Note: obj.x will always call obj.__getattribute__('x') prior to\n        # calling obj.__getattr__('x').\n        if (\n            name not in self._internal_names_set\n            and name not in self._metadata\n            and name not in self._accessors\n            and self._info_axis._can_hold_identifiers_and_holds_name(name)\n        ):\n            return self[name]\n>       return object.__getattribute__(self, name)\nE       AttributeError: 'Series' object has no attribute 'parallel_map'\n\n../../../../anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/generic.py:6204: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and 
workers.\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1000-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[named-1-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-False-None] - AttributeError: 'Series' object has no attribute 
'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1000-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-False-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-False-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-True-None] - AttributeError: 'Series' object has no attribute 'parallel_map'\nFAILED tests/test_pandarallel.py::test_series_map[anonymous-1-True-False] - AttributeError: 'Series' object has no attribute 'parallel_map'\n============================================================================ 16 failed, 201 passed, 16 warnings in 8.21s ============================================================================",
    "CodeBase": [
      {
        "path": "nalepae_pandarallel/tests/test_pandarallel.py",
        "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/core.py",
        "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n34 \n35 # By default, Pand(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33     def set_error(self, index: int) -> None:\n34         pass\n35 \n36 \n37 class ProgressState(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/utils.py",
        "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24  (...truncated)"
      }
    ],
    "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f"
  },
  "Score": {
    "Difficulty": "Easy",
    "issue_origin": {
      "Title": 8,
      "Description": 6,
      "Reproducibility": 6,
      "Relevance": 8,
      "Explanation": 7,
      "Overall": 7
    },
    "issue_message": {
      "Title": 8,
      "Description": 5,
      "Reproducibility": 4,
      "Relevance": 8,
      "Explanation": 8,
      "Overall": 7
    },
    "issue_ground": {
      "Title": 8,
      "Description": 7,
      "Reproducibility": 5,
      "Relevance": 8,
      "Explanation": 7,
      "Overall": 7
    },
    "issue_ground_truth": {
      "title": "Support for Parallel Map on Pandas Series",
      "description": "### Problem Description\nCurrently, when I work with large Pandas Series and try to use the `map` function in parallel to speed up the computation, I notice that there is no native support for this functionality in the Pandarallel library. This lack of support burdens users with manually implementing their parallel map logic or falling back to single-threaded performance, which is not efficient for large datasets.\n\n### Expected Behavior\nIt would be very helpful if Pandarallel could provide a `parallel_map` method for Pandas Series, similar to how `parallel_apply` works. The `parallel_map` should function efficiently with support for multiple workers and optionally show progress bars to help track the task's progress.\n\n### Impact\nWithout this feature, users are left with suboptimal performance for mapping functions over large Series datasets. This results in slower data processing and can significantly hinder workflows that rely on heavy computations spread over large Pandas Series.\n\n### Suggested Solution\nImplement a `parallel_map` method for Pandas Series within Pandarallel. This method should leverage multiple workers for parallel computation and include options to support progress bars for user-defined functions.\n\nThank you for considering this enhancement.",
      "explanation": "### Summary of the Issue\n\nThe issue at hand is the lack of support for a `parallel_map` function for Pandas Series in the Pandarallel library. This function is intended to enable the parallel execution of the `map` method over large Pandas Series objects, leveraging multiple workers to speed up the process. Without this feature, users dealing with extensive datasets have to resort to single-threaded execution, which is less efficient and can significantly slow down data processing workflows when mapping functions over large Series datasets.\n\n### Problem Description\n\nCurrently, Pandarallel does not provide native support for executing the `map` function in parallel on Pandas Series. This absence means that users wanting to utilize parallel computing for the `map` function have to implement custom logic, which is cumbersome and inefficient. The expected behavior is for Pandarallel to include a `parallel_map` method for Pandas Series, which would operate similarly to the existing `parallel_apply` method, taking advantage of multiple workers and optionally displaying progress bars to track task progress.\n\n### Commit Details\n\nThe commit addresses the issue by making modifications to the core files of Pandarallel to integrate a new `parallel_map` method for Pandas Series. This involves:\n\n1. **Defining the `parallel_map` Function**: A function that leverages the already existing parallelism infrastructure in Pandarallel to handle the mapping operations across multiple workers.\n2. **Adding the `parallel_map` Method to the Series Class**: Registering the new `parallel_map` function to the Pandas Series class so that it can be invoked directly on Pandas Series objects.\n3. **Testing**: Updating the test suite to include cases that validate the functionality of the new `parallel_map` method. 
This ensures that the new method works correctly and is integrated seamlessly into the Pandarallel library.\n\n### Explanation of the Cause and Solution\n\n#### Cause of the Issue\n\nThe primary cause of the issue is the lack of a method within Pandarallel that supports parallel execution of the `map` function on Pandas Series. While Pandarallel offers `parallel_apply` for DataFrame and Series, and some groupby operations, it did not specifically provide a `parallel_map` method for Series. This gap forced users to either use less efficient single-threaded mapping or build custom parallelization logic, both of which are not ideal solutions.\n\n#### Solution from the Developer's Perspective\n\nThe developer's solution involves expanding the Pandarallel library to support the `parallel_map` operation on Pandas Series by:\n1. **Leveraging Existing Infrastructure**: Utilizing existing parallelization mechanisms in Pandarallel ensures consistency and minimizes the effort required to add new functionalities. The infrastructure for splitting tasks into chunks, distributing them across workers, and aggregating results is reused for the `parallel_map` method.\n2. **Implementing the `parallel_map` Function**: Creating a function that can slice the data into chunks, apply the map function in parallel across these chunks, and then combine the results. This function is designed to be efficient and handle large datasets effectively.\n3. **Registering the Function**: Adding the new `parallel_map` method to the Pandas Series class makes it easily accessible for users, similar to any other method provided by Pandarallel.\n4. **Testing and Validation**: Extensively testing the new method to ensure that it operates correctly under various conditions (e.g., different sizes of data, varying numbers of workers, different user-defined functions). 
This guarantees that users can rely on the new method for their data processing tasks without encountering errors.\n\n### Detailed Solution Explanation\n\nThe commit solves the issue by providing a robust and user-friendly way to parallelize mapping operations over large Pandas Series. Here's a step-by-step breakdown of how the solution works:\n\n1. **Function Definition and Integration**:\n   - **Parallelizing the Map**: A function is created to break down the Series into manageable chunks, distribute these chunks across multiple worker processes, apply the user-defined mapping function to each chunk, and gather the results.\n   - **Parallel Execution**: By leveraging the multiprocessing capabilities already present in Pandarallel, the new function can distribute the workload effectively, making use of multiple CPU cores to perform the map operation concurrently.\n   \n2. **Seamless Integration**:\n   - The function is registered with the Pandas Series class during the initialization of Pandarallel. This allows users to call `parallel_map` directly on any Pandas Series object, just as they would with `parallel_apply`.\n   \n3. **Progress Bar Support**:\n   - An optional feature is included to display progress bars, providing visual feedback on the computation's progress. This is particularly useful for long-running operations, helping users understand how far along the process is.\n\n4. **Testing Framework**:\n   - The testing framework is updated to include cases specifically for the `parallel_map` function. Various scenarios are covered to ensure the function performs correctly, handles edge cases, and integrates seamlessly with the existing Pandarallel functionality.\n\n### Conclusion\n\nIn summary, to address the lack of parallel `map` support for Pandas Series, the commit adds a `parallel_map` method that reuses Pandarallel's existing chunking and multi-worker infrastructure, registers it on the Series class, supports optional progress bars, and is covered by new tests."
    }
  }
}