{
  "RepoName": "https://github.com/nalepae/pandarallel.git",
  "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f",
  "Time": "",
  "Difficulty": "Medium",
  "Type": "undefined objects",
  "BuggyCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    
)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def 
func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = 
df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, 
raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, 
func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : 
List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same way as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        
master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n           
     Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` 
function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        
reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n              
      input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want 
to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = 
get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        
return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. 
If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = 
parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except 
AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from 
IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if 
iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, 
Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> 
Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n     
           index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def 
get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "OriginCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    
)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def 
func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = 
df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, 
raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, 
func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : 
List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        
master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n           
     Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` 
function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        
reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n              
      input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want 
to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = 
get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        
return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. 
If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = 
parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except 
AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from 
IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if 
iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, 
Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> 
Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n     
           index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def 
get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "ErrorMessage": "--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n____________________________________________________________ ERROR at setup of test_dataframe_axis_1_no_reduction[anonymous-1-True-None] ____________________________________________________________\n\nrequest = <SubRequest 'func_dataframe_apply_axis_1_no_reduce' for <Function test_dataframe_axis_1_no_reduction[anonymous-1-True-None]>>\n\n    @pytest.fixture(params=(\"named\", \"anonymous\"))\n    def func_dataframe_apply_axis_1_no_reduce(request):\n        def func(x):\n            return x**2\n    \n>       return dict(named=func, anonymous=lambda x: x**2)[request.params]\nE       AttributeError: 'SubRequest' object has no attribute 'params'\n\ntests/test_pandarallel.py:157: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n___________________________________________________________ ERROR at setup of test_dataframe_axis_1_no_reduction[anonymous-1-True-False] ____________________________________________________________\n\nrequest = <SubRequest 'func_dataframe_apply_axis_1_no_reduce' for <Function test_dataframe_axis_1_no_reduction[anonymous-1-True-False]>>\n\n    @pytest.fixture(params=(\"named\", \"anonymous\"))\n    def func_dataframe_apply_axis_1_no_reduce(request):\n        def func(x):\n            return x**2\n    \n>       return dict(named=func, anonymous=lambda x: 
x**2)[request.params]\nE       AttributeError: 'SubRequest' object has no attribute 'params'\n\ntests/test_pandarallel.py:157: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-None] - AttributeError: 'SubRequest' object 
has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\n============================================================================ 201 passed, 16 warnings, 16 errors in 6.85s ============================================================================",
  "Patch": "--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -154,7 +154,7 @@\n     def func(x):\n         return x**2\n \n-    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n \n \n @pytest.fixture\n",
  "BuggyCodeLocation": [
    {
      "file": "nalepae_pandarallel/tests/test_pandarallel.py",
      "function": null,
      "content_all": {
        "157": "    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n",
        "158": "\n",
        "159": "\n"
      },
      "content_change": {
        "157": "    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n"
      }
    }
  ],
  "Issue": {
    "title": "Incorrect Key Error in pytest Fixture: 'request.params' Should Be 'request.param'",
    "description": "There is a misconfiguration in the pytest fixture `func_dataframe_apply_axis_1_no_reduce` where it attempts to access 'request.params' instead of 'request.param'. This error leads to a key error during test execution, causing the test suite to fail unexpectedly.\n\nTo reproduce:\n1. Run the test suite using pytest.\n2. Observe the failure in tests due to the incorrect key access.\n\nThis issue impacts the reliability of test results, potentially masking other issues in the codebase. It is crucial that the fixture correctly accesses 'request.param' to ensure the tests are executed as intended.\n\nPlease investigate and fix the access key in the fixture to mitigate this problem.",
    "explanation": "### Summary of the Issue\n\nThe issue reported involves an error in a pytest fixture named `func_dataframe_apply_axis_1_no_reduce` within a project. The problem is with the incorrect property access during test execution. Specifically, the fixture is attempting to access `request.params` instead of the correct key `request.param`. This mismatch causes a key error that results in the failure of the test suite unexpectedly.\n\n### Content of the Commit\n\nTo address this issue, a commit was made that modifies the access key in the `func_dataframe_apply_axis_1_no_reduce` fixture. The essential change involved replacing the incorrect usage of `request.params` with the correct `request.param`.\n\n### Explanation from a Developer's Perspective\n\n#### Cause of the Issue\n\nThe root cause of the issue lies in the incorrect property access for the `request` object within the pytest fixture. In pytest, `request.param` is designed to provide the parameter value for the current test invocation, while `request.params` does not exist and hence accessing it causes a key error. This incorrect access leads to the fixture failing when it attempts to retrieve test parameters, causing the entire test suite to fail unexpectedly. This fail-fast behavior disrupts the continuous integration process and potentially conceals other issues within the codebase.\n\n#### Solution Provided by the Commit\n\nThe commit addresses the problem by correcting the key used to access the test parameter in the fixture. 
Here’s the developer-oriented explanation of how the commit solves the issue:\n\n- **Identification of the Error**: The developer identified that the error arises due to accessing a non-existent attribute `request.params` instead of `request.param`.\n  \n- **Modifying the Access Key**: The core fix in the commit is changing `request.params` to `request.param`, ensuring the fixture correctly retrieves the parameter intended for the current test.\n  \n- **Impact on Test Execution**: This change ensures that the pytest fixture functions as expected, fetching the appropriate test parameters without causing a key error. Consequently, the test suite can run without interruptions, ensuring reliable and continuous testing.\n\n### Conclusion\n\nIn summary, the issue was caused by an incorrect access key in the pytest fixture, which led to a key error and test failures. The solution involved a simple yet crucial change: correcting the access key from `request.params` to `request.param`. This fix allows the fixture to correctly retrieve parameter values, thereby resolving the key error and ensuring that the test suite runs smoothly without unexpected failures. This commit demonstrates the importance of accurate usage of framework-specific conventions and highlights how a small oversight can disrupt the functionality and reliability of automated tests."
  },
  "Explain": "### Summary of the Issue\n\nThe issue reported involves an error in a pytest fixture named `func_dataframe_apply_axis_1_no_reduce` within a project. The problem is with the incorrect property access during test execution. Specifically, the fixture is attempting to access `request.params` instead of the correct key `request.param`. This mismatch causes a key error that results in the failure of the test suite unexpectedly.\n\n### Content of the Commit\n\nTo address this issue, a commit was made that modifies the access key in the `func_dataframe_apply_axis_1_no_reduce` fixture. The essential change involved replacing the incorrect usage of `request.params` with the correct `request.param`.\n\n### Explanation from a Developer's Perspective\n\n#### Cause of the Issue\n\nThe root cause of the issue lies in the incorrect property access for the `request` object within the pytest fixture. In pytest, `request.param` is designed to provide the parameter value for the current test invocation, while `request.params` does not exist and hence accessing it causes a key error. This incorrect access leads to the fixture failing when it attempts to retrieve test parameters, causing the entire test suite to fail unexpectedly. This fail-fast behavior disrupts the continuous integration process and potentially conceals other issues within the codebase.\n\n#### Solution Provided by the Commit\n\nThe commit addresses the problem by correcting the key used to access the test parameter in the fixture. 
Here’s the developer-oriented explanation of how the commit solves the issue:\n\n- **Identification of the Error**: The developer identified that the error arises due to accessing a non-existent attribute `request.params` instead of `request.param`.\n  \n- **Modifying the Access Key**: The core fix in the commit is changing `request.params` to `request.param`, ensuring the fixture correctly retrieves the parameter intended for the current test.\n  \n- **Impact on Test Execution**: This change ensures that the pytest fixture functions as expected, fetching the appropriate test parameters without causing a key error. Consequently, the test suite can run without interruptions, ensuring reliable and continuous testing.\n\n### Conclusion\n\nIn summary, the issue was caused by an incorrect access key in the pytest fixture, which led to a key error and test failures. The solution involved a simple yet crucial change: correcting the access key from `request.params` to `request.param`. This fix allows the fixture to correctly retrieve parameter values, thereby resolving the key error and ensuring that the test suite runs smoothly without unexpected failures. This commit demonstrates the importance of accurate usage of framework-specific conventions and highlights how a small oversight can disrupt the functionality and reliability of automated tests.",
  "Source": "Human",
  "Token": 1437,
  "Command": [
    "pytest tests"
  ],
  "FilteredCode": [
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24     Returns\n25     -------\n26     A list of slices\n27 \n28     Examples\n29     --------\n30     >>> chunks = chunk(103, 4)\n31     >>> chunks\n32     [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n33     \"\"\"\n34     if nb_item == 0:\n35         return [slice(0)]\n36     \n37     if nb_item <= nb_chunks:\n38         return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n39 \n40     quotient = nb_item // nb_chunks\n41     remainder = nb_item % nb_chunks\n42 \n43     quotients = [quotient] * nb_chunks\n44     remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n45 \n46     nb_elems_per_chunk = [\n47         quotient + remainder for quotient, remainder in zip(quotients, remainders)\n48     ]\n49 \n50     accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n51     shifted_accumulated = accumulated.copy()\n52     shifted_accumulated.insert(0, 0)\n53     shifted_accumulated.pop()\n54 \n55     return [\n56         slice(max(0, begin - start_offset), end)\n57         for begin, end in zip(shifted_accumulated, accumulated)\n58     ]\n59 \n60 \n61 def df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n62     \"\"\"\n63     Returns whether a data frame is indexed in the way specified by the\n64     provided axes.\n65 \n66     Used by DataFrameGroupBy to determine whether a group has 
been modified.\n67 \n68     Function adapted from pandas.core.groupby.ops._is_indexed_like\n69 \n70     Parameters\n71     ----------\n72     df : DataFrame\n73         The data frame in question\n74 \n75     axes : List[Index]\n76         The axes to which the data frame is compared\n77 \n78     Returns\n79     -------\n80     Whether or not the data frame is indexed in the same wa as the axes.\n81     \"\"\"\n82     if isinstance(df, DataFrame):\n83         return df.axes[0].equals(axes[0])\n84 \n85     return False\n86 \n87 \n88 def get_pandas_version() -> Tuple[int, int]:\n89     major_str, minor_str, *_ = pd.__version__.split(\".\")\n90     return int(major_str), int(minor_str)\n91 \n92 \n93 def get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n94     axis = user_defined_function_kwargs.get(\"axis\", 0)\n95 \n96     if axis not in {0, 1, \"index\", \"columns\"}:\n97         raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n98 \n99     return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n100 \n101 \n102 class WorkerStatus(int, Enum):\n103     Running = 0\n104     Success = 1\n105     Error = 2"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable,(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "1 import itertools\n2 from typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n3 \n4 import pandas as pd\n5 from (...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools impor(...truncated)"
    }
  ],
  "TokenAll": 14242,
  "FilteredLevel": 1500,
  "Results": {
    "model": "GPT-4o",
    "Difficulty": "Medium",
    "issue_origin": {
      "title": "Typo in test function",
      "description": "In the file 'nalepae_pandarallel/tests/test_pandarallel.py', there is a typo on line 64, 'lambda x: math.(...truncated)'. The invocation of math module seems incomplete or incorrect. This should be corrected to ensure proper execution of the tests.",
      "explanation": "### Summary of the Issue\n\nThe issue is about a typo in a test function in the `nalepae_pandarallel/tests/test_pandarallel.py` file. Specifically, there is a incomplete or incorrect invocation of the `math` module on line 64. The code is truncated at `lambda x: math.(...truncated)`, which implies that the call to a function in the `math` module is missing or improperly referenced.\n\n### Detailed Content of the Commit\n\nThe commit to resolve this issue likely involves fixing the typo in line 64 to properly call the intended function from the `math` module. The correct version should properly finish the lambda function and make sure it references the correct method from the `math` module. The following detailed change should be part of the commit's content.\n\n**Before the fix:**\n```python\nnamed=func, anonymous=lambda x: math.(...truncated)\n```\n\n**After the fix:**\n```python\nnamed=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n```\n\n### Explanation of How the Commit Solves the Issue\n\n1. **Identify the Problem:**\n   The code at line 64 is currently incomplete and does not specify which function from the `math` module to use, leading to a syntax error or runtime error when the tests are executed.\n\n2. **Propose the Correct Function:**\n   Based on the context where `func_series_map` fixture is defined earlier:\n    ```python\n    @pytest.fixture(params=(\"named\", \"anonymous\"))\n    def func_series_map(request):\n        def func(x):\n            return math.log10(math.sqrt(math.exp(x**2)))\n        \n        return dict(\n            named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n        )[request.param]\n    ```\n   It is evident that the 'anonymous' lambda function should be performing the same set of operations as in the named `func`.\n\n3. 
**Correct the Lambda Function:**\n   By fixing the lambda function to properly call `math.log10`, `math.sqrt`, and `math.exp`, the code now correctly references the intended mathematical operations.\n\n4. **Why This Fix Works:**\n   - **Correct Syntax:** By providing the complete function call, the lambda function becomes syntactically correct.\n   - **Consistent Behavior:** Ensures that the behavior of the \"anonymous\" lambda function matches the named function (`func`), thereby ensuring consistency in tests depending on the 'func_series_map'.\n\n5. **Validation of the Fix:**\n   This fix ensures that when the tests are run, they correctly use the anonymous function which is now consistent with the named one, and any mathematical operations can be correctly performed without error.\n\n### Conclusion\n\nThe commit to fix the typo not only resolves a syntactical issue but also reinstates the intended behavior of the lambda function used in tests. It is crucial for ensuring that the tests run as expected and verify the intended functionality in the codebase."
    },
    "issue_message": {
      "title": "AttributeError: 'SubRequest' object has no attribute 'params'",
      "description": "The code contains an AttributeError in the pytest fixture func_dataframe_apply_axis_1_no_reduce. The 'request' object should use 'param' instead of 'params'. Update the return statement to use 'request.param'.",
      "explanation": "### Issue Summary\n\nThe provided issue describes an `AttributeError` occurring in the pytest fixture function named `func_dataframe_apply_axis_1_no_reduce` within the `nalepae_pandarallel/tests/test_pandarallel.py` file. Specifically, the error message indicates that the `request` object does not have an attribute named `params`. Instead of using `request.params`, the code should use `request.param` to avoid this error.\n\n### Detailed Explanation of the Issue\n\nThe relevant part of the error message says:\n```\nAttributeError: 'SubRequest' object has no attribute 'params'\n```\nIn pytest fixture functions, the `request` object is used to access the parameters passed to the fixture via the `@pytest.fixture` decorator. The `params` keyword in the decorator defines the parameters that are passed to the fixture during test execution. However, when you need to access one of these parameters within the fixture body, the correct attribute is `request.param`, not `request.params`.\n\n### Analysis of the Code Base\n\nHere is the relevant part of the code:\n```python\n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n...\n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n...\n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n```\nIn these fixtures, `request.param` is used, which is correct. 
However, the error occurs when `request.params` is mistakenly used in the `func_dataframe_apply_axis_1_no_reduce` function, which is not shown here but mentioned in the error message.\n\n### Error Inducing Code\nBased on the error message:\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n```\nHere, `request.params` should be `request.param`.\n\n### Commit Details and the Fix\n\nTo resolve this issue, the commit should replace the incorrect `request.params` with `request.param` in the `func_dataframe_apply_axis_1_no_reduce` function.\n\nCorrected code:\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n```\n\n### How the Commit Solves the Issue\n\n- **Accessing the parameter correctly:** The change from `request.params` to `request.param` ensures that the fixture accesses the parameter provided by the `@pytest.fixture` decorator correctly. Since `params` attribute does not exist on the `request` object and `param` is the correct attribute, this change eliminates the `AttributeError`.\n- **Consistent parameter handling:** This keeps the code consistent with how parameters are accessed in other fixture functions in the file. 
For instance, in the `df_size`, `progress_bar`, and `use_memory_fs` fixtures, `request.param` is consistently used.\n- **Successful test execution:** With this change, the test function that uses this fixture should now work without errors, leading to successful test execution and verification of the functionality.\n\n### Conclusion\n\nIn summary, the `AttributeError` was caused by incorrectly accessing a non-existent attribute `params` on the `request` object, whereas the actual correct attribute is `request.param`. Modifying the fixture function to use `request.param` solves this error and aligns with the correct usage pattern of pytest fixtures. This correction ensures that the tests run successfully and as expected."
    },
    "issue_ground": {
      "title": "AttributeError in pytest Fixture: 'request.params' Should Be 'request.param'",
      "description": "There is a mistake in the pytest fixture `func_dataframe_apply_axis_1_no_reduce` where it attempts to access 'request.params' instead of 'request.param'. Because the fixture request object has no 'params' attribute, this raises an AttributeError during fixture setup, causing every test that depends on the fixture to fail unexpectedly.\n\nTo reproduce:\n1. Run the test suite using pytest.\n2. Observe the errors in tests due to the incorrect attribute access.\n\nThis issue impacts the reliability of test results, potentially masking other issues in the codebase. It is crucial that the fixture correctly accesses 'request.param' to ensure the tests are executed as intended.\n\nPlease investigate and fix the attribute access in the fixture to mitigate this problem.",
      "explanation": "## Summary of the Issue\n\nThe issue in the pytest fixture `func_dataframe_apply_axis_1_no_reduce` is due to a misconfiguration. Specifically, the fixture is incorrectly using `request.params` instead of the correct `request.param`. This results in an AttributeError, which causes the test suite to fail.\n\n## Detailed Content of the Commit\n\nThe error trace indicates that the issue is with the key access within the pytest fixture function `func_dataframe_apply_axis_1_no_reduce`:\n\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n```\n\nThe correct attribute should be `request.param` instead of `request.params`.\n\nTo fix this, the code within the fixture should be corrected as follows:\n\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n```\n\n## Explanation: Cause and Solution\n\nLet's break this down:\n\n### Cause of the Issue\n\n1. **Misuse of `request.params`**: \n   - `request.params` is incorrect and does not exist.\n   - The pytest `request` object for parameterized fixtures provides `request.param` (singular) to get the current parameter value.\n  \n2. **AttributeError**:\n   - Since `request.params` does not exist, attempting to access it causes an AttributeError, leading to the test suite's failure.\n\n### Fix Implementation\n\nThe fix involves changing `request.params` to `request.param`:\n```diff\n- return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+ return dict(named=func, anonymous=lambda x: x**2)[request.param]\n```\n\n### How the Commit Solves the Issue\n\n1. 
**Access Correct Attribute**: \n   - By using `request.param`, the fixture correctly accesses the provided parameter for the current test setup. This ensures `named` or `anonymous` keys are correctly used.\n\n2. **Fixes the AttributeError**:\n   - This change eliminates the AttributeError, as `request.param` is the valid attribute to access the parameter used by the fixture.\n\n3. **Test Suite Stability**:\n   - With the correct attribute access, the pytest fixture functions as intended without throwing errors, allowing tests to run and ensuring reliable results.\n\n### Detailed Look at the Code Before and After\n\n**Before:**\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n```\n\n**After:**\n```python\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n```\n\nIn the corrected version:\n- `request.params` is changed to `request.param`.\n- The fixture now uses the correct attribute to get the parameter value provided for the current test, thus resolving the issue and allowing the test suite to run successfully.\n\nBy making this small but crucial change, the AttributeError is resolved, the fixture behaves as expected, and the overall reliability of the test results is restored."
    },
    "issue_ground_truth": {
      "title": "AttributeError in pytest Fixture: 'request.params' Should Be 'request.param'",
      "description": "There is a mistake in the pytest fixture `func_dataframe_apply_axis_1_no_reduce` where it attempts to access 'request.params' instead of 'request.param'. Because the fixture request object has no 'params' attribute, this raises an AttributeError during fixture setup, causing every test that depends on the fixture to fail unexpectedly.\n\nTo reproduce:\n1. Run the test suite using pytest.\n2. Observe the errors in tests due to the incorrect attribute access.\n\nThis issue impacts the reliability of test results, potentially masking other issues in the codebase. It is crucial that the fixture correctly accesses 'request.param' to ensure the tests are executed as intended.\n\nPlease investigate and fix the attribute access in the fixture to mitigate this problem.",
      "explanation": "### Summary of the Issue\n\nThe issue reported involves an error in a pytest fixture named `func_dataframe_apply_axis_1_no_reduce` within a project. The problem is with the incorrect attribute access during test execution. Specifically, the fixture is attempting to access `request.params` instead of the correct attribute `request.param`. This mismatch raises an `AttributeError` that results in the failure of the test suite unexpectedly.\n\n### Content of the Commit\n\nTo address this issue, a commit was made that modifies the attribute access in the `func_dataframe_apply_axis_1_no_reduce` fixture. The essential change involved replacing the incorrect usage of `request.params` with the correct `request.param`.\n\n### Explanation from a Developer's Perspective\n\n#### Cause of the Issue\n\nThe root cause of the issue lies in the incorrect attribute access for the `request` object within the pytest fixture. In pytest, `request.param` is designed to provide the parameter value for the current test invocation, while `request.params` does not exist and hence accessing it raises an `AttributeError`. This incorrect access leads to the fixture failing when it attempts to retrieve test parameters, causing every test that depends on the fixture to error during setup. This fail-fast behavior disrupts the continuous integration process and potentially conceals other issues within the codebase.\n\n#### Solution Provided by the Commit\n\nThe commit addresses the problem by correcting the attribute used to access the test parameter in the fixture. 
Here’s the developer-oriented explanation of how the commit solves the issue:\n\n- **Identification of the Error**: The developer identified that the error arises due to accessing a non-existent attribute `request.params` instead of `request.param`.\n  \n- **Modifying the Attribute Access**: The core fix in the commit is changing `request.params` to `request.param`, ensuring the fixture correctly retrieves the parameter intended for the current test.\n  \n- **Impact on Test Execution**: This change ensures that the pytest fixture functions as expected, fetching the appropriate test parameters without raising an `AttributeError`. Consequently, the test suite can run without interruptions, ensuring reliable and continuous testing.\n\n### Conclusion\n\nIn summary, the issue was caused by an incorrect attribute access in the pytest fixture, which led to an `AttributeError` and test failures. The solution involved a simple yet crucial change: correcting the attribute access from `request.params` to `request.param`. This fix allows the fixture to correctly retrieve parameter values, thereby resolving the `AttributeError` and ensuring that the test suite runs smoothly without unexpected failures. This commit demonstrates the importance of accurate usage of framework-specific conventions and highlights how a small oversight can disrupt the functionality and reliability of automated tests."
    },
    "location_origin": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "58": "func_series_map"
        },
        "content_all": {
          "61": "        return math.log10(math.sqrt(math.exp(x**2)))\n",
          "62": "\n",
          "63": "    return dict(\n",
          "64": "        named=func, anonymous=lambda x: math.(...truncated)\n",
          "65": "    )[\n",
          "66": "        request.param\n",
          "67": "    ]\n"
        },
        "content_change": {
          "64": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n"
        }
      }
    ],
    "location_message": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "157": "func_dataframe_apply_axis_1_no_reduce"
        },
        "content_all": {
          "154": "\n",
          "155": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "156": "def func_dataframe_apply_axis_1_no_reduce(request):\n",
          "157": "    def func(x):\n",
          "158": "        return x**2\n",
          "159": "\n",
          "160": "    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n",
          "161": "\n"
        },
        "content_change": {
          "160": "    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n"
        }
      }
    ],
    "location_ground": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "30": "func_dataframe_apply_axis_0"
        },
        "content_all": {
          "27": "\n",
          "28": "\n",
          "29": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "30": "def func_dataframe_apply_axis_0(request):\n",
          "31": "    def func(x):\n",
          "32": "        return max(x) - min(x)\n",
          "33": "\n",
          "34": "    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n",
          "35": "\n",
          "36": "\n",
          "37": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n"
        },
        "content_change": {
          "34": "    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n"
        }
      },
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "38": "func_dataframe_apply_axis_1"
        },
        "content_all": {
          "35": "\n",
          "36": "\n",
          "37": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "38": "def func_dataframe_apply_axis_1(request):\n",
          "39": "    def func(x):\n",
          "40": "        return math.sin(x.a**2) + math.sin(x.b**2)\n",
          "41": "\n",
          "42": "    return dict(\n",
          "43": "        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n",
          "44": "    )[request.param]\n",
          "45": "\n",
          "46": "\n",
          "47": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n"
        },
        "content_change": {
          "44": "    )[request.param]\n"
        }
      },
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "48": "func_dataframe_applymap"
        },
        "content_all": {
          "45": "\n",
          "46": "\n",
          "47": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "48": "def func_dataframe_applymap(request):\n",
          "49": "    def func(x):\n",
          "50": "        return math.sin(x**2) - math.cos(x**2)\n",
          "51": "\n",
          "52": "    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n",
          "53": "        request.param\n",
          "54": "    ]\n",
          "55": "\n",
          "56": "\n",
          "57": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n"
        },
        "content_change": {
          "53": "        request.param\n"
        }
      },
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "58": "func_series_map"
        },
        "content_all": {
          "55": "\n",
          "56": "\n",
          "57": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "58": "def func_series_map(request):\n",
          "59": "    def func(x):\n",
          "60": "        return math.log10(math.sqrt(math.exp(x**2)))\n",
          "61": "\n",
          "62": "    return dict(\n",
          "63": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n",
          "64": "    )[request.param]\n",
          "65": "\n",
          "66": "\n",
          "67": "def test_dataframe_apply_axis_0_no_parallel(df_size, progress_bar, use_memory_fs, func_dataframe_apply_axis_0):\n"
        },
        "content_change": {
          "64": "    )[request.param]\n"
        }
      }
    ],
    "location_ground_exp": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "30": "func_dataframe_apply_axis_0"
        },
        "content_all": {
          "27": "\n",
          "28": "\n",
          "29": "30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "30": "31 def func_dataframe_apply_axis_0(request):\n",
          "31": "32     def func(x):\n",
          "32": "33         return max(x) - min(x)\n",
          "33": "34 \n",
          "34": "35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n",
          "35": "36 \n",
          "36": "\n",
          "37": " 38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "38": "39 def func_dataframe_apply_axis_1(request):\n",
          "39": "40     def func(x):\n",
          "40": "41         return math.sin(x.a**2) + math.sin(x.b**2)\n",
          "41": "\n"
        },
        "content_change": {
          "35": "         )[request.param]\n"
        }
      },
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "38": "func_dataframe_apply_axis_1"
        },
        "content_all": {
          "37": "\n",
          "38": "39 def func_dataframe_apply_axis_1(request):\n",
          "39": "40     def func(x):\n",
          "40": "41         return math.sin(x.a**2) + math.sin(x.b**2)\n",
          "41": "42 \n",
          "42": "43     return dict(\n",
          "43": "44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n",
          "44": "45     )[request.param]\n",
          "45": "46 \n",
          "46": "\n",
          "47": " 48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "48": "49 def func_dataframe_applymap(request):\n",
          "49": "50     def func(x):\n",
          "50": "51         return math.sin(x**2) - math.cos(x**2)\n",
          "51": "\n"
        },
        "content_change": {
          "45": "         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n"
        }
      },
      {
        "file": "nalepae_pandarparallel/tests/test_pandarallel.py",
        "function": {
          "48": "func_dataframe_applymap"
        },
        "content_all": {
          "47": "\n",
          "48": "49 def func_dataframe_applymap(request):\n",
          "49": "50     def func(x):\n",
          "50": "51         return math.sin(x**2) - math.cos(x**2)\n",
          "51": "52 \n",
          "52": "53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n",
          "53": "54         request.param\n",
          "54": "55     ]\n",
          "55": "56 \n",
          "56": "\n",
          "57": " 58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "58": "59 def func_series_map(request):\n",
          "59": "60     def func(x):\n",
          "60": "61         return math.log10(math.sqrt(math.exp(x**2)))\n",
          "61": "\n"
        },
        "content_change": {
          "55": "         )[\n",
          "53": "         named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))\n"
        }
      }
    ],
    "location_ground_truth": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": null,
        "content_all": {
          "157": "    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n",
          "158": "\n",
          "159": "\n"
        },
        "content_change": {
          "157": "    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n"
        }
      }
    ],
    "patch_i": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -63,1 +63,1 @@\n     return dict(\n-        named=func, anonymous=lambda x: math.(...truncated)\n+        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n",
    "patch_im": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -65,7 +65,7 @@\n     )[request.param]\n\n @pytest.fixture(params=(\"named\", \"anonymous\"))\n-def func_dataframe_apply_axis_1_no_reduce(request):\n+def func_dataframe_apply_axis_1_no_reduce(request):\n     def func(x):\n         return x**2\n\n-    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n \n",
    "patch_il": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -61,7 +61,7 @@\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n     return dict(\n-        named=func, anonymous=lambda x: math.(...truncated)\n+        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[\n         request.param\n     ]\n",
    "patch_iml": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -157,7 +157,7 @@\n     def func(x):\n         return x**2\n \n-    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n \n ",
    "patch_ground": "\n--- a/nalepae_pandarparallel/tests/test_pandarparallel.py\n+++ b/nalepae_pandarparallel/tests/test_pandarparallel.py\n@@ -58,7 +58,7 @@ def func_series_map(request):\n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n \n@pytest.fixture(params=(\"named\", \"anonymous\"))\n-def func_dataframe_apply_axis_1_no_reduce(request):\n-    def func(x):\n-        return x**2\n-\n-    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+@pytest.fixture(params=(\"named\", \"anonymous\"))\n+def func_dataframe_apply_axis_1_no_reduce(request):\n+    def func(x):\n+        return x**2\n+\n+    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_apply_axis_0(request):\n     def func(x):\n",
    "patch_ground_location": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -34,7 +34,7 @@\n     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_apply_axis_1(request):\n     def func(x):\n         return math.sin(x.a**2) + math.sin(x.b**2)\n \n     return dict(\n         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n-    )[request.params]\n+    )[request.param]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_applymap(request):\n     def func(x):\n         return math.sin(x**2) - math.cos(x**2)\n \n     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n-        request.params\n+        request.param\n     ]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_series_map(request):\n     def func(x):\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n-    )[request.params]\n+    )[request.param]\n",
    "patch_ground_exp": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -30,7 +30,7 @@\n def func_dataframe_apply_axis_0(request):\n     def func(x):\n         return max(x) - min(x)\n \n     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_apply_axis_1(request):\n     def func(x):\n         return math.sin(x.a**2) + math.sin(x.b**2)\n \n     return dict(\n         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n     )[request.param]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_applymap(request):\n     def func(x):\n         return math.sin(x**2) - math.cos(x**2)\n \n     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n         request.param\n     ]\n \n \n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_series_map(request):\n     def func(x):\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n@@ -10,11 +10,11 @@\n import math\n import numpy as np\n import pandas as pd\n import pytest\n from pandarparallel import pandarparallel\n \n \n @pytest.fixture(params=(1000, 1))\n def df_size(request):\n     return request.param\n \n \n @pytest.fixture(params=(False, True))\n def progress_bar(request):\n     return request.param\n \n \n @pytest.fixture(params=(None, False))\n def use_memory_fs(request):\n     return request.param\n \n \n @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n def exception(request):\n     return request.param\n \n",
    "patch_ground_all": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -157,7 +157,7 @@\n     return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n @pytest.fixture(params=(\"named\", \"anonymous\"))\n def func_dataframe_apply_axis_1(request):\n     def func(x):\n         return math.sin(x.a**2) + math.sin(x.b**2)\n\n     return dict(\n         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n     )[request.param]\n\n+\n+@pytest.fixture(params=(\"named\", \"anonymous\"))\n+\\n157:    return dict(named=func, anonymous=lambda x: x**2)[request.param]\\n\\'\"\n",
    "patch_ground_truth": "--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -154,7 +154,7 @@\n     def func(x):\n         return x**2\n \n-    return dict(named=func, anonymous=lambda x: x**2)[request.params]\n+    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n \n \n @pytest.fixture\n",
    "message": "--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n____________________________________________________________ ERROR at setup of test_dataframe_axis_1_no_reduction[anonymous-1-True-None] ____________________________________________________________\n\nrequest = <SubRequest 'func_dataframe_apply_axis_1_no_reduce' for <Function test_dataframe_axis_1_no_reduction[anonymous-1-True-None]>>\n\n    @pytest.fixture(params=(\"named\", \"anonymous\"))\n    def func_dataframe_apply_axis_1_no_reduce(request):\n        def func(x):\n            return x**2\n    \n>       return dict(named=func, anonymous=lambda x: x**2)[request.params]\nE       AttributeError: 'SubRequest' object has no attribute 'params'\n\ntests/test_pandarallel.py:157: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n___________________________________________________________ ERROR at setup of test_dataframe_axis_1_no_reduction[anonymous-1-True-False] ____________________________________________________________\n\nrequest = <SubRequest 'func_dataframe_apply_axis_1_no_reduce' for <Function test_dataframe_axis_1_no_reduction[anonymous-1-True-False]>>\n\n    @pytest.fixture(params=(\"named\", \"anonymous\"))\n    def func_dataframe_apply_axis_1_no_reduce(request):\n        def func(x):\n            return x**2\n    \n>       return dict(named=func, anonymous=lambda x: 
x**2)[request.params]\nE       AttributeError: 'SubRequest' object has no attribute 'params'\n\ntests/test_pandarallel.py:157: AttributeError\n--------------------------------------------------------------------------------------- Captured stdout setup ---------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-None] - AttributeError: 'SubRequest' object 
has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-False] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-None] - AttributeError: 'SubRequest' object has no attribute 'params'\nERROR tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-False] - AttributeError: 'SubRequest' object has no attribute 'params'\n============================================================================ 201 passed, 16 warnings, 16 errors in 6.85s ============================================================================",
    "CodeBase": [
      {
        "path": "nalepae_pandarallel/tests/test_pandarallel.py",
        "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/utils.py",
        "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24     Returns\n25     -------\n26     A list of slices\n27 \n28     Examples\n29     --------\n30     >>> chunks = chunk(103, 4)\n31     >>> chunks\n32     [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n33     \"\"\"\n34     if nb_item == 0:\n35         return [slice(0)]\n36     \n37     if nb_item <= nb_chunks:\n38         return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n39 \n40     quotient = nb_item // nb_chunks\n41     remainder = nb_item % nb_chunks\n42 \n43     quotients = [quotient] * nb_chunks\n44     remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n45 \n46     nb_elems_per_chunk = [\n47         quotient + remainder for quotient, remainder in zip(quotients, remainders)\n48     ]\n49 \n50     accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n51     shifted_accumulated = accumulated.copy()\n52     shifted_accumulated.insert(0, 0)\n53     shifted_accumulated.pop()\n54 \n55     return [\n56         slice(max(0, begin - start_offset), end)\n57         for begin, end in zip(shifted_accumulated, accumulated)\n58     ]\n59 \n60 \n61 def df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n62     \"\"\"\n63     Returns whether a data frame is indexed in the way specified by the\n64     provided axes.\n65 \n66     Used by DataFrameGroupBy to determine whether a group has 
been modified.\n67 \n68     Function adapted from pandas.core.groupby.ops._is_indexed_like\n69 \n70     Parameters\n71     ----------\n72     df : DataFrame\n73         The data frame in question\n74 \n75     axes : List[Index]\n76         The axes to which the data frame is compared\n77 \n78     Returns\n79     -------\n80     Whether or not the data frame is indexed in the same wa as the axes.\n81     \"\"\"\n82     if isinstance(df, DataFrame):\n83         return df.axes[0].equals(axes[0])\n84 \n85     return False\n86 \n87 \n88 def get_pandas_version() -> Tuple[int, int]:\n89     major_str, minor_str, *_ = pd.__version__.split(\".\")\n90     return int(major_str), int(minor_str)\n91 \n92 \n93 def get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n94     axis = user_defined_function_kwargs.get(\"axis\", 0)\n95 \n96     if axis not in {0, 1, \"index\", \"columns\"}:\n97         raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n98 \n99     return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n100 \n101 \n102 class WorkerStatus(int, Enum):\n103     Running = 0\n104     Success = 1\n105     Error = 2"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable,(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
        "content": "1 import itertools\n2 from typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n3 \n4 import pandas as pd\n5 from (...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/core.py",
        "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools impor(...truncated)"
      }
    ],
    "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f"
  },
  "Score": {
    "Difficulty": "Medium",
    "issue_origin": {
      "Title": 6,
      "Description": 7,
      "Reproducibility": 5,
      "Relevance": 8,
      "Explanation": 7,
      "Overall": 7
    },
    "issue_message": {
      "Title": 7,
      "Description": 5,
      "Reproducibility": 4,
      "Relevance": 8,
      "Explanation": 8,
      "Overall": 7
    },
    "issue_ground": {
      "Title": 8,
      "Description": 8,
      "Reproducibility": 8,
      "Relevance": 8,
      "Explanation": 8,
      "Overall": 8
    },
    "issue_ground_truth": {
      "title": "Incorrect Attribute Access in pytest Fixture: 'request.params' Should Be 'request.param'",
      "description": "There is a misconfiguration in the pytest fixture `func_dataframe_apply_axis_1_no_reduce` where it attempts to access 'request.params' instead of 'request.param'. Because the pytest `request` object has no `params` attribute, this raises an AttributeError during fixture setup, causing every test that depends on the fixture to fail unexpectedly.\n\nTo reproduce:\n1. Run the test suite using pytest.\n2. Observe the failure in tests due to the incorrect attribute access.\n\nThis issue impacts the reliability of test results, potentially masking other issues in the codebase. It is crucial that the fixture correctly accesses 'request.param' to ensure the tests are executed as intended.\n\nPlease investigate and fix the attribute access in the fixture to mitigate this problem.",
      "explanation": "### Summary of the Issue\n\nThe issue reported involves an error in a pytest fixture named `func_dataframe_apply_axis_1_no_reduce` within a project. The problem is with the incorrect property access during test execution. Specifically, the fixture is attempting to access `request.params` instead of the correct key `request.param`. This mismatch causes a key error that results in the failure of the test suite unexpectedly.\n\n### Content of the Commit\n\nTo address this issue, a commit was made that modifies the access key in the `func_dataframe_apply_axis_1_no_reduce` fixture. The essential change involved replacing the incorrect usage of `request.params` with the correct `request.param`.\n\n### Explanation from a Developer's Perspective\n\n#### Cause of the Issue\n\nThe root cause of the issue lies in the incorrect property access for the `request` object within the pytest fixture. In pytest, `request.param` is designed to provide the parameter value for the current test invocation, while `request.params` does not exist and hence accessing it causes a key error. This incorrect access leads to the fixture failing when it attempts to retrieve test parameters, causing the entire test suite to fail unexpectedly. This fail-fast behavior disrupts the continuous integration process and potentially conceals other issues within the codebase.\n\n#### Solution Provided by the Commit\n\nThe commit addresses the problem by correcting the key used to access the test parameter in the fixture. 
Here’s the developer-oriented explanation of how the commit solves the issue:\n\n- **Identification of the Error**: The developer identified that the error arises due to accessing a non-existent attribute `request.params` instead of `request.param`.\n  \n- **Modifying the Access Key**: The core fix in the commit is changing `request.params` to `request.param`, ensuring the fixture correctly retrieves the parameter intended for the current test.\n  \n- **Impact on Test Execution**: This change ensures that the pytest fixture functions as expected, fetching the appropriate test parameters without causing a key error. Consequently, the test suite can run without interruptions, ensuring reliable and continuous testing.\n\n### Conclusion\n\nIn summary, the issue was caused by an incorrect access key in the pytest fixture, which led to a key error and test failures. The solution involved a simple yet crucial change: correcting the access key from `request.params` to `request.param`. This fix allows the fixture to correctly retrieve parameter values, thereby resolving the key error and ensuring that the test suite runs smoothly without unexpected failures. This commit demonstrates the importance of accurate usage of framework-specific conventions and highlights how a small oversight can disrupt the functionality and reliability of automated tests."
    }
  }
}