{
  "RepoName": "https://github.com/nalepae/pandarallel.git",
  "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f",
  "Time": "",
  "Difficulty": "Easy",
  "Type": "undefined methods",
  "BuggyCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "OriginCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "ErrorMessage": "--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n_____________________________________ test_dataframe_apply_invalid_function[RuntimeError-True-False] _____________________________________\n\npandarallel_init = None, exception = <class 'RuntimeError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b4496dc0>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b5e72ca0>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b5de2a90>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b5dd78e0>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.InUserDefinedFunction: 1>, 0, <AutoProxy[Queue] object, typeid 'Queue' a...x13\\x8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x0cRuntimeError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5582dc0>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForFileSystem object at 0x7097b44afb80>\nwrapped_reduce_function = <function wrap_reduce_function_for_file_system.<locals>.closure at 0x7097b5e01670>\nchunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1, progresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44af580>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>], input_files = [<tempfile._TemporaryFileWrapper object at 0x7097b44affa0>]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n    \n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n    \n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n    \n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n    \n            pool.close()\n    \n            generation = count()\n    \n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n    \n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n    \n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n>                   progress_bars.set_error(worker_index)\nE                   AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:321: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-False-False] __________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5582a60>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b44da1f0>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b44da430>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44da280>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.No: 0>, 0, <AutoProxy[Queue] object, typeid 'Queue' at 0x7097b44da340>, ...8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x11ZeroDivisionError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n___________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b557d160>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForFileSystem object at 0x7097b44d33d0>\nwrapped_reduce_function = <function wrap_reduce_function_for_file_system.<locals>.closure at 0x7097b44be550>\nchunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1, progresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44d3250>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>], input_files = [<tempfile._TemporaryFileWrapper object at 0x7097b44d3c70>]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n    \n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n    \n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n    \n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n    \n            pool.close()\n    \n            generation = count()\n    \n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n    \n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n    \n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n>                   progress_bars.set_error(worker_index)\nE                   AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:321: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-True-False] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5dc9820>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b4487250>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b44872e0>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b4487a30>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.InUserDefinedFunction: 1>, 0, <AutoProxy[Queue] object, typeid 'Queue' a...8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x11ZeroDivisionError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n============================================================ warnings summary ============================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n======================================================== short test summary info =========================================================\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n=============================================== 8 failed, 209 passed, 16 warnings in 8.60s ===============================================",
  "Patch": "--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -30,6 +30,8 @@\n     def update(self, values: List[int]) -> None:\n         ...\n \n+    def set_error(self, index: int) -> None:\n+        pass\n \n \n class ProgressState:\n",
  "BuggyCodeLocation": [
    {
      "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "function": null,
      "content_all": {
        "31": "        ...\n",
        "32": "\n",
        "33": "\n",
        "34": "\n"
      },
      "content_change": {}
    }
  ],
  "Issue": {
    "title": "Progress Bars Fail to Indicate Worker Errors",
    "description": "Currently, when workers encounter errors during their tasks, the progress bars do not reflect this status, making it difficult for users to identify which specific worker failed. This lack of feedback can significantly hinder debugging processes and overall user experience. There is a need for a mechanism that clearly indicates errors in the progress bars, providing immediate visual feedback to the user regarding the status of each worker.",
    "explanation": "### Summary of the Issue\n\nThe issue titled **\"Progress Bars Fail to Indicate Worker Errors\"** addresses the problem where progress bars in the application fail to reflect when a worker encounters an error during its task. This lack of feedback complicates debugging and detracts from the overall user experience, as users are not informed which specific worker failed.\n\n### Content of the Commit\n\nThe commit adds a declaration for a new method `set_error` in the `ProgressBars` class. Here is the specific information:\n\n- **Filename:** `nalepae_pandarallel/pandarallel/progress_bars.py`\n- **Patch:** Introduces the method `set_error` in the `ProgressBars` class.\n\nThe method `set_error` is probably intended to manually set a worker's progress bar to an error state.\n\n### Analysis\n\n#### Cause of the Issue\n\nThe core of the issue lies in the lack of a mechanism to reflect worker errors in the progress bars. When a worker encounters an error, the status isn't visually indicated to the user, leaving them unsure whether the progress bar's lack of movement is due to ongoing computation or an underlying error.\n\n#### Solution Description\n\nThe suggested solution includes the following key steps:\n\n1. **Introduce a Method to Flag Errors:** Adding a `set_error` method to the `ProgressBars` class provides a way to manually indicate when a worker encounters an error.\n2. **Update Worker Management Logic:** The logic handling workers' status, likely found within the main processing unit, may now include calls to this `set_error` method when it detects that a worker has errored out.\n3. **Visual Feedback Mechanism:** By incorporating this method, the progress bars can visually show which specific worker encountered an error, improving user feedback and debugging capabilities.\n\n### Detailed Explanation\n\n#### Step-by-Step Breakdown\n\n1. **Understand the User Pain Point:**\n   - Users need to be aware of which workers are failing in real-time.\n   - Currently, all they see is a progress bar that stops moving, which is ambiguous and doesn't clarify if it's due to completion, still processing, or an error.\n\n2. **Implement the Error Indication Method:**\n   - The commit provides the foundation by declaring a `set_error` method in the `ProgressBars` class.\n   - This method will be the hook through which errors can be indicated visually.\n\n3. **Leverage the Error Method:**\n   - Within the worker management code, which likely orchestrates task distribution and progress updates, you'll have logic to determine worker success or failure.\n   - When a worker fails, besides logging or handling the error, it will now also invoke `set_error` on the corresponding progress bar, thereby updating the visual feedback immediately.\n\n4. **Operational Flow:**\n   - A worker starts processing a task.\n   - If it finishes successfully, the progress bar updates normally.\n   - If it encounters an error, the `set_error` method is called, and this change reflects in the UI, immediately indicating to the user which worker had issues.\n\n#### How the Commit Solves the Issue\n\n- **Detection and Reflection:** By allowing the worker management system to call `set_error`, it directly links the detection of an issue with its reflection in the progress bars.\n- **User Clarity:** Users get immediate visual feedback regarding which worker has failed, making it clearer and easier to debug and understand application state.\n- **Enhanced Monitoring:** This additional feature enhances the overall monitoring capability of the application, aligning more closely with user expectations for robust progress tracking.\n\n### Conclusion\n\nTo address the issue where progress bars don't reflect worker errors, the commit primarily adds the `set_error` method as a means to manually flag errors visually. From a developer’s perspective, this change allows for a more dynamic and responsive UI that conveys real-time worker statuses, including errors, enhancing user experience and aiding in quicker troubleshooting."
  },
  "Explain": "### Summary of the Issue\n\nThe issue titled **\"Progress Bars Fail to Indicate Worker Errors\"** addresses the problem where progress bars in the application fail to reflect when a worker encounters an error during its task. This lack of feedback complicates debugging and detracts from the overall user experience, as users are not informed which specific worker failed.\n\n### Content of the Commit\n\nThe commit adds a declaration for a new method `set_error` in the `ProgressBars` class. Here is the specific information:\n\n- **Filename:** `nalepae_pandarallel/pandarallel/progress_bars.py`\n- **Patch:** Introduces the method `set_error` in the `ProgressBars` class.\n\nThe method `set_error` is probably intended to manually set a worker's progress bar to an error state.\n\n### Analysis\n\n#### Cause of the Issue\n\nThe core of the issue lies in the lack of a mechanism to reflect worker errors in the progress bars. When a worker encounters an error, the status isn't visually indicated to the user, leaving them unsure whether the progress bar's lack of movement is due to ongoing computation or an underlying error.\n\n#### Solution Description\n\nThe suggested solution includes the following key steps:\n\n1. **Introduce a Method to Flag Errors:** Adding a `set_error` method to the `ProgressBars` class provides a way to manually indicate when a worker encounters an error.\n2. **Update Worker Management Logic:** The logic handling workers' status, likely found within the main processing unit, may now include calls to this `set_error` method when it detects that a worker has errored out.\n3. **Visual Feedback Mechanism:** By incorporating this method, the progress bars can visually show which specific worker encountered an error, improving user feedback and debugging capabilities.\n\n### Detailed Explanation\n\n#### Step-by-Step Breakdown\n\n1. **Understand the User Pain Point:**\n   - Users need to be aware of which workers are failing in real-time.\n   - Currently, all they see is a progress bar that stops moving, which is ambiguous and doesn't clarify if it's due to completion, still processing, or an error.\n\n2. **Implement the Error Indication Method:**\n   - The commit provides the foundation by declaring a `set_error` method in the `ProgressBars` class.\n   - This method will be the hook through which errors can be indicated visually.\n\n3. **Leverage the Error Method:**\n   - Within the worker management code, which likely orchestrates task distribution and progress updates, you'll have logic to determine worker success or failure.\n   - When a worker fails, besides logging or handling the error, it will now also invoke `set_error` on the corresponding progress bar, thereby updating the visual feedback immediately.\n\n4. **Operational Flow:**\n   - A worker starts processing a task.\n   - If it finishes successfully, the progress bar updates normally.\n   - If it encounters an error, the `set_error` method is called, and this change reflects in the UI, immediately indicating to the user which worker had issues.\n\n#### How the Commit Solves the Issue\n\n- **Detection and Reflection:** By allowing the worker management system to call `set_error`, it directly links the detection of an issue with its reflection in the progress bars.\n- **User Clarity:** Users get immediate visual feedback regarding which worker has failed, making it clearer and easier to debug and understand application state.\n- **Enhanced Monitoring:** This additional feature enhances the overall monitoring capability of the application, aligning more closely with user expectations for robust progress tracking.\n\n### Conclusion\n\nTo address the issue where progress bars don't reflect worker errors, the commit primarily adds the `set_error` method as a means to manually flag errors visually. From a developer’s perspective, this change allows for a more dynamic and responsive UI that conveys real-time worker statuses, including errors, enhancing user experience and aiding in quicker troubleshooting.",
  "Source": "Human",
  "Token": 1139,
  "Command": [
    "pytest tests"
  ],
  "FilteredCode": [
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n34 \n35 # By default, Pandarallel use all available CPUs\n36 NB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n37 \n38 # Prefix and suffix for files used with Memory File System\n39 PREFIX = \"pandarallel\"\n40 PREFIX_INPUT = f\"{PREFIX}_input_\"\n41 PREFIX_OUTPUT = f\"{PREFIX}_output_\"\n42 SUFFIX = \".pickle\"\n43 \n44 # We use these classes decorators pattern instead of the classic one because of this:\n45 # https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n46 \n47 \n48 class WrapWorkFu(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33 \n34 \n35 class ProgressState:\n36     def __init__(self, chunk_size: int) -> None:\n37         self.last_put_iteration = 0\n38         self.next_put_iteration = max(chunk_size // 100, 1)\n39         self.last_put_time = time_ns()\n40 \n41 \n42 def is_notebook_lab() -> bool:\n43     try:\n44         shell: str = get_ipython().__class__.__name__  # type: ignore\n45 \n46         # Shell: Google Colab\n47         # TerminalInteractiveShell: Terminal running I(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24     Returns\n25     -------\n26     A list of slices\n27 \n28     Examples\n29     --------\n30     >>> chunks = chunk(103, 4)\n31     >>> chunks\n32     [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n33     \"\"\"\n34     if nb_item == 0:\n35 (...truncated)"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):(...truncated)"
    }
  ],
  "TokenAll": 14225,
  "FilteredLevel": 1500,
  "Results": {
    "model": "GPT-4o",
    "Difficulty": "Easy",
    "issue_origin": {
      "title": "Unspecified Type Hint",
      "description": "In the function is_notebook_lab in the file nalepae_pandarallel/pandarallel/progress_bars.py at line 44, the variable 'shell' is assigned without specifying an import or a type hint for get_ipython. This may cause confusion or errors if get_ipython is not imported or its type is not clear. Please add the appropriate type hint and import statement.",
      "explanation": "### Issue Summary\nThe issue at hand is about the type hinting and the missing import for `get_ipython()` in the `is_notebook_lab` function located in the `nalepae_pandarallel/pandarallel/progress_bars.py` file. The current implementation assigns the result of `get_ipython()` to the `shell` variable without specifying a type hint for `get_ipython()`, nor is there an import statement for `get_ipython()`. This could lead to confusion or errors if `get_ipython` is not imported or its type is not clear.\n\n### Commit Content\nTo resolve this issue, the following changes should be made:\n\n1. **Import statement for `get_ipython`:**\n   An import statement should be added to import `get_ipython` from IPython.\n   \n2. **Type hinting for `shell`:**\n   The line of code where `shell` is assigned should include a proper type hint. \n\nThese changes will look something like this:\n\n```python\n# Add the following import at the top of the progress_bars.py file\nfrom IPython import get_ipython\n\n# Update the is_notebook_lab function in progress_bars.py\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n        ...\n```\n\n### Explanation of How the Commit Solves the Issue\n\n1. **Adding the Import Statement:**\n   The original code snippet lacked the import statement for `get_ipython`. Since `get_ipython` is not a built-in Python function but a function provided by the IPython library, it needs to be explicitly imported. By adding `from IPython import get_ipython`, we ensure that `get_ipython` is available in the scope of the `is_notebook_lab` function. This way, we avoid runtime errors related to the undefined function.\n\n2. **Type Hinting:**\n   Type hints in Python help in understanding the expected type of a variable. By specifying `shell: str`, it becomes clear that `shell` is expected to be a string. This helps developers understand the code better and can also aid in static type checking tools. The `__class__.__name__` attribute of `get_ipython()` will always return a string representing the class name, so it is appropriate to hint `shell` as a `str`.\n\n3. **Ensuring Functional Integrity:**\n   The `try-except` block surrounding the `get_ipython` function call indicates that the code is prepared to handle the case where `get_ipython` is not found (perhaps when not running in an IPython environment). Clarifying these details at the outset helps in maintaining functional integrity and preemptively resolves any confusion regarding the return types and the environmental dependencies.\n\n### Summary\nThe commit adds the necessary import statement for `get_ipython` from the IPython library and provides a type hint for the `shell` variable. By doing so, it ensures the code is clear, understandable, and less prone to runtime errors due to undefined functions. These changes improve code clarity and maintainability, making it easier for other developers to understand and work with this part of the codebase."
    },
    "issue_message": {
      "title": "AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'",
      "description": "The code in `pandarallel/core.py` attempts to call a `set_error` method on the `ProgressBarsConsole` object, which does not exist. This results in an `AttributeError` during execution. Please implement the `set_error` method in the `ProgressBarsConsole` class or remove the call to `set_error` if it's not needed.",
      "explanation": "### Issue Summary\n\nThe issue arises when the code in `pandarallel/core.py` tries to invoke a `set_error` method on a `ProgressBarsConsole` object. However, the `ProgressBarsConsole` class does not have a method named `set_error`, leading to an `AttributeError` during the runtime. This error is confirmed by the described stack trace in the specific error message information.\n\n### Content of the Commit\n\nTo resolve this problem, the following commit modifies `nalepae_pandarparallel/pandarparallel/progress_bars.py` by adding the `set_error` method to the `ProgressBarsConsole` class, ensuring that the method exists and can be called without causing an error.\n\n#### Original `ProgressBarsConsole` Class Code Snippet\n\n```python\n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n```\n\n#### Commit Addition to `ProgressBarsConsole` Class\n \n```python\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.maxs = maxs\n        self.show = show\n        # Initialize other necessary attributes\n\n    def update(self, values: List[int]) -> None:\n        # Logic to update progress bars\n        pass\n\n    def set_error(self, worker_index: int) -> None:\n        # Logic to handle error for a specific worker\n        print(f\"Worker {worker_index} encountered an error.\", file=sys.stderr)\n```\n\nIn the above code, the `set_error` method is defined to print an error message associated with the `worker_index`. While simple, this method can later be extended to handle more complex error reporting mechanisms.\n\n### Explanation of How the Commit Solves the Issue\n\n#### Cause of the Issue\n\nThe code in `pandarparallel/core.py` relies on the `set_error` method of the `ProgressBarsConsole` class to handle scenarios where a worker process encounters an error. Without this method, any call to `set_error` will result in an `AttributeError`, causing the whole parallel operation to fail. This is evident from the error stack trace displayed, which halts at `progress_bars.set_error(worker_index)`.\n\n#### Solution\n\nThe added `set_error` method to the `ProgressBarsConsole` class eliminates the `AttributeError` by ensuring that the method exists and can be called. This creates a legal method call, thus:\n\n1. **Prevents Runtime Errors**: By ensuring that `set_error` exists, it avoids `AttributeError` and the code's abrupt termination.\n2. **Handles Worker Errors**: The method implementation allows the system to handle workers' errors gracefully; even though it only prints an error message currently, it can be extended further to log errors, retry failed processes, or handle cleanup.\n\n### How the Commit Solves the Issue in Detail\n\nThe commit resolves the problem through the following steps:\n1. **Definition of Method**: The `set_error` method is defined within `ProgressBarsConsole`.\n2. **Error Handling Logic**: The method, while simple, carries out a basic error reporting function. It prints to `stderr`, providing a mechanism to know which worker failed.\n3. **Integration**: By defining this method, existing calls to `set_error` in `pandarparallel/core.py` work as expected, avoiding any unexpected errors.\n\nTo summarize, the commit adds the missing `set_error` method to `ProgressBarsConsole`, thus resolving the AttributeError and allowing the parallel processing to handle worker errors more gracefully."
    },
    "issue_ground": {
      "title": "Progress Bars Fail to Indicate Worker Errors",
      "description": "Currently, when workers encounter errors during their tasks, the progress bars do not reflect this status, making it difficult for users to identify which specific worker failed. This lack of feedback can significantly hinder debugging processes and overall user experience. There is a need for a mechanism that clearly indicates errors in the progress bars, providing immediate visual feedback to the user regarding the status of each worker.",
      "explanation": "### Summary of the Issue\n\nThe issue at hand is that the progress bars within the `nalepae_pandarallel` library fail to indicate when worker processes encounter errors. While the worker's status transitions to an error state, this state change is not reflected visually on the progress bars, making it hard for users to understand which worker failed and hinder their debugging processes.\n\nDuring the test runs, when a user-defined function causes an error (like `ZeroDivisionError`, `RuntimeError`), an `AttributeError` occurs because the `ProgressBarsConsole` object does not have a method named `set_error`, which is expected to be there according to the current handling logic in `core.py`.\n\n### Detailed Analysis of the Commit\n\nFrom the provided code, let's break down specific parts that need modification to fix this issue.\n\n#### Code Analysis\n\n**`nalepae_pandarallel/core.py`:**\n- The error message indicates an issue when `progress_bars.set_error(worker_index)` is called, leading to an `AttributeError`, implying `set_error` doesn't exist in `ProgressBarsConsole`.\n\n**`nalepae_pandarallel/progress_bars.py`:**\n- Here, the `ProgressBars` class is defined, but it doesn't seem to have a `set_error()` method.\n\nTo solve this, new methods indicating errors in progress should be introduced to the relevant classes in `progress_bars.py`.\n\n### Explanation of the Solution\n\n**Step 1: Adding the `set_error` Method in `ProgressBars` Class**\n\nThe `ProgressBars` is an abstract base class (ABC), and `ProgressBarsConsole` and other specific progress bar implementations must implement the `set_error` method.\n\n**Step 2: Updating `ProgressBarsConsole` Implementation**\n\nThe `ProgressBarsConsole` is likely the concrete implementation for console-based progress bars. An appropriate `set_error` method needs to be added here to handle the error state.\n\n**Step 3: Updating the `core.py` Logic**\n\nEnsure that the `ProgressBars` instance handles the `set_error` method gracefully, calling it whenever a worker changes to an error state.\n\n### Commit Content\n\nLet's create a hypothetical commit to address these changes.\n\n1. **Updating `ProgressBars` Class to include `set_error` method:**\n\n```python\n# In nalepae_pandarallel/progress_bars.py\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    @abstractmethod\n    def set_error(self, worker_index: int) -> None:\n        ...\n```\n\n2. **Implementing `set_error` Method in `ProgressBarsConsole`:**\n\n```python\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.progresses = [0] * len(maxs)\n        self.maxs = maxs\n        self.show = show\n        self.errors = [False] * len(maxs)\n        if show:\n            self._draw_initial()\n\n    def set_error(self, worker_index: int) -> None:\n        self.errors[worker_index] = True\n        self._draw()\n\n    def _draw(self) -> None:\n        # Implementation for drawing the progress bar,\n        # showing errors in red, for instance.\n        pass\n```\n\n3. **Using the `set_error` Method in `core.py`:**\n\nEnsure it is properly called when a worker encounters an error.\n\n```python\n# In nalepae_pandarallel/core.py\n\nwhile any(\n    (worker_status == WorkerStatus.Running for worker_status in workers_status)\n):\n    message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n    worker_index, worker_status, payload = message\n    workers_status[worker_index] = worker_status\n\n    if worker_status == WorkerStatus.Success:\n        progresses[worker_index] = progresses_length[worker_index]\n        progress_bars.update(progresses)\n    elif worker_status == WorkerStatus.Running:\n        progress = cast(int, payload)\n        progresses[worker_index] = progress\n\n        if next(generation) % nb_workers == 0:\n            progress_bars.update(progresses)\n    elif worker_status == WorkerStatus.Error:\n        progress_bars.set_error(worker_index)  # Ensure this line works now with the set_error method.\n```\n\n### How the Commit Solves the Issue\n\nBy implementing the `set_error` method in the base `ProgressBars` class and its concrete implementation `ProgressBarsConsole`, the system now has a way of visually indicating which worker encountered an error. The `set_error` method updates the internal state of the progress bars to reflect errors, enhancing the feedback provided to the users. This immediate visual feedback makes it easier for users to identify and debug issues associated with specific workers, thus"
    },
    "issue_ground_truth": {
      "title": "Progress Bars Fail to Indicate Worker Errors",
      "description": "Currently, when workers encounter errors during their tasks, the progress bars do not reflect this status, making it difficult for users to identify which specific worker failed. This lack of feedback can significantly hinder debugging processes and overall user experience. There is a need for a mechanism that clearly indicates errors in the progress bars, providing immediate visual feedback to the user regarding the status of each worker.",
      "explanation": "### Summary of the Issue\n\nThe issue titled **\"Progress Bars Fail to Indicate Worker Errors\"** addresses the problem where progress bars in the application fail to reflect when a worker encounters an error during its task. This lack of feedback complicates debugging and detracts from the overall user experience, as users are not informed which specific worker failed.\n\n### Content of the Commit\n\nThe commit adds a declaration for a new method `set_error` in the `ProgressBars` class. Here is the specific information:\n\n- **Filename:** `nalepae_pandarallel/pandarallel/progress_bars.py`\n- **Patch:** Introduces the method `set_error` in the `ProgressBars` class.\n\nThe method `set_error` is probably intended to manually set a worker's progress bar to an error state.\n\n### Analysis\n\n#### Cause of the Issue\n\nThe core of the issue lies in the lack of a mechanism to reflect worker errors in the progress bars. When a worker encounters an error, the status isn't visually indicated to the user, leaving them unsure whether the progress bar's lack of movement is due to ongoing computation or an underlying error.\n\n#### Solution Description\n\nThe suggested solution includes the following key steps:\n\n1. **Introduce a Method to Flag Errors:** Adding a `set_error` method to the `ProgressBars` class provides a way to manually indicate when a worker encounters an error.\n2. **Update Worker Management Logic:** The logic handling workers' status, likely found within the main processing unit, may now include calls to this `set_error` method when it detects that a worker has errored out.\n3. **Visual Feedback Mechanism:** By incorporating this method, the progress bars can visually show which specific worker encountered an error, improving user feedback and debugging capabilities.\n\n### Detailed Explanation\n\n#### Step-by-Step Breakdown\n\n1. **Understand the User Pain Point:**\n   - Users need to be aware of which workers are failing in real-time.\n   - Currently, all they see is a progress bar that stops moving, which is ambiguous and doesn't clarify if it's due to completion, still processing, or an error.\n\n2. **Implement the Error Indication Method:**\n   - The commit provides the foundation by declaring a `set_error` method in the `ProgressBars` class.\n   - This method will be the hook through which errors can be indicated visually.\n\n3. **Leverage the Error Method:**\n   - Within the worker management code, which likely orchestrates task distribution and progress updates, you'll have logic to determine worker success or failure.\n   - When a worker fails, besides logging or handling the error, it will now also invoke `set_error` on the corresponding progress bar, thereby updating the visual feedback immediately.\n\n4. **Operational Flow:**\n   - A worker starts processing a task.\n   - If it finishes successfully, the progress bar updates normally.\n   - If it encounters an error, the `set_error` method is called, and this change reflects in the UI, immediately indicating to the user which worker had issues.\n\n#### How the Commit Solves the Issue\n\n- **Detection and Reflection:** By allowing the worker management system to call `set_error`, it directly links the detection of an issue with its reflection in the progress bars.\n- **User Clarity:** Users get immediate visual feedback regarding which worker has failed, making it clearer and easier to debug and understand application state.\n- **Enhanced Monitoring:** This additional feature enhances the overall monitoring capability of the application, aligning more closely with user expectations for robust progress tracking.\n\n### Conclusion\n\nTo address the issue where progress bars don't reflect worker errors, the commit primarily adds the `set_error` method as a means to manually flag errors visually. From a developer’s perspective, this change allows for a more dynamic and responsive UI that conveys real-time worker statuses, including errors, enhancing user experience and aiding in quicker troubleshooting."
    },
    "location_origin": [
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": {
          "42": "is_notebook_lab"
        },
        "content_all": {
          "39": "        self.last_put_time = time_ns()\n",
          "40": "\n",
          "41": "\n",
          "42": "def is_notebook_lab() -> bool:\n",
          "43": "    try:\n",
          "44": "        shell: str = get_ipython().__class__.__name__  # type: ignore\n",
          "45": "\n",
          "46": "        # Shell: Google Colab\n",
          "47": "        # TerminalInteractiveShell: Terminal running IPython\n",
          "48": "        # ZMQInteractiveShell: Jupyter notebook or qtconsole\n"
        },
        "content_change": {
          "1": "from IPython import get_ipython\n",
          "44": "        shell: str = get_ipython().__class__.__name__  # type: ignore\n"
        }
      }
    ],
    "location_message": [
      {
        "file": "nalepae_pandarparallel/pandarparallel/core.py",
        "function": {
          "442": "closure"
        },
        "content_all": {
          "439": "                elif worker_status == WorkerStatus.Error:\n",
          "440": "                    progress_bars.set_error(worker_index)\n",
          "441": "\n",
          "442": "                wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n"
        },
        "content_change": {
          "440": "                    progress_bars.set_error(worker_index)\n"
        }
      },
      {
        "file": "nalepae_pandarparallel/pandarparallel/progress_bars.py",
        "function": {
          "34": "ProgressBarsConsole"
        },
        "content_all": {
          "30": "    @abstractmethod\n",
          "31": "    def update(self, values: List[int]) -> None:\n",
          "32": "        ...\n",
          "33": "\n",
          "34": "class ProgressBarsConsole(ProgressBars):\n",
          "35": "    def __init__(self, maxs: List[int], show: bool) -> None:\n",
          "36": "        self.maxs = maxs\n",
          "37": "        self.show = show\n",
          "38": "        # Initialize other necessary attributes\n",
          "39": "\n",
          "40": "    def update(self, values: List[int]) -> None:\n",
          "41": "        # Logic to update progress bars\n",
          "42": "        pass\n",
          "43": "\n",
          "44": "    def set_error(self, worker_index: int) -> None:\n",
          "45": "        # Logic to handle error for a specific worker\n",
          "46": "        print(f\"Worker {worker_index} encountered an error.\", file=sys.stderr)\n"
        },
        "content_change": {
          "34": "class ProgressBarsConsole(ProgressBars):\n",
          "44": "    def set_error(self, worker_index: int) -> None:\n",
          "45": "        # Logic to handle error for a specific worker\n",
          "46": "        print(f\"Worker {worker_index} encountered an error.\", file=sys.stderr)\n"
        }
      }
    ],
    "location_ground": [
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "1": "main_code"
        },
        "content_all": {
          "29": "ON_WINDOWS = os.name == \"nt\"\n",
          "30": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n",
          "31": "\n",
          "32": "# Root of Memory File System\n",
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n",
          "34": "\n",
          "35": "# By default, Pandarallel use all available CPUs\n",
          "36": "NB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n",
          "37": "\n",
          "38": "# Prefix and suffix for files used with Memory File System\n",
          "39": "PREFIX = \"pandarallel\"\n",
          "40": "PREFIX_INPUT = f\"{PREFIX}_input_\"\n",
          "41": "PREFIX_OUTPUT = f\"{PREFIX}_output_\"\n",
          "42": "SUFFIX = \".pickle\"\n",
          "43": "\n",
          "44": "# We use these classes decorators pattern instead of the classic one because of this:\n",
          "45": "# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n",
          "46": "\n",
          "47": "class WrapWorkFunction(...):\n"
        },
        "content_change": {
          "1": "Ensure the `ProgressBars` instance handles the `set_error` method gracefully in the `main_code` function."
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": {
          "24": "ProgressBars"
        },
        "content_all": {
          "23": "\n",
          "24": "class ProgressBars(ABC):\n",
          "25": "    @abstractmethod\n",
          "26": "    def __init__(self, maxs: List[int], show: bool) -> None:\n",
          "27": "        ...\n",
          "28": "\n",
          "29": "    @abstractmethod\n",
          "30": "    def update(self, values: List[int]) -> None:\n",
          "31": "        ...\n",
          "32": "\n",
          "33": "    @abstractmethod\n",
          "34": "    def set_error(self, worker_index: int) -> None:\n",
          "35": "        ...\n",
          "36": "\n"
        },
        "content_change": {
          "34": "    def set_error(self, worker_index: int) -> None:\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": {
          "37": "ProgressBarsConsole"
        },
        "content_all": {
          "36": "\n",
          "37": "class ProgressBarsConsole(ProgressBars):\n",
          "38": "    def __init__(self, maxs: List[int], show: bool) -> None:\n",
          "39": "        self.progresses = [0] * len(maxs)\n",
          "40": "        self.maxs = maxs\n",
          "41": "        self.show = show\n",
          "42": "        self.errors = [False] * len(maxs)\n",
          "43": "        if show:\n",
          "44": "            self._draw_initial()\n",
          "45": "\n",
          "46": "    def set_error(self, worker_index: int) -> None:\n",
          "47": "        self.errors[worker_index] = True\n",
          "48": "        self._draw()\n",
          "49": "\n",
          "50": "    def _draw(self) -> None:\n",
          "51": "        # Implementation for drawing the progress bar,\n",
          "52": "        # showing errors in red, for instance.\n",
          "53": "        pass\n",
          "54": "\n"
        },
        "content_change": {
          "46": "    def set_error(self, worker_index: int) -> None:\n"
        }
      }
    ],
    "location_ground_exp": [
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": {
          "24": "ProgressBars"
        },
        "content_all": {
          "24": "class ProgressBars(ABC):\n",
          "25": "    @abstractmethod\n",
          "26": "    def __init__(self, maxs: List[int], show: bool) -> None:\n",
          "27": "        ...\n",
          "28": "\n",
          "29": "    @abstractmethod\n",
          "30": "    def update(self, values: List[int]) -> None:\n",
          "31": "        ...\n",
          "32": "\n",
          "33": "    def set_error(self, worker_id: int) -> None:\n",
          "34": "        pass\n"
        },
        "content_change": {
          "33": "    def set_error(self, worker_id: int) -> None:\n",
          "34": "        pass\n"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "27": "WorkerStatus"
        },
        "content_all": {
          "26": "from .utils import WorkerStatus\n",
          "27": "WorkerStatus.Error = 3\n",
          "28": "\n",
          "29": "def worker_function(...):\n",
          "30": "    try:\n",
          "31": "        # Worker logic\n",
          "32": "    except Exception as e:\n",
          "33": "        progress_bars.set_error(worker_id)\n",
          "34": "        WorkerStatus.Error\n"
        },
        "content_change": {
          "27": "WorkerStatus.Error = 3\n",
          "32": "    except Exception as e:\n",
          "33": "        progress_bars.set_error(worker_id)\n",
          "34": "        WorkerStatus.Error\n"
        }
      }
    ],
    "location_ground_truth": [
      {
        "file": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "function": null,
        "content_all": {
          "31": "        ...\n",
          "32": "\n",
          "33": "\n",
          "34": "\n"
        },
        "content_change": {}
      }
    ],
    "patch_i": "\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -10,6 +10,7 @@\n \n from .utils import WorkerStatus\n \n+from IPython import get_ipython\n INTERVAL_NS = 250_000_000  # 0.25 sec\n MINIMUM_TERMINAL_WIDTH = 72\n \n@@ -42,7 +43,7 @@ def is_notebook_lab() -> bool:\n     try:\n-        shell = get_ipython().__class__.__name__ # type: ignore\n+        shell: str = get_ipython().__class__.__name__\n \n         # Shell: Google Colab\n         # TerminalInteractiveShell: Terminal running I\n",
    "patch_im": "\n--- a/nalepae_pandarparallel/pandarparallel/progress_bars.py\n+++ b/nalepae_pandarparallel/pandarparallel/progress_bars.py\n@@ -42,7 +42,14 @@ class ProgressState:\n         self.next_put_iteration = max(chunk_size // 100, 1)\n         self.last_put_time = time_ns()\n \n+class ProgressBarsConsole(ProgressBars):\n+    def __init__(self, maxs: List[int], show: bool) -> None:\n+        self.maxs = maxs\n+        self.show = show\n+        # Initialize other necessary attributes\n+\n+    def update(self, values: List[int]) -> None:\n+        # Logic to update progress bars\n+        pass\n \n+    def set_error(self, worker_index: int) -> None:\n+        # Logic to handle error for a specific worker\n+        print(f\"Worker {worker_index} encountered an error.\", file=sys.stderr)\n+\n def is_notebook_lab() -> bool:\n     try:\n         shell: str = get_ipython().__class__.__name__  # type: ignore\n",
    "patch_il": "\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -9,6 +9,7 @@\n from typing import Callable, List, Union\n \n from .utils import WorkerStatus\n+from IPython import get_ipython\n \n INTERVAL_NS = 250_000_000  # 0.25 sec\n MINIMUM_TERMINAL_WIDTH = 72\n",
    "patch_iml": "\n--- a/nalepae_pandarparallel/pandarparallel/progress_bars.py\n+++ b/nalepae_pandarparallel/pandarparallel/progress_bars.py\n@@ -33,6 +33,11 @@\n class ProgressBarsConsole(ProgressBars):\n     def __init__(self, maxs: List[int], show: bool) -> None:\n         self.maxs = maxs\n         self.show = show\n         # Initialize other necessary attributes\n \n     def update(self, values: List[int]) -> None:\n         # Logic to update progress bars\n         pass\n \n+    def set_error(self, worker_index: int) -> None:\n+        # Logic to handle error for a specific worker\n+        print(f\"Worker {worker_index} encountered an error.\", file=sys.stderr)\n",
    "patch_ground": "\n--- a/nalepae_pandarparallel/pandarparallel/progress_bars.py\n+++ b/nalepae_pandarparallel/pandarparallel/progress_bars.py\n@@ -26,6 +26,10 @@\n         ...\n \n     @abstractmethod\n+    def set_error(self, worker_index: int) -> None:\n+        ...\n+\n+    @abstractmethod\n     def update(self, values: List[int]) -> None:\n         ...\n \n@@ -56,6 +60,14 @@\n             self._draw_initial()\n \n     def update(self, values: List[int]) -> None:\n+        pass  # Implementation of update\n+\n+    def set_error(self, worker_index: int) -> None:\n+        self.errors[worker_index] = True\n+        self._draw()\n+\n+    def _draw(self) -> None:\n+        pass  # Implementation for drawing the progress bar, showing errors in red, for instance.\n         ...\n \n class ProgressState:\n",
    "patch_ground_location": "\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -24,6 +24,8 @@ class ProgressBars(ABC):\n     @abstractmethod\n     def update(self, values: List[int]) -> None:\n         ...\n+    \n+    @abstractmethod\n     def set_error(self, worker_index: int) -> None:\n         ...\n \n@@ -36,6 +38,18 @@ class ProgressBarsConsole(ProgressBars):\n         if show:\n             self._draw_initial()\n \n+    def set_error(self, worker_index: int) -> None:\n+        self.errors[worker_index] = True\n+        self._draw()\n+\n+    def _draw(self) -> None:\n+        # Implementation for drawing the progress bar,\n+        # showing errors in red, for instance.\n+        for i, (progress, max_val, error) in enumerate(zip(self.progresses, self.maxs, self.errors)):\n+            if error:\n+                print(f\"Worker {i} encountered an error\", file=sys.stderr)\n+            else:\n+                print(f\"Worker {i}: {progress}/{max_val}\")\n         pass\n \n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -94,6 +94,7 @@ while any(\n ):\n     message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n     worker_index, worker_status, payload = message\n+    \n     workers_status[worker_index] = worker_status\n \n     if worker_status == WorkerStatus.Success:\n         progresses[worker_index] = progresses_length[worker_index]\n@@ -106,6 +107,7 @@ while any(\n         progress = cast(int, payload)\n         progresses[worker_index] = progress\n \n         if next(generation) % nb_workers == 0:\n             progress_bars.update(progresses)\n     elif worker_status == WorkerStatus.Error:\n+        progress_bars.set_error(worker_index)  # Ensure this line works now with the set_error method.\n",
    "patch_ground_exp": "\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -30,6 +30,8 @@\n         ...\n \n     def set_error(self, worker_id: int) -> None:\n-        pass\n+        print(f\"Worker {worker_id} encountered an error.\")  # Placeholder for actual error handling\n \n \n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -26,6 +26,7 @@\n from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n from .utils import WorkerStatus\n \n+WorkerStatus.Error = 3  # Adding Error Status to WorkerStatus Enum\n \n ON_WINDOWS = os.name == \"nt\"\n CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n@@ -180,6 +181,8 @@\n             # Worker logic\n             ...\n     except Exception as e:\n+        worker_id = os.getpid()  # Assuming worker_id is process ID in this context\n+        progress_bars.set_error(worker_id)\n         WorkerStatus.Error\n         raise  # Re-raise the exception after setting error status\n \n--- a/nalepae_pandarallel/pandarallel/utils.py\n+++ b/nalepae_pandarallel/pandarallel/utils.py\n@@ -3,6 +3,7 @@\n from typing import Any, Dict, List, Tuple\n \n import pandas as pd\n+from enum import IntEnum\n from pandas import DataFrame, Index\n \n \n@@ -10,6 +11,11 @@\n class WorkerStatus(Enum):\n     Working = 1\n     Completed = 2\n+    Error = 3\n \n \n # Util functions\n def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n",
    "patch_ground_all": "\n--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -31,6 +31,10 @@ class ProgressBars(ABC):\n         ...\n \n     @abstractmethod\n+    def set_error(self, worker_id: int) -> None:\n+        ...\n+\n+    @abstractmethod\n     def update(self, values: List[int]) -> None:\n         ...\n \n@@ -49,6 +53,8 @@ class ProgressBars(ABC):\n class SimpleProgressBars(ProgressBars):\n     def __init__(self, maxs: List[int], show: bool = True) -> None:\n         self.maxs = maxs\n         self.show = show\n+        self.errors = [False] * len(maxs)  # Track errors for each worker\n \n     def set_error(self, worker_id: int) -> None:\n         self.errors[worker_id] = True  # Flag the error for the specific worker\n \n     def update(self, values: List[int]) -> None:\n         if not self.show:\n             return\n         for i, value in enumerate(values):\n             progress = min(100, 100 * value // self.maxs[i])\n+            if self.errors[i]:\n+                print(f\"Worker {i} encountered an error!\", file=sys.stderr)\n+            else:\n                 print(f\"Worker {i}: {progress}% complete\", file=sys.stderr)\n",
    "patch_ground_truth": "--- a/nalepae_pandarallel/pandarallel/progress_bars.py\n+++ b/nalepae_pandarallel/pandarallel/progress_bars.py\n@@ -30,6 +30,8 @@\n     def update(self, values: List[int]) -> None:\n         ...\n \n+    def set_error(self, index: int) -> None:\n+        pass\n \n \n class ProgressState:\n",
    "message": "--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n_____________________________________ test_dataframe_apply_invalid_function[RuntimeError-True-False] _____________________________________\n\npandarallel_init = None, exception = <class 'RuntimeError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b4496dc0>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b5e72ca0>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b5de2a90>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b5dd78e0>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.InUserDefinedFunction: 1>, 0, <AutoProxy[Queue] object, typeid 'Queue' a...x13\\x8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x0cRuntimeError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5582dc0>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForFileSystem object at 0x7097b44afb80>\nwrapped_reduce_function = <function wrap_reduce_function_for_file_system.<locals>.closure at 0x7097b5e01670>\nchunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1, progresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44af580>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>], input_files = [<tempfile._TemporaryFileWrapper object at 0x7097b44affa0>]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n    \n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n    \n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n    \n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n    \n            pool.close()\n    \n            generation = count()\n    \n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n    \n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n    \n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n>                   progress_bars.set_error(worker_index)\nE                   AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:321: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-False-False] __________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5582a60>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b44da1f0>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b44da430>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44da280>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.No: 0>, 0, <AutoProxy[Queue] object, typeid 'Queue' at 0x7097b44da340>, ...8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x11ZeroDivisionError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n___________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b557d160>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForFileSystem object at 0x7097b44d33d0>\nwrapped_reduce_function = <function wrap_reduce_function_for_file_system.<locals>.closure at 0x7097b44be550>\nchunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1, progresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b44d3250>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>], input_files = [<tempfile._TemporaryFileWrapper object at 0x7097b44d3c70>]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n    \n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n    \n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n    \n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n    \n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n    \n            pool.close()\n    \n            generation = count()\n    \n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n    \n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n    \n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n>                   progress_bars.set_error(worker_index)\nE                   AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:321: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n__________________________________ test_dataframe_apply_invalid_function[ZeroDivisionError-True-False] ___________________________________\n\npandarallel_init = None, exception = <class 'ZeroDivisionError'>\n\n    def test_dataframe_apply_invalid_function(pandarallel_init, exception):\n        def f(_):\n            raise exception\n    \n        df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n    \n        with pytest.raises(exception):\n>           df.parallel_apply(f)\n\ntests/test_pandarallel.py:174: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndata =    a\n0  1\n1  2\n2  3\n3  4, user_defined_function = <function test_dataframe_apply_invalid_function.<locals>.f at 0x7097b5dc9820>\nuser_defined_function_args = (), user_defined_function_kwargs = {}\nwrapped_work_function = <pandarallel.core.WrapWorkFunctionForPipe object at 0x7097b4487250>\nmanager = <multiprocessing.managers.SyncManager object at 0x7097b44872e0>, chunks = [   a\n0  1\n1  2\n2  3\n3  4], nb_workers = 1\nprogresses_length = [4], reduce_extra = {'axis': 0}\nprogress_bars = <pandarallel.progress_bars.ProgressBarsConsole object at 0x7097b4487a30>, progresses = [0]\nworkers_status = [<WorkerStatus.Error: 2>]\nwork_args_list = [(   a\n0  1\n1  2\n2  3\n3  4, <ProgressBarsType.InUserDefinedFunction: 1>, 0, <AutoProxy[Queue] object, typeid 'Queue' a...8c\\rcell_contents\\x94h\\x00\\x8c\\n_load_type\\x94\\x93\\x94\\x8c\\x11ZeroDivisionError\\x94\\x85\\x94R\\x94\\x87\\x94R0.', (), ...)]\n\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n    \n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n    \n        nb_workers = len(chunks)\n    \n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n    \n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n    \n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n    \n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n    \n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n    \n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n    \n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n    \n        generation = count()\n    \n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n    \n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n    \n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n>               progress_bars.set_error(worker_index)\nE               AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n\npandarallel/core.py:442: AttributeError\n--------------------------------------------------------- Captured stdout setup ----------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n---------------------------------------------------------- Captured stdout call ----------------------------------------------------------\n   0.00%                                          |        0 /        4 |      \n============================================================ warnings summary ============================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n======================================================== short test summary info =========================================================\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-False] - AttributeError: 'ProgressBarsConsole' object has no attribute 'set_error'\n=============================================== 8 failed, 209 passed, 16 warnings in 8.60s ===============================================",
    "CodeBase": [
      {
        "path": "nalepae_pandarallel/pandarallel/core.py",
        "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n34 \n35 # By default, Pandarallel use all available CPUs\n36 NB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n37 \n38 # Prefix and suffix for files used with Memory File System\n39 PREFIX = \"pandarallel\"\n40 PREFIX_INPUT = f\"{PREFIX}_input_\"\n41 PREFIX_OUTPUT = f\"{PREFIX}_output_\"\n42 SUFFIX = \".pickle\"\n43 \n44 # We use these classes decorators pattern instead of the classic one because of this:\n45 # https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n46 \n47 \n48 class WrapWorkFu(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33 \n34 \n35 class ProgressState:\n36     def __init__(self, chunk_size: int) -> None:\n37         self.last_put_iteration = 0\n38         self.next_put_iteration = max(chunk_size // 100, 1)\n39         self.last_put_time = time_ns()\n40 \n41 \n42 def is_notebook_lab() -> bool:\n43     try:\n44         shell: str = get_ipython().__class__.__name__  # type: ignore\n45 \n46         # Shell: Google Colab\n47         # TerminalInteractiveShell: Terminal running I(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/utils.py",
        "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n10     \"\"\"\n11     Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n12 \n13     Parameters\n14     ----------\n15     nb_item : int\n16         Total number of items\n17 \n18     nb_chunks : int\n19         Number of chunks to return\n20 \n21     start_offset : int\n22         Shift start of slice by this amount\n23 \n24     Returns\n25     -------\n26     A list of slices\n27 \n28     Examples\n29     --------\n30     >>> chunks = chunk(103, 4)\n31     >>> chunks\n32     [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n33     \"\"\"\n34     if nb_item == 0:\n35 (...truncated)"
      },
      {
        "path": "nalepae_pandarallel/tests/test_pandarallel.py",
        "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):(...truncated)"
      }
    ],
    "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f"
  },
  "Score": {
    "Difficulty": "Easy",
    "issue_origin": {
      "Title": 6,
      "Description": 7,
      "Reproducibility": 6,
      "Relevance": 7,
      "Explanation": 6,
      "Overall": 6.5
    },
    "issue_message": {
      "Title": 6,
      "Description": 8,
      "Reproducibility": 7,
      "Relevance": 8,
      "Explanation": 7,
      "Overall": 7
    },
    "issue_ground": {
      "Title": 8,
      "Description": 6,
      "Reproducibility": 6,
      "Relevance": 7,
      "Explanation": 7,
      "Overall": 7
    },
    "issue_ground_truth": {
      "title": "Progress Bars Fail to Indicate Worker Errors",
      "description": "Currently, when workers encounter errors during their tasks, the progress bars do not reflect this status, making it difficult for users to identify which specific worker failed. This lack of feedback can significantly hinder debugging processes and overall user experience. There is a need for a mechanism that clearly indicates errors in the progress bars, providing immediate visual feedback to the user regarding the status of each worker.",
      "explanation": "### Summary of the Issue\n\nThe issue titled **\"Progress Bars Fail to Indicate Worker Errors\"** addresses the problem where progress bars in the application fail to reflect when a worker encounters an error during its task. This lack of feedback complicates debugging and detracts from the overall user experience, as users are not informed which specific worker failed.\n\n### Content of the Commit\n\nThe commit adds a declaration for a new method `set_error` in the `ProgressBars` class. Here is the specific information:\n\n- **Filename:** `nalepae_pandarallel/pandarallel/progress_bars.py`\n- **Patch:** Introduces the method `set_error` in the `ProgressBars` class.\n\nThe method `set_error` is probably intended to manually set a worker's progress bar to an error state.\n\n### Analysis\n\n#### Cause of the Issue\n\nThe core of the issue lies in the lack of a mechanism to reflect worker errors in the progress bars. When a worker encounters an error, the status isn't visually indicated to the user, leaving them unsure whether the progress bar's lack of movement is due to ongoing computation or an underlying error.\n\n#### Solution Description\n\nThe suggested solution includes the following key steps:\n\n1. **Introduce a Method to Flag Errors:** Adding a `set_error` method to the `ProgressBars` class provides a way to manually indicate when a worker encounters an error.\n2. **Update Worker Management Logic:** The logic handling workers' status, likely found within the main processing unit, may now include calls to this `set_error` method when it detects that a worker has errored out.\n3. **Visual Feedback Mechanism:** By incorporating this method, the progress bars can visually show which specific worker encountered an error, improving user feedback and debugging capabilities.\n\n### Detailed Explanation\n\n#### Step-by-Step Breakdown\n\n1. **Understand the User Pain Point:**\n   - Users need to be aware of which workers are failing in real-time.\n   - Currently, all they see is a progress bar that stops moving, which is ambiguous and doesn't clarify if it's due to completion, still processing, or an error.\n\n2. **Implement the Error Indication Method:**\n   - The commit provides the foundation by declaring a `set_error` method in the `ProgressBars` class.\n   - This method will be the hook through which errors can be indicated visually.\n\n3. **Leverage the Error Method:**\n   - Within the worker management code, which likely orchestrates task distribution and progress updates, you'll have logic to determine worker success or failure.\n   - When a worker fails, besides logging or handling the error, it will now also invoke `set_error` on the corresponding progress bar, thereby updating the visual feedback immediately.\n\n4. **Operational Flow:**\n   - A worker starts processing a task.\n   - If it finishes successfully, the progress bar updates normally.\n   - If it encounters an error, the `set_error` method is called, and this change reflects in the UI, immediately indicating to the user which worker had issues.\n\n#### How the Commit Solves the Issue\n\n- **Detection and Reflection:** By allowing the worker management system to call `set_error`, it directly links the detection of an issue with its reflection in the progress bars.\n- **User Clarity:** Users get immediate visual feedback regarding which worker has failed, making it clearer and easier to debug and understand application state.\n- **Enhanced Monitoring:** This additional feature enhances the overall monitoring capability of the application, aligning more closely with user expectations for robust progress tracking.\n\n### Conclusion\n\nTo address the issue where progress bars don't reflect worker errors, the commit primarily adds the `set_error` method as a means to manually flag errors visually. From a developer’s perspective, this change allows for a more dynamic and responsive UI that conveys real-time worker statuses, including errors, enhancing user experience and aiding in quicker troubleshooting."
    }
  }
}