{
  "RepoName": "https://github.com/nalepae/pandarallel.git",
  "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f",
  "Time": "",
  "Difficulty": "Easy",
  "Type": "type mismatch",
  "BuggyCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "OriginCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "ErrorMessage": "pandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771688d60>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1000-True-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df771661430>, df_size = 1000\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771600900>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |     1000 /     1000 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |     1000 /     1000 |      \n_______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1-False-None] _______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df771607c10>, df_size = 1\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716004a0>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df772f22e50>, df_size = 1\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7712e1660>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1000-False-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771661b80>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7712e15f0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1000-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607670>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771600510>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n_________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1-False-None] _________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607d30>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716003c0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1-True-None] __________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607790>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771688200>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n_____________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df7716de700>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716adb30>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df771661b80>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771620ac0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1-False-None] _______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df772f9db80>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df77162a890>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df7716b50d0>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771644900>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n==================================================================================================== warnings summary ====================================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n================================================================================================ short test summary info =================================================================================================\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[AttributeError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[AttributeError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[named-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[named-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[anonymous-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[anonymous-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[named-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[named-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[anonymous-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[anonymous-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\n====================================================================================== 46 failed, 171 passed, 16 warnings in 10.16s ======================================================================================",
  "Patch": "--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -45,6 +45,8 @@\n         def reduce(\n             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n         ) -> pd.DataFrame:\n+            if isinstance(datas, GeneratorType):\n+                datas = list(datas)\n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n             return pd.concat(datas, copy=False, axis=axis)\n \n",
  "BuggyCodeLocation": [
    {
      "file": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "function": null,
      "content_all": {
        "49": "            return pd.concat(datas, copy=False, axis=axis)\n",
        "50": "\n"
      },
      "content_change": {}
    }
  ],
  "Issue": {
    "title": "DataFrame Parallel Apply Fails with GeneratorType Input",
    "description": "When using the `parallel_apply` method on a DataFrame, if the input data is of type `GeneratorType`, the operation fails. This is because the current implementation does not handle `GeneratorType` inputs correctly and attempts to use it without converting it into a list, causing unexpected behaviors or errors. To overcome this limitation, the `parallel_apply` method should first convert `GeneratorType` inputs into a list before proceeding with further operations.\n\nSteps to Reproduce:\n1. Create a DataFrame and prepare a generator that produces constituent DataFrames.\n2. Attempt to use the `parallel_apply` method on this DataFrame with the generator input.\n\nExpected Behavior:\n- The `parallel_apply` method should correctly handle `GeneratorType` inputs by converting them into a list and then performing the apply operation as expected.\n\nActual Behavior:\n- The method fails or misbehaves due to improper handling of `GeneratorType` inputs.\n\nThis issue affects parallel processing capabilities and needs to be addressed to ensure robust handling of various input types for parallel operations.",
    "explanation": "### Summary of the Issue\n\nThe issue reported revolves around the `parallel_apply` method of the `DataFrame` class, specifically when the input data is of type `GeneratorType`. The `parallel_apply` method is intended to apply a function in parallel to a DataFrame to take advantage of multiple cores, but it fails or misbehaves if the input is a generator. This failure occurs because the implementation attempts to manipulate the generator directly instead of converting it into a list, leading to unexpected behaviors or errors.\n\n### Details of the Commit\n\nThe commit introduces a fix to handle `GeneratorType` inputs properly in the `reduce` function within the `dataframe.py` file. The core change made in the commit can be broadly summarized as follows:\n\n1. **Conversion of GeneratorType to List**: \n   - The commit checks if the input data (`datas`) is of type `GeneratorType`.\n   - If the input data is identified as a generator, it converts the generator into a list before proceeding with further operations.\n\n### Explanation of How the Commit Solves the Issue\n\n#### Cause of the Issue:\n- The `parallel_apply` method needs to manage and process chunks of data efficiently. Generators, unlike lists, do not support multiple single passes, implying that once a generator is consumed, it cannot be reused unless explicitly converted to a list.\n- Failure to convert a generator into a list causes errors or unpredictable behavior when trying to apply parallel operations on DataFrame as the underlying generator gets exhausted after initial usage.\n\n#### How the Commit Fixes It:\n- By converting `GeneratorType` inputs into a list, it ensures that all operations can be performed on the data without the risk of exhausting the generator. This also allows the method to handle multiple operations consistently, as lists can be iterated over multiple times, unlike generators.\n- The immediate conversion of the generator to a list in the `reduce` function means that the parallel apply method reliably processes the data, avoiding the pitfalls associated with single-use iterators (generators).\n\n#### Solution to the Issue:\n- With the fix, when `parallel_apply` encounters a `GeneratorType` input, it seamlessly converts the generator to a list.\n- This conversion ensures that subsequent operations on the data within the `parallel_apply` method function as expected, thereby maintaining stability and reliability.\n- The fix effectively increases the robustness of the `parallel_apply` method, ensuring it can now accommodate generator inputs without failure, making the parallel processing of DataFrames more versatile and dependable.\n\n### Summary\nIn essence, the commit addresses a critical limitation in how the `parallel_apply` method deals with generator inputs by adding a necessary conversion step. This small but significant change ensures the method can safely and correctly handle generators, making it more robust and versatile in its parallel processing capabilities. This fix is essential for ensuring consistency and predictability when applying functions in parallel across DataFrame data."
  },
  "Explain": "### Summary of the Issue\n\nThe issue reported revolves around the `parallel_apply` method of the `DataFrame` class, specifically when the input data is of type `GeneratorType`. The `parallel_apply` method is intended to apply a function in parallel to a DataFrame to take advantage of multiple cores, but it fails or misbehaves if the input is a generator. This failure occurs because the implementation attempts to manipulate the generator directly instead of converting it into a list, leading to unexpected behaviors or errors.\n\n### Details of the Commit\n\nThe commit introduces a fix to handle `GeneratorType` inputs properly in the `reduce` function within the `dataframe.py` file. The core change made in the commit can be broadly summarized as follows:\n\n1. **Conversion of GeneratorType to List**: \n   - The commit checks if the input data (`datas`) is of type `GeneratorType`.\n   - If the input data is identified as a generator, it converts the generator into a list before proceeding with further operations.\n\n### Explanation of How the Commit Solves the Issue\n\n#### Cause of the Issue:\n- The `parallel_apply` method needs to manage and process chunks of data efficiently. Generators, unlike lists, do not support multiple single passes, implying that once a generator is consumed, it cannot be reused unless explicitly converted to a list.\n- Failure to convert a generator into a list causes errors or unpredictable behavior when trying to apply parallel operations on DataFrame as the underlying generator gets exhausted after initial usage.\n\n#### How the Commit Fixes It:\n- By converting `GeneratorType` inputs into a list, it ensures that all operations can be performed on the data without the risk of exhausting the generator. This also allows the method to handle multiple operations consistently, as lists can be iterated over multiple times, unlike generators.\n- The immediate conversion of the generator to a list in the `reduce` function means that the parallel apply method reliably processes the data, avoiding the pitfalls associated with single-use iterators (generators).\n\n#### Solution to the Issue:\n- With the fix, when `parallel_apply` encounters a `GeneratorType` input, it seamlessly converts the generator to a list.\n- This conversion ensures that subsequent operations on the data within the `parallel_apply` method function as expected, thereby maintaining stability and reliability.\n- The fix effectively increases the robustness of the `parallel_apply` method, ensuring it can now accommodate generator inputs without failure, making the parallel processing of DataFrames more versatile and dependable.\n\n### Summary\nIn essence, the commit addresses a critical limitation in how the `parallel_apply` method deals with generator inputs by adding a necessary conversion step. This small but significant change ensures the method can safely and correctly handle generators, making it more robust and versatile in its parallel processing capabilities. This fix is essential for ensuring consistency and predictability when applying functions in parallel across DataFrame data.",
  "Source": "Human",
  "Token": 1353,
  "Command": [
    "pytest tests"
  ],
  "FilteredCode": [
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "1 from typing import Any, Callable, Dict, Iterable, Iterator\n2 from types import GeneratorType\n3 \n4 import pandas as pd\n5 \n6 from ..utils import chunk, get_axis_int\n7 from .generic import DataType\n8 \n9 \n10 class DataFrame:\n11     class Apply(DataType):\n12         @staticmethod\n13         def get_chunks(\n14             nb_workers: int, data: pd.DataFrame, **kwargs\n15         ) -> Iterator[pd.DataFrame]:\n16             user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n17 \n18             axis_int = get_axis_int(user_defined_function_kwargs)\n19             opposite_axis_int = 1 - axis_int\n20 \n21             for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n22                 yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n23 \n24         @staticmethod\n25         def work(\n26             data: pd.DataFrame,\n27             user_defined_function: Callable,\n28             user_defined_function_args: tuple,\n29             user_defined_function_kwargs: Dict[str, Any],\n30             extra: Dict[str, Any],\n31         ) -> pd.DataFrame:\n32             return data.apply(\n33                 user_defined_function,\n34                 *user_defined_function_args,\n35                 **user_defined_function_kwargs,\n36             )\n37 \n38         @staticmethod\n39         def get_reduce_extra(\n40             data: Any, user_defined_function_kwargs: Dict[str, Any]\n41         ) -> Dict[str, Any]:\n42             return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n43 \n44         @staticmethod\n45         def reduce(\n46             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n47         ) -> pd.DataFrame:\n48             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n49             return pd.concat(datas, copy=False, axis=axis)\n50 \n51     class ApplyMap(DataType):\n52         @staticmethod\n53         def get_chunks(\n54             nb_workers: int, data: pd.DataFrame, **kwargs\n55         ) -> Iterator[pd.DataFrame]:\n56             for chunk_ in chunk(data.shape[0], nb_workers):\n57                 yield data.iloc[chunk_]\n58 \n59         @staticmethod\n60         def work(\n61             data: pd.DataFrame,\n62             user_defined_function: Callable,\n63             user_defined_function_args: tuple,\n64             user_defined_function_kwargs: Dict[str, Any],\n65             extra: Dict[str, Any],\n66         ) -> pd.DataFrame:\n67             return data.applymap(user_defined_function)\n68 \n69         @staticmethod\n70         def reduce(\n71             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n72         ) -> pd.DataFrame:\n73             return pd.concat(datas, copy=False)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_W(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0(...truncated)"
    }
  ],
  "TokenAll": 14225,
  "FilteredLevel": 1500,
  "Results": {
    "model": "GPT-4o",
    "Difficulty": "Easy",
    "issue_origin": {
      "title": "Incomplete lambda function in test function",
      "description": "In nalepae_pandarallel/tests/test_pandarallel.py, the lambda function for func_series_map appears to be truncated and incomplete. Specifically, the function at line 64 is not properly closed and lacks a coherent implementation. Please ensure that the lambda function is fully written and correct any potential syntax or logical errors.",
      "explanation": "### Issue Summary:\n\nThe issue reported is about an incomplete lambda function in the test file `nalepae_pandarallel/tests/test_pandarallel.py` at line 64. Specifically, the lambda function inside the `func_series_map` fixture seems truncated and does not properly close or complete its logical structure. This issue can potentially cause test failures or misbehavior due to the malformed lambda expression.\n\n### Content of the Commit:\n\nLet's break down the provided code snippet to understand the context and how a commit could address this issue.\n\nCurrent code snippet for the `func_series_map` fixture (potentially problematic part is highlighted):\n```python\n59 @pytest.fixture(params=(\"named\", \"anonymous\"))\n60 def func_series_map(request):\n61     def func(x):\n62         return math.log10(math.sqrt(math.exp(x**2)))\n63 \n64     return dict(\n65         named=func, anonymous=lambda x: math.(...truncated)\n```\n\n### Detailed Explanation of the Issue:\n\n1. **Incomplete Lambda Function**: \n   At line 64, the lambda function intended for the 'anonymous' key in the dictionary does not have a complete implementation. The function abruptly ends (`lambda x: math.`) indicating truncation. This not only causes a SyntaxError but also means the function logic for the 'anonymous' case is not implemented.\n\n### hypothesized fix in a commit:\nThe correct lambda function must replicate the functionality of the `named` function, which is `return math.log10(math.sqrt(math.exp(x**2)))`. So the corrected lambda function should look like this:\n```python\n65        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n```\n\n### How the Commit Solves the Issue:\n\nThe commit with the fix should restore a complete and logically consistent lambda function for the 'anonymous' key.\n\n```diff\n59 @pytest.fixture(params=(\"named\", \"anonymous\"))\n60 def func_series_map(request):\n61     def func(x):\n62         return math.log10(math.sqrt(math.exp(x**2)))\n63 \n64     return dict(\n65         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2))))\n```\n\n### Detailed Explanation of How the Fix Solves the Issue:\n\n1. **Syntax Correction**:\n    - By completing the lambda function, the commit fixes the syntax error. It ensures the lambda function is syntactically correct and executable.\n    \n2. **Functional Consistency**:\n    - The anonymous lambda function (`lambda x: math.log10(math.sqrt(math.exp(x**2)))`) now performs the exact same calculation as the named function `func`. This guarantees that both versions of the function (`named` and `anonymous`) are functionally equivalent.\n   \n3. **Test Reliability**:\n    - With this correction, the `func_series_map` fixture now provides a valid and complete function for both named and anonymous cases. This ensures that the tests relying on this fixture will run correctly and can reliably test the corresponding functionality without encountering errors.\n\n4. **Code Clarity and Maintenance**:\n    - Completing the lambda expression improves the readability and maintainability of the code. Future developers will not be confused by an incomplete lambda, making it easier to understand and extend the test cases if necessary.\n\n### Conclusion:\nBy completing the lambda function, the commit effectively resolves the core issue of an incomplete and non-functional lambda expression. This ensures consistent functionality between named and anonymous implementations within the `func_series_map` fixture, thereby maintaining the reliability and accuracy of the test cases in `nalepae_pandarallel/tests/test_pandarallel.py`."
    },
    "issue_message": {
      "title": "Generator object is not subscriptable",
      "description": "The code attempts to subscript a 'generator' object in the 'reduce' method of the 'Apply' class in 'pandarallel/data_types/dataframe.py' at line 48. Generators in Python do not support subscripting. To fix this issue, consider converting the generator to a list or another collection type that supports subscripting.",
      "explanation": "### Summary of the Issue\n\nThe error message indicates that the code is attempting to subscript (i.e., use index access) a generator object. This occurs in the `reduce` method of the `Apply` class in the file `pandarallel/data_types/dataframe.py` at line 48. Generators in Python do not support subscripting, leading to a `TypeError`.\n\n### Analysis of the Commit and Solution\n\nBased on the context provided and the provided error message and stack trace, the solution requires modifying the `reduce` method to properly handle the generator object. Here is the pertinent portion of the code:\n```python\n@staticmethod\ndef reduce(\n    datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n) -> pd.DataFrame:\n    axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n    return pd.concat(datas, copy=False, axis=axis)\n```\nThe error occurs because `datas` is an `Iterable`, and in this case, it is a generator that cannot be accessed using the indexing syntax `datas[0]`.\n\n### Explanation of the Solution\n\nTo resolve this issue, the generator needs to be converted into a list or another subscriptable collection before attempting to access its elements using indexing. This can be done by converting the generator to a list.\n\n#### Content of the commit\n\nThe commit would modify the `reduce` method to convert the generator to a list before subscripting:\n```python\n@staticmethod\ndef reduce(\n    datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n) -> pd.DataFrame:\n    datas = list(datas)  # Convert generator to list\n    axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n    return pd.concat(datas, copy=False, axis=axis)\n```\n### How the Commit Solves the Issue\n\n1. **Conversion to List**:\n    ```python\n    datas = list(datas)\n    ```\n    This line converts the `datas` iterable to a list. Since lists are subscriptable, this allows accessing the first element using `datas[0]`.\n\n2. **Indexing and Conditional Check**:\n    ```python\n    axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n    ```\n    The code now safely checks the first element of `datas` to determine the axis based on its type.\n\n3. **Concatenation**:\n    ```python\n    return pd.concat(datas, copy=False, axis=axis)\n    ```\n    Finally, the list of dataframes (or series) is concatenated along the specified axis.\n\n### Benefits of the Commit\n\n- **The solution resolves the TypeError** by ensuring that subscripting is performed on a list rather than a generator.\n- **The functionality of the method is retained** while fixing the error, ensuring that the `reduce` method continues to concatenate the data along the correct axis.\n- **Improvement in readability and maintainability**: Converting the generator to a list at the start and then processing it adds clarity to what type of data structure is being worked with, reducing future errors related to type handling.\n\nBy converting the generator to a list, the code now handles the `datas` parameter correctly and avoids the `TypeError` caused by subscripting a generator, leading to successful execution of the `reduce` method."
    },
    "issue_ground": {
      "title": "DataFrame Parallel Apply Fails with GeneratorType Input",
      "description": "When using the `parallel_apply` method on a DataFrame, if the input data is of type `GeneratorType`, the operation fails. This is because the current implementation does not handle `GeneratorType` inputs correctly and attempts to use it without converting it into a list, causing unexpected behaviors or errors. To overcome this limitation, the `parallel_apply` method should first convert `GeneratorType` inputs into a list before proceeding with further operations.\n\nSteps to Reproduce:\n1. Create a DataFrame and prepare a generator that produces constituent DataFrames.\n2. Attempt to use the `parallel_apply` method on this DataFrame with the generator input.\n\nExpected Behavior:\n- The `parallel_apply` method should correctly handle `GeneratorType` inputs by converting them into a list and then performing the apply operation as expected.\n\nActual Behavior:\n- The method fails or misbehaves due to improper handling of `GeneratorType` inputs.\n\nThis issue affects parallel processing capabilities and needs to be addressed to ensure robust handling of various input types for parallel operations.",
      "explanation": "### Issue Summary\n\n**Title:** DataFrame Parallel Apply Fails with GeneratorType Input\n\n**Description:** When using the `parallel_apply` method on a DataFrame in the `pandarallel` library, the operation fails if the input is of type `GeneratorType`. The current implementation does not handle `GeneratorType` inputs correctly, attempting to use them without converting to a list, causing errors or unexpected behavior.\n\n**Steps to Reproduce:**\n1. Create a DataFrame and prepare a generator producing DataFrames.\n2. Use the `parallel_apply` method on the DataFrame with the generator input.\n\n**Expected Behavior:** The `parallel_apply` method should convert `GeneratorType` inputs to a list before further operations.\n\n**Actual Behavior:** The method fails due to improper handling of `GeneratorType` inputs.\n\n### Specific Code Information Relevant to the Issue:\n\nThe relevant code segments that handle the `apply` functionality are found in `dataframe.py`:\n\n```python\nclass Apply(DataType):\n    ...\n    @staticmethod\n    def get_chunks(nb_workers: int, data: pd.DataFrame, **kwargs) -> Iterator[pd.DataFrame]:\n        ...\n        for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n            yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n    @staticmethod\n    def work(data: pd.DataFrame, user_defined_function: Callable, user_defined_function_args: tuple, user_defined_function_kwargs: Dict[str, Any], extra: Dict[str, Any]) -> pd.DataFrame:\n        return data.apply(user_defined_function, *user_defined_function_args, **user_defined_function_kwargs)\n\n    @staticmethod\n    def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.DataFrame:\n        axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n        return pd.concat(list(datas), copy=False, axis=axis)\n```\n\nThe critical method here is the `reduce` method, which is where the error occurs:\n\n```python\n@staticmethod\ndef reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.DataFrame:\n    axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n    return pd.concat(datas, copy=False, axis=axis)\n```\n\n### Error Message Summary\n\nTypeError occurred in `reduce` method:\n\n```\nTypeError: 'generator' object is not subscriptable\n```\n\n### Analysis and Identified Cause\n\nThe core issue lies in the `reduce` method's handling of `datas`. When `datas` is a generator (which it is in this context), trying to use subscript notation (`datas[0]`) results in a `TypeError` because generators do not support indexing.\n\n### Solution: Converting Generator to List\n\nTo fix this issue, convert the generator to a list before performing any operations on `datas`. This ensures that the data can be accessed via indexing:\n\n**Commit Content:**\n```python\n@staticmethod\ndef reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.DataFrame:\n    datas = list(datas)  # Add this line to convert generator to list\n    axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n    return pd.concat(datas, copy=False, axis=axis)\n```\n\n**Explanation:**\n1. **Conversion to List:** `datas = list(datas)` converts the generator to a list, allowing subsequent subscript operations.\n2. **Access via Subscript:** This conversion enables accessing `datas[0]` to check if it is a `pd.Series` or not, which informs the `axis` setting.\n3. **Concatenation:** The `pd.concat` function can then operate correctly on a list of DataFrames.\n\nBy converting the generator to a list, all methods that expect list-like behavior from `datas` can now function correctly, preventing the `TypeError`.\n\n### Testing the Solution\n\nAfter the fix, the `parallel_apply` function should be tested with scenarios where the input data is of `GeneratorType` to ensure the issue is resolved.\n\n### Conclusion:\n\nThis detailed explanation covers the issue's root cause, the solution with specific code changes, and how the fix addresses the problem, ensuring robust handling of `GeneratorType` inputs in the `parallel_apply` method."
    },
    "issue_ground_truth": {
      "title": "DataFrame Parallel Apply Fails with GeneratorType Input",
      "description": "When using the `parallel_apply` method on a DataFrame, if the input data is of type `GeneratorType`, the operation fails. This is because the current implementation does not handle `GeneratorType` inputs correctly and attempts to use it without converting it into a list, causing unexpected behaviors or errors. To overcome this limitation, the `parallel_apply` method should first convert `GeneratorType` inputs into a list before proceeding with further operations.\n\nSteps to Reproduce:\n1. Create a DataFrame and prepare a generator that produces constituent DataFrames.\n2. Attempt to use the `parallel_apply` method on this DataFrame with the generator input.\n\nExpected Behavior:\n- The `parallel_apply` method should correctly handle `GeneratorType` inputs by converting them into a list and then performing the apply operation as expected.\n\nActual Behavior:\n- The method fails or misbehaves due to improper handling of `GeneratorType` inputs.\n\nThis issue affects parallel processing capabilities and needs to be addressed to ensure robust handling of various input types for parallel operations.",
      "explanation": "### Summary of the Issue\n\nThe issue reported revolves around the `parallel_apply` method of the `DataFrame` class, specifically when the input data is of type `GeneratorType`. The `parallel_apply` method is intended to apply a function in parallel to a DataFrame to take advantage of multiple cores, but it fails or misbehaves if the input is a generator. This failure occurs because the implementation attempts to manipulate the generator directly instead of converting it into a list, leading to unexpected behaviors or errors.\n\n### Details of the Commit\n\nThe commit introduces a fix to handle `GeneratorType` inputs properly in the `reduce` function within the `dataframe.py` file. The core change made in the commit can be broadly summarized as follows:\n\n1. **Conversion of GeneratorType to List**: \n   - The commit checks if the input data (`datas`) is of type `GeneratorType`.\n   - If the input data is identified as a generator, it converts the generator into a list before proceeding with further operations.\n\n### Explanation of How the Commit Solves the Issue\n\n#### Cause of the Issue:\n- The `parallel_apply` method needs to manage and process chunks of data efficiently. Generators, unlike lists, do not support multiple single passes, implying that once a generator is consumed, it cannot be reused unless explicitly converted to a list.\n- Failure to convert a generator into a list causes errors or unpredictable behavior when trying to apply parallel operations on DataFrame as the underlying generator gets exhausted after initial usage.\n\n#### How the Commit Fixes It:\n- By converting `GeneratorType` inputs into a list, it ensures that all operations can be performed on the data without the risk of exhausting the generator. This also allows the method to handle multiple operations consistently, as lists can be iterated over multiple times, unlike generators.\n- The immediate conversion of the generator to a list in the `reduce` function means that the parallel apply method reliably processes the data, avoiding the pitfalls associated with single-use iterators (generators).\n\n#### Solution to the Issue:\n- With the fix, when `parallel_apply` encounters a `GeneratorType` input, it seamlessly converts the generator to a list.\n- This conversion ensures that subsequent operations on the data within the `parallel_apply` method function as expected, thereby maintaining stability and reliability.\n- The fix effectively increases the robustness of the `parallel_apply` method, ensuring it can now accommodate generator inputs without failure, making the parallel processing of DataFrames more versatile and dependable.\n\n### Summary\nIn essence, the commit addresses a critical limitation in how the `parallel_apply` method deals with generator inputs by adding a necessary conversion step. This small but significant change ensures the method can safely and correctly handle generators, making it more robust and versatile in its parallel processing capabilities. This fix is essential for ensuring consistency and predictability when applying functions in parallel across DataFrame data."
    },
    "location_origin": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "60": "func_series_map"
        },
        "content_all": {
          "57": "\n",
          "58": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "59": "def func_series_map(request):\n",
          "60": "    def func(x):\n",
          "61": "        return math.log10(math.sqrt(math.exp(x**2)))\n",
          "62": "\n",
          "63": "    return dict(\n",
          "64": "        named=func, anonymous=lambda x: math.(...truncated)\n",
          "65": "\n",
          "66": "# Additional fixtures or code below...\n"
        },
        "content_change": {
          "64": "        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n"
        }
      }
    ],
    "location_message": [
      {
        "file": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
        "function": {
          "45": "reduce"
        },
        "content_all": {
          "42": "        ) -> pd.DataFrame:",
          "43": "            if not isinstance(datas, list):",
          "44": "                datas = list(datas)",
          "45": "            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]",
          "46": "            return pd.concat(datas, copy=False, axis=axis)",
          "47": "        @staticmethod",
          "48": "        def work("
        },
        "content_change": {
          "43": "            if not isinstance(datas, list):",
          "44": "                datas = list(datas)"
        }
      }
    ],
    "location_ground": [
      {
        "file": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
        "function": {
          "44": "reduce"
        },
        "content_all": {
          "43": "        @staticmethod\n",
          "44": "        def reduce(\n",
          "45": "            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n",
          "46": "        ) -> pd.DataFrame:\n",
          "47": "            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n",
          "48": "            return pd.concat(datas, copy=False, axis=axis)\n",
          "49": "\n"
        },
        "content_change": {
          "47": "            datas = list(datas)  # Convert generator to list\n",
          "48": "            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n"
        }
      }
    ],
    "location_ground_exp": [
      {
        "file": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
        "function": {
          "44": "reduce"
        },
        "content_all": {
          "41": "        @staticmethod\n",
          "42": "        def reduce(\n",
          "43": "            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n",
          "44": "        ) -> pd.DataFrame:\n",
          "45": "            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n",
          "46": "            return pd.concat(datas, copy=False, axis=axis)\n",
          "47": "\n"
        },
        "content_change": {
          "45": "            if isinstance(datas, GeneratorType):\n",
          "46": "                datas = list(datas)\n"
        }
      }
    ],
    "location_ground_truth": [
      {
        "file": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
        "function": null,
        "content_all": {
          "49": "            return pd.concat(datas, copy=False, axis=axis)\n",
          "50": "\n"
        },
        "content_change": {}
      }
    ],
    "patch_i": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -61,4 +61,5 @@ def func_series_map(request):\n     def func(x):\n         return math.log10(math.sqrt(math.exp(x**2)))\n \n     return dict(\n         named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n",
    "patch_im": "\n--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -45,10 +45,12 @@\n         @staticmethod\n         def reduce(\n             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n         ) -> pd.DataFrame:\n+            datas = list(datas)  # Convert generator to list\n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n             return pd.concat(datas, copy=False, axis=axis)\n",
    "patch_il": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -64,7 +64,7 @@ def func_series_map(request):\n \n     return dict(\n         named=func, \n-        anonymous=lambda x: math.(...truncated)\n+        anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n     )[request.param]\n\n # Additional fixtures or code below...\n",
    "patch_iml": "\n--- a/nalepae_pandarallel/pandarparallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarparallel/data_types/dataframe.py\n@@ -45,7 +45,9 @@ def reduce(\n         datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n     ) -> pd.DataFrame:\n+        if not isinstance(datas, list):\n+            datas = list(datas)\n         axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n         return pd.concat(datas, copy=False, axis=axis)\n",
    "patch_ground": "\n--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -46,8 +46,10 @@\n     def reduce(\n         datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n     ) -> pd.DataFrame:\n+        datas = list(datas)  # Convert generator to list\n         axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n         return pd.concat(datas, copy=False, axis=axis)\n \n     class ApplyMap(DataType):\n         @staticmethod\n         def get_chunks(\n",
    "patch_ground_location": "\n--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -44,8 +44,10 @@\n         @staticmethod\n         def reduce(\n             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n         ) -> pd.DataFrame:\n+            datas = list(datas)  # Convert generator to list\n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n             return pd.concat(datas, copy=False, axis=axis)\n \n     class ApplyMap(DataType):\n         @staticmethod\n",
    "patch_ground_exp": "\n--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -44,6 +44,9 @@\n         ) -> pd.DataFrame:\n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n \n+            if isinstance(datas, GeneratorType):\n+                datas = list(datas)\n+\n             return pd.concat(datas, copy=False, axis=axis)\n \n     class ApplyMap(DataType):\n",
    "patch_ground_all": "\n--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -45,7 +45,12 @@\n         def reduce(\n             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n         ) -> pd.DataFrame:\n+            # Check if input is of type GeneratorType and convert to list\n+            if isinstance(datas, GeneratorType):\n+                datas = list(datas)\n+                \n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n             return pd.concat(datas, copy=False, axis=axis)\n \n",
    "patch_ground_truth": "--- a/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n+++ b/nalepae_pandarallel/pandarallel/data_types/dataframe.py\n@@ -45,6 +45,8 @@\n         def reduce(\n             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n         ) -> pd.DataFrame:\n+            if isinstance(datas, GeneratorType):\n+                datas = list(datas)\n             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n             return pd.concat(datas, copy=False, axis=axis)\n \n",
    "message": "pandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771688d60>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1000-True-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df771661430>, df_size = 1000\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771600900>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |     1000 /     1000 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |     1000 /     1000 |      \n_______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1-False-None] _______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df771607c10>, df_size = 1\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716004a0>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_______________________________________________________________________________ test_dataframe_axis_0_no_reduction[anonymous-1-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_0_no_reduce = <function func_dataframe_apply_axis_0_no_reduce.<locals>.<lambda> at 0x7df772f22e50>, df_size = 1\n\n    def test_dataframe_axis_0_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n        res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\ntests/test_pandarallel.py:360: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7712e1660>, extra = {'axis': 0}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1000-False-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771661b80>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7712e15f0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1000-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607670>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771600510>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n_________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1-False-None] _________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607d30>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716003c0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_________________________________________________________________________________ test_dataframe_axis_1_no_reduction[named-1-True-None] __________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.func at 0x7df771607790>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771688200>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n_____________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df7716de700>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df7716adb30>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] ______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df771661b80>, df_size = 1000\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771620ac0>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n 100.00% :::::::::::::::::::::::::::::::::::::::: |      500 /      500 |      \n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1-False-None] _______________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df772f9db80>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df77162a890>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n_______________________________________________________________________________ test_dataframe_axis_1_no_reduction[anonymous-1-True-None] ________________________________________________________________________________\n\npandarallel_init = None, func_dataframe_apply_axis_1_no_reduce = <function func_dataframe_apply_axis_1_no_reduce.<locals>.<lambda> at 0x7df7716b50d0>, df_size = 1\n\n    def test_dataframe_axis_1_no_reduction(\n        pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n    ):\n        df = pd.DataFrame(\n            dict(\n                a=np.random.randint(1, 10, df_size),\n                b=np.random.randint(1, 10, df_size),\n                c=np.random.randint(1, 10, df_size),\n            )\n        )\n    \n        res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n    \n>       res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\ntests/test_pandarallel.py:378: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \npandarallel/core.py:325: in closure\n    return wrapped_reduce_function(\npandarallel/core.py:199: in closure\n    return reduce_function(dfs, extra)\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\ndatas = <generator object wrap_reduce_function_for_file_system.<locals>.closure.<locals>.<genexpr> at 0x7df771644900>, extra = {'axis': 1}\n\n    @staticmethod\n    def reduce(\n        datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n    ) -> pd.DataFrame:\n>       axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\nE       TypeError: 'generator' object is not subscriptable\n\npandarallel/data_types/dataframe.py:48: TypeError\n------------------------------------------------------------------------------------------------- Captured stdout setup --------------------------------------------------------------------------------------------------\nINFO: Pandarallel will run on 2 workers.\nINFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n-------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------\n 100.00% :::::::::::::::::::::::::::::::::::::::: |        1 /        1 |      \n==================================================================================================== warnings summary ====================================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:235: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n================================================================================================ short test summary info =================================================================================================\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[RuntimeError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[AttributeError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[AttributeError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_invalid_function[ZeroDivisionError-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_0[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_apply_axis_1[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[named-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[named-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[anonymous-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_0[anonymous-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[named-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[named-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[anonymous-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_empty_dataframe_apply_axis_1[anonymous-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_0_no_reduction[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[named-1-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1000-True-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-False-None] - TypeError: 'generator' object is not subscriptable\nFAILED tests/test_pandarallel.py::test_dataframe_axis_1_no_reduction[anonymous-1-True-None] - TypeError: 'generator' object is not subscriptable\n====================================================================================== 46 failed, 171 passed, 16 warnings in 10.16s ======================================================================================",
    "CodeBase": [
      {
        "path": "nalepae_pandarallel/tests/test_pandarallel.py",
        "content": "1 import importlib\n2 import math\n3 \n4 import numpy as np\n5 import pandas as pd\n6 import pytest\n7 from pandarallel import pandarallel\n8 \n9 \n10 @pytest.fixture(params=(1000, 1))\n11 def df_size(request):\n12     return request.param\n13 \n14 \n15 @pytest.fixture(params=(False, True))\n16 def progress_bar(request):\n17     return request.param\n18 \n19 \n20 @pytest.fixture(params=(None, False))\n21 def use_memory_fs(request):\n22     return request.param\n23 \n24 \n25 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n26 def exception(request):\n27     return request.param\n28 \n29 \n30 @pytest.fixture(params=(\"named\", \"anonymous\"))\n31 def func_dataframe_apply_axis_0(request):\n32     def func(x):\n33         return max(x) - min(x)\n34 \n35     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n36 \n37 \n38 @pytest.fixture(params=(\"named\", \"anonymous\"))\n39 def func_dataframe_apply_axis_1(request):\n40     def func(x):\n41         return math.sin(x.a**2) + math.sin(x.b**2)\n42 \n43     return dict(\n44         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n45     )[request.param]\n46 \n47 \n48 @pytest.fixture(params=(\"named\", \"anonymous\"))\n49 def func_dataframe_applymap(request):\n50     def func(x):\n51         return math.sin(x**2) - math.cos(x**2)\n52 \n53     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n54         request.param\n55     ]\n56 \n57 \n58 @pytest.fixture(params=(\"named\", \"anonymous\"))\n59 def func_series_map(request):\n60     def func(x):\n61         return math.log10(math.sqrt(math.exp(x**2)))\n62 \n63     return dict(\n64         named=func, anonymous=lambda x: math.(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
        "content": "1 from typing import Any, Callable, Dict, Iterable, Iterator\n2 from types import GeneratorType\n3 \n4 import pandas as pd\n5 \n6 from ..utils import chunk, get_axis_int\n7 from .generic import DataType\n8 \n9 \n10 class DataFrame:\n11     class Apply(DataType):\n12         @staticmethod\n13         def get_chunks(\n14             nb_workers: int, data: pd.DataFrame, **kwargs\n15         ) -> Iterator[pd.DataFrame]:\n16             user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n17 \n18             axis_int = get_axis_int(user_defined_function_kwargs)\n19             opposite_axis_int = 1 - axis_int\n20 \n21             for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n22                 yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n23 \n24         @staticmethod\n25         def work(\n26             data: pd.DataFrame,\n27             user_defined_function: Callable,\n28             user_defined_function_args: tuple,\n29             user_defined_function_kwargs: Dict[str, Any],\n30             extra: Dict[str, Any],\n31         ) -> pd.DataFrame:\n32             return data.apply(\n33                 user_defined_function,\n34                 *user_defined_function_args,\n35                 **user_defined_function_kwargs,\n36             )\n37 \n38         @staticmethod\n39         def get_reduce_extra(\n40             data: Any, user_defined_function_kwargs: Dict[str, Any]\n41         ) -> Dict[str, Any]:\n42             return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n43 \n44         @staticmethod\n45         def reduce(\n46             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n47         ) -> pd.DataFrame:\n48             axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n49             return pd.concat(datas, copy=False, axis=axis)\n50 \n51     class ApplyMap(DataType):\n52         @staticmethod\n53         def get_chunks(\n54             nb_workers: int, data: pd.DataFrame, **kwargs\n55         ) -> Iterator[pd.DataFrame]:\n56             for chunk_ in chunk(data.shape[0], nb_workers):\n57                 yield data.iloc[chunk_]\n58 \n59         @staticmethod\n60         def work(\n61             data: pd.DataFrame,\n62             user_defined_function: Callable,\n63             user_defined_function_args: tuple,\n64             user_defined_function_kwargs: Dict[str, Any],\n65             extra: Dict[str, Any],\n66         ) -> pd.DataFrame:\n67             return data.applymap(user_defined_function)\n68 \n69         @staticmethod\n70         def reduce(\n71             datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n72         ) -> pd.DataFrame:\n73             return pd.concat(datas, copy=False)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/core.py",
        "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_W(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/utils.py",
        "content": "1 import itertools\n2 from enum import Enum\n3 from typing import Any, Dict, List, Tuple\n4 \n5 import pandas as pd\n6 from pandas import DataFrame, Index\n7 \n8 \n9 def chunk(nb_item: int, nb_chunks: int, start_offset=0(...truncated)"
      }
    ],
    "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f"
  },
  "Score": {
    "Difficulty": "Easy",
    "issue_origin": {
      "Title": 7,
      "Description": 6,
      "Reproducibility": 6,
      "Relevance": 6,
      "Explanation": 7,
      "Overall": 6.5
    },
    "issue_message": {
      "Title": 6,
      "Description": 5,
      "Reproducibility": 4,
      "Relevance": 8,
      "Explanation": 7,
      "Overall": 6
    },
    "issue_ground": {
      "Title": 7,
      "Description": 8,
      "Reproducibility": 8,
      "Relevance": 8,
      "Explanation": 8,
      "Overall": 8
    },
    "issue_ground_truth": {
      "title": "DataFrame Parallel Apply Fails with GeneratorType Input",
      "description": "When using the `parallel_apply` method on a DataFrame, if the input data is of type `GeneratorType`, the operation fails. This is because the current implementation does not handle `GeneratorType` inputs correctly and attempts to use it without converting it into a list, causing unexpected behaviors or errors. To overcome this limitation, the `parallel_apply` method should first convert `GeneratorType` inputs into a list before proceeding with further operations.\n\nSteps to Reproduce:\n1. Create a DataFrame and prepare a generator that produces constituent DataFrames.\n2. Attempt to use the `parallel_apply` method on this DataFrame with the generator input.\n\nExpected Behavior:\n- The `parallel_apply` method should correctly handle `GeneratorType` inputs by converting them into a list and then performing the apply operation as expected.\n\nActual Behavior:\n- The method fails or misbehaves due to improper handling of `GeneratorType` inputs.\n\nThis issue affects parallel processing capabilities and needs to be addressed to ensure robust handling of various input types for parallel operations.",
      "explanation": "### Summary of the Issue\n\nThe issue reported revolves around the `parallel_apply` method of the `DataFrame` class, specifically when the input data is of type `GeneratorType`. The `parallel_apply` method is intended to apply a function in parallel to a DataFrame to take advantage of multiple cores, but it fails or misbehaves if the input is a generator. This failure occurs because the implementation attempts to manipulate the generator directly instead of converting it into a list, leading to unexpected behaviors or errors.\n\n### Details of the Commit\n\nThe commit introduces a fix to handle `GeneratorType` inputs properly in the `reduce` function within the `dataframe.py` file. The core change made in the commit can be broadly summarized as follows:\n\n1. **Conversion of GeneratorType to List**: \n   - The commit checks if the input data (`datas`) is of type `GeneratorType`.\n   - If the input data is identified as a generator, it converts the generator into a list before proceeding with further operations.\n\n### Explanation of How the Commit Solves the Issue\n\n#### Cause of the Issue:\n- The `parallel_apply` method needs to manage and process chunks of data efficiently. Generators, unlike lists, do not support multiple single passes, implying that once a generator is consumed, it cannot be reused unless explicitly converted to a list.\n- Failure to convert a generator into a list causes errors or unpredictable behavior when trying to apply parallel operations on DataFrame as the underlying generator gets exhausted after initial usage.\n\n#### How the Commit Fixes It:\n- By converting `GeneratorType` inputs into a list, it ensures that all operations can be performed on the data without the risk of exhausting the generator. This also allows the method to handle multiple operations consistently, as lists can be iterated over multiple times, unlike generators.\n- The immediate conversion of the generator to a list in the `reduce` function means that the parallel apply method reliably processes the data, avoiding the pitfalls associated with single-use iterators (generators).\n\n#### Solution to the Issue:\n- With the fix, when `parallel_apply` encounters a `GeneratorType` input, it seamlessly converts the generator to a list.\n- This conversion ensures that subsequent operations on the data within the `parallel_apply` method function as expected, thereby maintaining stability and reliability.\n- The fix effectively increases the robustness of the `parallel_apply` method, ensuring it can now accommodate generator inputs without failure, making the parallel processing of DataFrames more versatile and dependable.\n\n### Summary\nIn essence, the commit addresses a critical limitation in how the `parallel_apply` method deals with generator inputs by adding a necessary conversion step. This small but significant change ensures the method can safely and correctly handle generators, making it more robust and versatile in its parallel processing capabilities. This fix is essential for ensuring consistency and predictability when applying functions in parallel across DataFrame data."
    }
  }
}