{
  "RepoName": "https://github.com/nalepae/pandarallel.git",
  "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f",
  "Time": "",
  "Difficulty": "Medium",
  "Type": "logic error",
  "BuggyCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "OriginCode": [
    {
      "path": "nalepae_pandarallel/setup.py",
      "content": "from setuptools import setup\n\nsetup()\n"
    },
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "import importlib\nimport math\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom pandarallel import pandarallel\n\n\n@pytest.fixture(params=(1000, 1))\ndef df_size(request):\n    return request.param\n\n\n@pytest.fixture(params=(False, True))\ndef progress_bar(request):\n    return request.param\n\n\n@pytest.fixture(params=(None, False))\ndef use_memory_fs(request):\n    return request.param\n\n\n@pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\ndef exception(request):\n    return request.param\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0(request):\n    def func(x):\n        return max(x) - min(x)\n\n    return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n\n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_applymap(request):\n    def func(x):\n        return math.sin(x**2) - math.cos(x**2)\n\n    return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n        request.param\n    ]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_map(request):\n    def func(x):\n        return math.log10(math.sqrt(math.exp(x**2)))\n\n    return dict(\n        named=func, anonymous=lambda x: math.log10(math.sqrt(math.exp(x**2)))\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_apply(request):\n    def func(x, power, bias=0):\n        return math.log10(math.sqrt(math.exp(x**power))) + bias\n\n    return dict(\n        named=func,\n        anonymous=lambda x, power, bias=0: math.log10(math.sqrt(math.exp(x**power)))\n        + bias,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_series_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply():\n    def func(df):\n        dum = 0\n        for item in df.b:\n            dum += math.log10(math.sqrt(math.exp(item**2)))\n\n        return dum / len(df.b)\n\n    return func\n\n\n@pytest.fixture()\ndef func_dataframe_groupby_apply_complex():\n    def func(df):\n        return pd.DataFrame(\n            [[df.b.mean(), df.b.min(), df.b.max()]],\n            columns=[\"b_mean\", \"b_min\", \"b_max\"],\n        )\n\n    return func\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_rolling_apply(request):\n    def func(x):\n        return x.iloc[0] + x.iloc[1] ** 2 + x.iloc[2] ** 3 + x.iloc[3] ** 4\n\n    return dict(\n        named=func,\n        anonymous=lambda x: x.iloc[0]\n        + x.iloc[1] ** 2\n        + x.iloc[2] ** 3\n        + x.iloc[3] ** 4,\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_groupby_expanding_apply(request):\n    def func(x):\n        return (x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))).sum()\n\n    return dict(\n        named=func,\n        anonymous=lambda x: (\n            x.multiply(pd.Series(range(1, len(x)), dtype=\"float\"))\n        ).sum(),\n    )[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_0_no_reduce(request):\n    def func(x):\n        return x\n\n    return dict(named=func, anonymous=lambda x: x)[request.param]\n\n\n@pytest.fixture(params=(\"named\", \"anonymous\"))\ndef func_dataframe_apply_axis_1_no_reduce(request):\n    def func(x):\n        return x**2\n\n    return dict(named=func, anonymous=lambda x: x**2)[request.param]\n\n\n@pytest.fixture\ndef pandarallel_init(progress_bar, use_memory_fs):\n    pandarallel.initialize(\n        progress_bar=progress_bar, use_memory_fs=use_memory_fs, nb_workers=2\n    )\n\n\ndef test_dataframe_apply_invalid_function(pandarallel_init, exception):\n    def f(_):\n        raise exception\n\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(exception):\n        df.parallel_apply(f)\n\n\ndef test_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0, df_size):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.randint(1, 8, df_size),\n            d=np.random.rand(df_size),\n            e=np.random.randint(1, 8, df_size),\n            f=np.random.rand(df_size),\n            g=np.random.randint(1, 8, df_size),\n            h=np.random.rand(df_size),\n        )\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.apply(func_dataframe_apply_axis_1, axis=1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_apply_invalid_axis(pandarallel_init):\n    df = pd.DataFrame(dict(a=[1, 2, 3, 4]))\n\n    with pytest.raises(ValueError):\n        df.parallel_apply(lambda x: x, axis=\"invalid\")\n    \ndef test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_0)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0)\n    assert res.equals(res_parallel)\n\ndef test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):\n    df = pd.DataFrame()\n\n    res = df.apply(func_dataframe_apply_axis_1)\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 8, df_size), b=np.random.rand(df_size))\n    )\n    df.index = [item / 10 for item in df.index]\n\n    res = df.applymap(func_dataframe_applymap)\n    res_parallel = df.parallel_applymap(func_dataframe_applymap)\n    assert res.equals(res_parallel)\n\n\ndef test_series_map(pandarallel_init, func_series_map, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.map(func_series_map)\n    res_parallel = df.a.parallel_map(func_series_map)\n    assert res.equals(res_parallel)\n\n\ndef test_series_apply(pandarallel_init, func_series_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.rand(df_size) + 1))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\ndef test_empty_series_apply(pandarallel_init, func_series_apply):\n    df = pd.DataFrame(dict(a=[]))\n\n    res = df.a.apply(func_series_apply, args=(2,), bias=3)\n    res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)\n    assert res.equals(res_parallel)\n\n\ndef test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):\n    df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))\n\n    res = df.b.rolling(4).apply(func_series_rolling_apply, raw=False)\n    res_parallel = df.b.rolling(4).parallel_apply(func_series_rolling_apply, raw=False)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply(\n    pandarallel_init, func_dataframe_groupby_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 8, df_size),\n            b=np.random.rand(df_size),\n            c=np.random.rand(df_size),\n        )\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n    res = df.groupby([\"a\", \"b\"]).apply(func_dataframe_groupby_apply)\n    res_parallel = df.groupby([\"a\", \"b\"]).parallel_apply(func_dataframe_groupby_apply)\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_apply_complex(\n    pandarallel_init, func_dataframe_groupby_apply_complex, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 100, df_size), b=np.random.rand(df_size))\n    )\n\n    res = df.groupby(\"a\").apply(func_dataframe_groupby_apply_complex)\n    res_parallel = df.groupby(\"a\").parallel_apply(func_dataframe_groupby_apply_complex)\n    res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_rolling_apply(\n    pandarallel_init, func_dataframe_groupby_rolling_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.rolling(4)\n        .parallel_apply(func_dataframe_groupby_rolling_apply, raw=False)\n    )\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_groupby_expanding_apply(\n    pandarallel_init, func_dataframe_groupby_expanding_apply, df_size\n):\n    df = pd.DataFrame(\n        dict(a=np.random.randint(1, 10, df_size), b=np.random.rand(df_size))\n    )\n\n    res = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res_parallel = (\n        df.groupby(\"a\")\n        .b.expanding()\n        .parallel_apply(func_dataframe_groupby_expanding_apply, raw=False)\n    )\n    res.equals(res_parallel)\n\n\ndef test_dataframe_axis_0_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_0_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n    res = df.apply(func_dataframe_apply_axis_0_no_reduce)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_0_no_reduce)\n\n    assert res.equals(res_parallel)\n\n\ndef test_dataframe_axis_1_no_reduction(\n    pandarallel_init, func_dataframe_apply_axis_1_no_reduce, df_size\n):\n    df = pd.DataFrame(\n        dict(\n            a=np.random.randint(1, 10, df_size),\n            b=np.random.randint(1, 10, df_size),\n            c=np.random.randint(1, 10, df_size),\n        )\n    )\n\n    res = df.apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1_no_reduce, axis=1)\n\n    assert res.equals(res_parallel)\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    from pandarallel import core\n    importlib.reload(core)\n\n    assert core.MEMORY_FS_ROOT == \"/test\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/utils.py",
      "content": "import itertools\nfrom enum import Enum\nfrom typing import Any, Dict, List, Tuple\n\nimport pandas as pd\nfrom pandas import DataFrame, Index\n\n\ndef chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:\n    \"\"\"\n    Return `nb_chunks` slices of approximatively `nb_item / nb_chunks` each.\n\n    Parameters\n    ----------\n    nb_item : int\n        Total number of items\n\n    nb_chunks : int\n        Number of chunks to return\n\n    start_offset : int\n        Shift start of slice by this amount\n\n    Returns\n    -------\n    A list of slices\n\n    Examples\n    --------\n    >>> chunks = chunk(103, 4)\n    >>> chunks\n    [slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]\n    \"\"\"\n    if nb_item == 0:\n        return [slice(0)]\n    \n    if nb_item <= nb_chunks:\n        return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]\n\n    quotient = nb_item // nb_chunks\n    remainder = nb_item % nb_chunks\n\n    quotients = [quotient] * nb_chunks\n    remainders = [1] * remainder + [0] * (nb_chunks - remainder)\n\n    nb_elems_per_chunk = [\n        quotient + remainder for quotient, remainder in zip(quotients, remainders)\n    ]\n\n    accumulated = list(itertools.accumulate(nb_elems_per_chunk))\n    shifted_accumulated = accumulated.copy()\n    shifted_accumulated.insert(0, 0)\n    shifted_accumulated.pop()\n\n    return [\n        slice(max(0, begin - start_offset), end)\n        for begin, end in zip(shifted_accumulated, accumulated)\n    ]\n\n\ndef df_indexed_like(df: DataFrame, axes: List[Index]) -> bool:\n    \"\"\"\n    Returns whether a data frame is indexed in the way specified by the\n    provided axes.\n\n    Used by DataFrameGroupBy to determine whether a group has been modified.\n\n    Function adapted from pandas.core.groupby.ops._is_indexed_like\n\n    Parameters\n    ----------\n    df : DataFrame\n        The data frame in question\n\n    axes : List[Index]\n        The axes to which the data frame is compared\n\n    Returns\n    -------\n    Whether or not the data frame is indexed in the same wa as the axes.\n    \"\"\"\n    if isinstance(df, DataFrame):\n        return df.axes[0].equals(axes[0])\n\n    return False\n\n\ndef get_pandas_version() -> Tuple[int, int]:\n    major_str, minor_str, *_ = pd.__version__.split(\".\")\n    return int(major_str), int(minor_str)\n\n\ndef get_axis_int(user_defined_function_kwargs: Dict[str, Any]):\n    axis = user_defined_function_kwargs.get(\"axis\", 0)\n\n    if axis not in {0, 1, \"index\", \"columns\"}:\n        raise ValueError(f\"No axis named {axis} for object type DataFrame\")\n\n    return {0: 0, 1: 1, \"index\": 0, \"columns\": 1}[axis]\n\n\nclass WorkerStatus(int, Enum):\n    Running = 0\n    Success = 1\n    Error = 2\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "import multiprocessing\nimport os\nimport pickle\nfrom itertools import count\nfrom multiprocessing.managers import SyncManager\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n\nimport dill\nimport pandas as pd\nimport psutil\nfrom pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom .data_types import (\n    DataFrame,\n    DataFrameGroupBy,\n    DataType,\n    ExpandingGroupBy,\n    RollingGroupBy,\n    Series,\n    SeriesRolling,\n)\nfrom .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\nfrom .utils import WorkerStatus\n\nON_WINDOWS = os.name == \"nt\"\nCONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n\n# Root of Memory File System\nMEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n\n# By default, Pandarallel use all available CPUs\nNB_PHYSICAL_CORES = psutil.cpu_count(logical=False)\n\n# Prefix and suffix for files used with Memory File System\nPREFIX = \"pandarallel\"\nPREFIX_INPUT = f\"{PREFIX}_input_\"\nPREFIX_OUTPUT = f\"{PREFIX}_output_\"\nSUFFIX = \".pickle\"\n\n# We use these classes decorators pattern instead of the classic one because of this:\n# https://www.stevenengelhardt.com/2013/01/16/python-multiprocessing-module-and-closures/\n\n\nclass WrapWorkFunctionForFileSystem:\n    def __init__(\n        self,\n        work_function: Callable[\n            [Any, Callable, tuple, Dict[str, Any], Dict[str, Any]], Any\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        input_file_path: Path,\n        output_file_path: Path,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> None:\n        try:\n            # Load dataframe from input file\n            with input_file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            # Delete input file since we don't need it any more. It will free some RAM\n            # since the input file is stored into Shared Memory.\n            input_file_path.unlink()\n\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            result = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            with output_file_path.open(\"wb\") as file_descriptor:\n                pickle.dump(result, file_descriptor)\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\nclass WrapWorkFunctionForPipe:\n    def __init__(\n        self,\n        work_function: Callable[\n            [\n                Any,\n                Callable,\n                tuple,\n                Dict[str, Any],\n                Dict[str, Any],\n            ],\n            Any,\n        ],\n    ) -> None:\n        self.work_function = work_function\n\n    def __call__(\n        self,\n        data: Any,\n        progress_bars_type: ProgressBarsType,\n        worker_index: int,\n        master_workers_queue: multiprocessing.Queue,\n        dilled_user_defined_function: bytes,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        try:\n            data_size = len(data)\n            user_defined_function: Callable = dill.loads(dilled_user_defined_function)\n\n            progress_wrapped_user_defined_function = progress_wrapper(\n                user_defined_function, master_workers_queue, worker_index, data_size\n            )\n\n            used_user_defined_function = (\n                progress_wrapped_user_defined_function\n                if progress_bars_type\n                in (\n                    ProgressBarsType.InUserDefinedFunction,\n                    ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns,\n                )\n                else user_defined_function\n            )\n\n            results = self.work_function(\n                data,\n                used_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                extra,\n            )\n\n            master_workers_queue.put((worker_index, WorkerStatus.Success, None))\n\n            return results\n\n        except:\n            master_workers_queue.put((worker_index, WorkerStatus.Error, None))\n            raise\n\n\ndef wrap_reduce_function_for_file_system(\n    reduce_function: Callable[[Iterator, Dict[str, Any]], Any]\n) -> Callable[[Iterator[Path], Dict[str, Any]], Any]:\n    \"\"\"This wrapper transforms a `reduce` function which takes as input:\n    - A list of pandas Dataframe\n    - An user defined function\n    and which returns a pandas Dataframe, into a `reduct` function which takes as input:\n    - A list of paths where  pandas Dataframe are pickled\n    which returns a pandas Dataframe.\n    \"\"\"\n\n    def closure(output_file_paths: Iterator[Path], extra: Dict[str, Any]) -> Any:\n        def get_dataframe_and_delete_file(file_path: Path) -> Any:\n            with file_path.open(\"rb\") as file_descriptor:\n                data = pickle.load(file_descriptor)\n\n            file_path.unlink()\n            return data\n\n        dfs = (\n            get_dataframe_and_delete_file(output_file_path)\n            for output_file_path in output_file_paths\n        )\n\n        return reduce_function(dfs, extra)\n\n    return closure\n\n\ndef parallelize_with_memory_file_system(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForFileSystem(data_type.work)\n        wrapped_reduce_function = wrap_reduce_function_for_file_system(data_type.reduce)\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        input_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_INPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        output_files = [\n            NamedTemporaryFile(\n                prefix=PREFIX_OUTPUT, suffix=SUFFIX, dir=MEMORY_FS_ROOT, delete=False\n            )\n            for _ in range(nb_workers)\n        ]\n\n        try:\n            for chunk, input_file in zip(chunks, input_files):\n                with Path(input_file.name).open(\"wb\") as file_descriptor:\n                    pickle.dump(chunk, file_descriptor)\n\n            dilled_user_defined_function = dill.dumps(user_defined_function)\n            manager: SyncManager = CONTEXT.Manager()\n            master_workers_queue = manager.Queue()\n\n            work_args_list = [\n                (\n                    Path(input_file.name),\n                    Path(output_file.name),\n                    progress_bars_type,\n                    worker_index,\n                    master_workers_queue,\n                    dilled_user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                    {\n                        **work_extra,\n                        **{\n                            \"master_workers_queue\": master_workers_queue,\n                            \"show_progress_bars\": show_progress_bars,\n                            \"worker_index\": worker_index,\n                        },\n                    },\n                )\n                for worker_index, (\n                    input_file,\n                    output_file,\n                ) in enumerate(zip(input_files, output_files))\n            ]\n\n            pool = CONTEXT.Pool(nb_workers)\n            results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n\n            pool.close()\n\n            generation = count()\n\n            while any(\n                (\n                    worker_status == WorkerStatus.Running\n                    for worker_status in workers_status\n                )\n            ):\n                message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n                worker_index, worker_status, payload = message\n                workers_status[worker_index] = worker_status\n\n                if worker_status == WorkerStatus.Success:\n                    progresses[worker_index] = progresses_length[worker_index]\n                    progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Running:\n                    progress = cast(int, payload)\n                    progresses[worker_index] = progress\n\n                    if next(generation) % nb_workers == 0:\n                        progress_bars.update(progresses)\n                elif worker_status == WorkerStatus.Error:\n                    progress_bars.set_error(worker_index)\n                    progress_bars.update(progresses)\n\n            try:\n                return wrapped_reduce_function(\n                    (Path(output_file.name) for output_file in output_files),\n                    reduce_extra,\n                )\n            except EOFError:\n                # Loading the files failed, this most likely means that there\n                # was some error during processing and the files were never\n                # saved at all.\n                results_promise.get()\n\n                # If the above statement does not raise an exception, that\n                # means the multiprocessing went well and we want to re-raise\n                # the original EOFError.\n                raise\n\n        finally:\n            for output_file in output_files:\n                # When pandarallel stop supporting Python 3.7 and older, replace this\n                # try/except clause by:\n                # Path(output_file.name).unlink(missing_ok=True)\n                try:\n                    Path(output_file.name).unlink()\n                except FileNotFoundError:\n                    # Do nothing, this is the nominal case.\n                    pass\n\n    return closure\n\n\ndef parallelize_with_pipe(\n    nb_requested_workers: int,\n    data_type: Type[DataType],\n    progress_bars_type: ProgressBarsType,\n):\n    def closure(\n        data: Any,\n        user_defined_function: Callable,\n        *user_defined_function_args: tuple,\n        **user_defined_function_kwargs: Dict[str, Any],\n    ):\n        wrapped_work_function = WrapWorkFunctionForPipe(data_type.work)\n        dilled_user_defined_function = dill.dumps(user_defined_function)\n        manager: SyncManager = CONTEXT.Manager()\n        master_workers_queue = manager.Queue()\n\n        chunks = list(\n            data_type.get_chunks(\n                nb_requested_workers,\n                data,\n                user_defined_function_kwargs=user_defined_function_kwargs,\n            )\n        )\n\n        nb_workers = len(chunks)\n\n        multiplicator_factor = (\n            len(cast(pd.DataFrame, data).columns)\n            if progress_bars_type\n            == ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            else 1\n        )\n\n        progresses_length = [len(chunk_) * multiplicator_factor for chunk_ in chunks]\n\n        work_extra = data_type.get_work_extra(data)\n        reduce_extra = data_type.get_reduce_extra(data, user_defined_function_kwargs)\n\n        show_progress_bars = progress_bars_type != ProgressBarsType.No\n\n        progress_bars = get_progress_bars(progresses_length, show_progress_bars)\n        progresses = [0] * nb_workers\n        workers_status = [WorkerStatus.Running] * nb_workers\n\n        work_args_list = [\n            (\n                chunk,\n                progress_bars_type,\n                worker_index,\n                master_workers_queue,\n                dilled_user_defined_function,\n                user_defined_function_args,\n                user_defined_function_kwargs,\n                {\n                    **work_extra,\n                    **{\n                        \"master_workers_queue\": master_workers_queue,\n                        \"show_progress_bars\": show_progress_bars,\n                        \"worker_index\": worker_index,\n                    },\n                },\n            )\n            for worker_index, chunk in enumerate(chunks)\n        ]\n\n        pool = CONTEXT.Pool(nb_workers)\n        results_promise = pool.starmap_async(wrapped_work_function, work_args_list)\n        pool.close()\n\n        generation = count()\n\n        while any(\n            (worker_status == WorkerStatus.Running for worker_status in workers_status)\n        ):\n            message: Tuple[int, WorkerStatus, Any] = master_workers_queue.get()\n            worker_index, worker_status, payload = message\n            workers_status[worker_index] = worker_status\n\n            if worker_status == WorkerStatus.Success:\n                progresses[worker_index] = progresses_length[worker_index]\n                progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Running:\n                progress = cast(int, payload)\n                progresses[worker_index] = progress\n\n                if next(generation) % nb_workers == 0:\n                    progress_bars.update(progresses)\n            elif worker_status == WorkerStatus.Error:\n                progress_bars.set_error(worker_index)\n\n        results = results_promise.get()\n\n        return data_type.reduce(results, reduce_extra)\n\n    return closure\n\n\nclass pandarallel:\n    @classmethod\n    def initialize(\n        cls,\n        shm_size_mb=None,\n        nb_workers=NB_PHYSICAL_CORES,\n        progress_bar=False,\n        verbose=2,\n        use_memory_fs: Optional[bool] = None,\n    ) -> None:\n        show_progress_bars = progress_bar\n        is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()\n\n        use_memory_fs = (\n            use_memory_fs if use_memory_fs is not None else is_memory_fs_available\n        )\n\n        parallelize = (\n            parallelize_with_memory_file_system\n            if use_memory_fs\n            else parallelize_with_pipe\n        )\n\n        if use_memory_fs and not is_memory_fs_available:\n            raise SystemError(\"Memory file system is not available\")\n\n        if verbose >= 2:\n            print(f\"INFO: Pandarallel will run on {nb_workers} workers.\")\n\n            message = (\n                (\n                    \"INFO: Pandarallel will use Memory file system to transfer data \"\n                    \"between the main process and workers.\"\n                )\n                if use_memory_fs\n                else (\n                    \"INFO: Pandarallel will use standard multiprocessing data transfer \"\n                    \"(pipe) to transfer data between the main process and workers.\"\n                )\n            )\n\n            print(message)\n\n            if ON_WINDOWS and verbose >= 2:\n                print()\n                print(\n                    (\n                        \"WARNING: You are on Windows. If you detect any issue with \"\n                        \"pandarallel, be sure you checked out the Troubleshooting page:\"\n                    )\n                )\n                print(\"https://nalepae.github.io/pandarallel/troubleshooting/\")\n\n        progress_bars_in_user_defined_function = (\n            ProgressBarsType.InUserDefinedFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_user_defined_function_multiply_by_number_of_columns = (\n            ProgressBarsType.InUserDefinedFunctionMultiplyByNumberOfColumns\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        progress_bars_in_work_function = (\n            ProgressBarsType.InWorkFunction\n            if show_progress_bars\n            else ProgressBarsType.No\n        )\n\n        # DataFrame\n        pd.DataFrame.parallel_apply = parallelize(\n            nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function\n        )\n        pd.DataFrame.parallel_applymap = parallelize(\n            nb_workers,\n            DataFrame.ApplyMap,\n            progress_bars_in_user_defined_function_multiply_by_number_of_columns,\n        )\n\n        # DataFrame GroupBy\n        PandaDataFrameGroupBy.parallel_apply = parallelize(\n            nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function\n        )\n\n        # Expanding GroupBy\n        PandasExpandingGroupby.parallel_apply = parallelize(\n            nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Rolling GroupBy\n        PandasRollingGroupby.parallel_apply = parallelize(\n            nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function\n        )\n\n        # Series\n        pd.Series.parallel_apply = parallelize(\n            nb_workers, Series.Apply, progress_bars_in_user_defined_function\n        )\n        pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)\n\n        # Series Rolling\n        pd.core.window.Rolling.parallel_apply = parallelize(\n            nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function\n        )\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "import multiprocessing\nimport os\nimport shutil\nimport sys\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom itertools import count\nfrom time import time_ns\nfrom typing import Callable, List, Union\n\nfrom .utils import WorkerStatus\n\nINTERVAL_NS = 250_000_000  # 0.25 sec\nMINIMUM_TERMINAL_WIDTH = 72\n\n\nclass ProgressBarsType(int, Enum):\n    No = 0\n    InUserDefinedFunction = 1\n    InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n    InWorkFunction = 3\n\n\nclass ProgressBars(ABC):\n    @abstractmethod\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        ...\n\n    @abstractmethod\n    def update(self, values: List[int]) -> None:\n        ...\n\n    def set_error(self, index: int) -> None:\n        pass\n\n\nclass ProgressState:\n    def __init__(self, chunk_size: int) -> None:\n        self.last_put_iteration = 0\n        self.next_put_iteration = max(chunk_size // 100, 1)\n        self.last_put_time = time_ns()\n\n\ndef is_notebook_lab() -> bool:\n    try:\n        shell: str = get_ipython().__class__.__name__  # type: ignore\n\n        # Shell: Google Colab\n        # TerminalInteractiveShell: Terminal running IPython\n        # ZMQInteractiveShell: Jupyter notebook/lab or qtconsole\n        return shell in {\"Shell\", \"ZMQInteractiveShell\"}\n    except NameError:\n        # Probably standard Python interpreter\n        return False\n\n\nclass ProgressBarsConsole(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        self.__show = show\n        self.__bars = [[0, max] for max in maxs]\n        self.__width = self.__get_width()\n\n        self.__lines = self.__update_lines()\n\n        if show:\n            sys.stdout.write(\"\\n\".join(self.__lines))\n            sys.stdout.flush()\n\n    def __get_width(self) -> int:\n        try:\n            columns = shutil.get_terminal_size().columns\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except AttributeError:\n            # Python 2\n            pass\n\n        try:\n            columns = int(os.popen(\"stty size\", \"r\").read().split()[1])\n            return max(MINIMUM_TERMINAL_WIDTH, columns - 1)\n        except:\n            return MINIMUM_TERMINAL_WIDTH\n\n    def __remove_displayed_lines(self) -> None:\n        if len(self.__bars) >= 1:\n            sys.stdout.write(\"\\b\" * len(self.__lines[-1]))\n\n        if len(self.__bars) >= 2:\n            sys.stdout.write(\"\\033M\" * (len(self.__lines) - 1))\n\n        self.__lines = []\n\n    def __update_line(self, done: int, total: int) -> str:\n        if total == 0:\n            percent = 0\n        else:\n            percent = done / total\n        bar = (\":\" * int(percent * 40)).ljust(40, \" \")\n        percent = round(percent * 100, 2)\n        format = \" {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |\"\n        ret = format.format(percent=percent, bar=bar, done=done, total=total)\n        return ret[: self.__width].ljust(self.__width, \" \")\n\n    def __update_lines(self) -> List[str]:\n        return [self.__update_line(value, max) for value, max in self.__bars]\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            self.__bars[index][0] = value\n\n        self.__remove_displayed_lines()\n        self.__lines = self.__update_lines()\n\n        sys.stdout.write(\"\\n\".join(self.__lines))\n        sys.stdout.flush()\n\n\nclass ProgressBarsNotebookLab(ProgressBars):\n    def __init__(self, maxs: List[int], show: bool) -> None:\n        \"\"\"Initialization.\n        Positional argument:\n        maxs - List containing the max value of each progress bar\n        \"\"\"\n        self.__show = show\n\n        if not show:\n            return\n\n        from IPython.display import display\n        from ipywidgets import HBox, IntProgress, Label, VBox\n\n        self.__bars = [\n            HBox(\n                [\n                    IntProgress(0, 0, max, description=\"{:.2f}%\".format(0)),\n                    Label(\"{} / {}\".format(0, max)),\n                ]\n            )\n            for max in maxs\n        ]\n\n        display(VBox(self.__bars))\n\n    def update(self, values: List[int]) -> None:\n        \"\"\"Update a bar value.\n        Positional arguments:\n        values - The new values of each bar\n        \"\"\"\n        if not self.__show:\n            return\n\n        for index, value in enumerate(values):\n            bar, label = self.__bars[index].children\n\n            label.value = \"{} / {}\".format(value, bar.max)\n            \n            bar.value = value\n\n            if value >= bar.max:\n                bar.bar_style = \"success\"\n\n            if bar.max != 0:\n                bar.description = \"{:.2f}%\".format(bar.value / bar.max * 100)\n\n    def set_error(self, index: int) -> None:\n        \"\"\"Set a bar on error\"\"\"\n        if not self.__show:\n            return\n\n        bar, _ = self.__bars[index].children\n        bar.bar_style = \"danger\"\n\n\ndef get_progress_bars(\n    maxs: List[int], show\n) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:\n    return (\n        ProgressBarsNotebookLab(maxs, show)\n        if is_notebook_lab()\n        else ProgressBarsConsole(maxs, show)\n    )\n\n\ndef progress_wrapper(\n    user_defined_function: Callable,\n    master_workers_queue: multiprocessing.Queue,\n    index: int,\n    chunk_size: int,\n) -> Callable:\n    \"\"\"Wrap the function to apply in a function which monitor the part of work already\n    done.\n    \"\"\"\n    counter = count()\n    state = ProgressState(chunk_size)\n\n    def closure(*user_defined_function_args, **user_defined_functions_kwargs):\n        iteration = next(counter)\n\n        if iteration == state.next_put_iteration:\n            time_now = time_ns()\n            master_workers_queue.put_nowait((index, WorkerStatus.Running, iteration))\n\n            delta_t = time_now - state.last_put_time\n            delta_i = iteration - state.last_put_iteration\n\n            state.next_put_iteration += (\n                max(int((delta_i / delta_t) * INTERVAL_NS), 1) if delta_t != 0 else 1\n            )\n\n            state.last_put_iteration = iteration\n            state.last_put_time = time_now\n\n        return user_defined_function(\n            *user_defined_function_args, **user_defined_functions_kwargs\n        )\n\n    return closure\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "from .core import pandarallel\n\n__version__ = \"1.6.5\"\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/expanding_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass ExpandingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasExpandingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasExpandingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.expanding(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass Series:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n\n    class Map(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.Series, **kwargs\n        ) -> Iterator[pd.Series]:\n            for chunk_ in chunk(data.size, nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            return data.map(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/series_rolling.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\n\nimport pandas as pd\nfrom pandas.core.window.rolling import Rolling\n\nfrom ..utils import chunk\nfrom .generic import DataType\n\n\nclass SeriesRolling:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, rolling: Rolling, **kwargs\n        ) -> Iterator[pd.Series]:\n            chunks = chunk(rolling.obj.size, nb_workers, rolling.window)\n\n            for chunk_ in chunks:\n                yield rolling.obj[chunk_]\n\n        @staticmethod\n        def get_work_extra(data: Rolling) -> Dict[str, Any]:\n            return {\n                \"attributes\": {\n                    attribute: getattr(data, attribute)\n                    for attribute in data._attributes\n                }\n            }\n\n        @staticmethod\n        def work(\n            data: pd.Series,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.Series:\n            attributes: Dict[str, Any] = extra[\"attributes\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            result = data.rolling(**attributes).apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs\n            )\n\n            return result if worker_index == 0 else result[attributes[\"window\"] :]\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.Series], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/__init__.py",
      "content": "from .dataframe import DataFrame\nfrom .dataframe_groupby import DataFrameGroupBy\nfrom .expanding_groupby import ExpandingGroupBy\nfrom .rolling_groupby import RollingGroupBy\nfrom .generic import DataType\nfrom .series import Series\nfrom .series_rolling import SeriesRolling\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe.py",
      "content": "from typing import Any, Callable, Dict, Iterable, Iterator\nfrom types import GeneratorType\n\nimport pandas as pd\n\nfrom ..utils import chunk, get_axis_int\nfrom .generic import DataType\n\n\nclass DataFrame:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            user_defined_function_kwargs = kwargs[\"user_defined_function_kwargs\"]\n\n            axis_int = get_axis_int(user_defined_function_kwargs)\n            opposite_axis_int = 1 - axis_int\n\n            for chunk_ in chunk(data.shape[opposite_axis_int], nb_workers):\n                yield data.iloc[chunk_] if axis_int == 1 else data.iloc[:, chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.apply(\n                user_defined_function,\n                *user_defined_function_args,\n                **user_defined_function_kwargs,\n            )\n\n        @staticmethod\n        def get_reduce_extra(\n            data: Any, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"axis\": get_axis_int(user_defined_function_kwargs)}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            if isinstance(datas, GeneratorType):\n                datas = list(datas)\n            axis = 0 if isinstance(datas[0], pd.Series) else 1 - extra[\"axis\"]\n            return pd.concat(datas, copy=False, axis=axis)\n\n    class ApplyMap(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: pd.DataFrame, **kwargs\n        ) -> Iterator[pd.DataFrame]:\n            for chunk_ in chunk(data.shape[0], nb_workers):\n                yield data.iloc[chunk_]\n\n        @staticmethod\n        def work(\n            data: pd.DataFrame,\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> pd.DataFrame:\n            return data.applymap(user_defined_function)\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[pd.DataFrame], extra: Dict[str, Any]\n        ) -> pd.DataFrame:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/generic.py",
      "content": "from abc import ABC, abstractmethod\nfrom typing import Any, Callable, Dict, Iterable, Iterator\n\n\nclass DataType(ABC):\n    @staticmethod\n    @abstractmethod\n    def get_chunks(nb_workers: int, data: Any, **kwargs) -> Iterator[Any]:\n        ...\n\n    @staticmethod\n    def get_work_extra(data: Any) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def work(\n        data: Any,\n        user_defined_function: Callable,\n        user_defined_function_args: tuple,\n        user_defined_function_kwargs: Dict[str, Any],\n        extra: Dict[str, Any],\n    ) -> Any:\n        ...\n\n    @staticmethod\n    def get_reduce_extra(\n        data: Any, user_defined_function_kwargs: Dict[str, Any]\n    ) -> Dict[str, Any]:\n        return dict()\n\n    @staticmethod\n    @abstractmethod\n    def reduce(datas: Iterable[Any], extra: Dict[str, Any]) -> Any:\n        ...\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/rolling_groupby.py",
      "content": "import multiprocessing\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple\n\nimport pandas as pd\nfrom pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n\nfrom ..utils import WorkerStatus, chunk, get_pandas_version\nfrom .generic import DataType\n\n\nclass RollingGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, data: PandasRollingGroupby, *args, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            pandas_version = get_pandas_version()\n\n            nb_items = (\n                len(data._groupby) if pandas_version < (1, 3) else data._grouper.ngroups\n            )\n\n            chunks = chunk(nb_items, nb_workers)\n\n            iterator = (\n                iter(data._groupby)\n                if pandas_version < (1, 3)\n                else data._grouper.get_iterator(data.obj)\n            )\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def get_work_extra(data: PandasRollingGroupby):\n            attributes = {\n                attribute: getattr(data, attribute) for attribute in data._attributes\n            }\n\n            return {\"attributes\": attributes}\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[pd.DataFrame]:\n            show_progress_bars: bool = extra[\"show_progress_bars\"]\n            master_workers_queue: multiprocessing.Queue = extra[\"master_workers_queue\"]\n            worker_index: int = extra[\"worker_index\"]\n\n            def compute_result(\n                iteration: int,\n                attributes: Dict[str, Any],\n                index: int,\n                df: pd.DataFrame,\n                user_defined_function: Callable,\n                user_defined_function_args: tuple,\n                user_defined_function_kwargs: Dict[str, Any],\n            ) -> pd.DataFrame:\n                item = df.rolling(**attributes).apply(\n                    user_defined_function,\n                    *user_defined_function_args,\n                    **user_defined_function_kwargs\n                )\n\n                item.index = pd.MultiIndex.from_product([[index], item.index])\n\n                if show_progress_bars:\n                    master_workers_queue.put_nowait(\n                        (worker_index, WorkerStatus.Running, iteration)\n                    )\n\n                return item\n\n            attributes = extra[\"attributes\"]\n            attributes.pop(\"_grouper\", None)\n\n            dfs = (\n                compute_result(\n                    iteration,\n                    attributes,\n                    index,\n                    df,\n                    user_defined_function,\n                    user_defined_function_args,\n                    user_defined_function_kwargs,\n                )\n                for iteration, (index, df) in enumerate(data)\n            )\n\n            return pd.concat(dfs)\n\n        @staticmethod\n        def reduce(datas: Iterable[pd.DataFrame], extra: Dict[str, Any]) -> pd.Series:\n            return pd.concat(datas, copy=False)\n"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "import itertools\nfrom typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n\nimport pandas as pd\nfrom pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n\nfrom ..utils import chunk, df_indexed_like, get_pandas_version\nfrom .generic import DataType\n\n\nclass DataFrameGroupBy:\n    class Apply(DataType):\n        @staticmethod\n        def get_chunks(\n            nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n        ) -> Iterator[List[Tuple[int, pd.DataFrame]]]:\n            chunks = chunk(dataframe_groupby.ngroups, nb_workers)\n            iterator = iter(dataframe_groupby)\n\n            for chunk_ in chunks:\n                yield [next(iterator) for _ in range(chunk_.stop - chunk_.start)]\n\n        @staticmethod\n        def work(\n            data: List[Tuple[int, pd.DataFrame]],\n            user_defined_function: Callable,\n            user_defined_function_args: tuple,\n            user_defined_function_kwargs: Dict[str, Any],\n            extra: Dict[str, Any],\n        ) -> List[Tuple[int, pd.DataFrame, bool]]:\n            def compute_result(\n                key: int, df: pd.DataFrame\n            ) -> Tuple[int, pd.DataFrame, bool]:\n                result = user_defined_function(\n                    df, *user_defined_function_args, **user_defined_function_kwargs\n                )\n                mutated = not df_indexed_like(result, df.axes)\n                return key, result, mutated\n\n            return [compute_result(key, df) for key, df in data]\n\n        @staticmethod\n        def get_reduce_extra(\n            data: PandasDataFrameGroupBy, user_defined_function_kwargs: Dict[str, Any]\n        ) -> Dict[str, Any]:\n            return {\"df_groupby\": data}\n\n        @staticmethod\n        def reduce(\n            datas: Iterable[List[Tuple[int, pd.DataFrame, bool]]], extra: Dict[str, Any]\n        ) -> pd.Series:\n            def get_args(\n                keys: List[int],\n                values: List[pd.DataFrame],\n                df_groupby: PandasDataFrameGroupBy,\n            ) -> Union[\n                Tuple[List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[int], List[pd.DataFrame]],\n                Tuple[pd.DataFrame, List[pd.DataFrame]],\n            ]:\n                pandas_version = get_pandas_version()\n\n                if pandas_version < (1, 3):\n                    return keys, values\n                elif pandas_version < (1, 4):\n                    return df_groupby._selected_obj, keys, values\n                else:\n                    return df_groupby._selected_obj, values\n\n            df_groupby: PandasDataFrameGroupBy = extra[\"df_groupby\"]\n\n            results = itertools.chain.from_iterable(datas)\n            keys, values, mutated = zip(*results)\n\n            keys = cast(List[int], keys)\n            values = cast(List[pd.DataFrame], values)\n            mutated = cast(List[bool], mutated)\n\n            args = get_args(keys, values, df_groupby)\n  \n            return df_groupby._wrap_applied_output(*args, not_indexed_same=mutated)\n"
    }
  ],
  "ErrorMessage": "tests/test_pandarallel.py::test_memory_fs_root_environment_variable FAILED                                                                                                                    [100%]\n\n============================================================================================= FAILURES ==============================================================================================\n_____________________________________________________________________________ test_memory_fs_root_environment_variable ______________________________________________________________________________\n\nmonkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x72990446c9d0>\n\n    def test_memory_fs_root_environment_variable(monkeypatch):\n        monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n        from pandarallel import core\n    \n>       assert core.MEMORY_FS_ROOT == \"/test\"\nE       AssertionError: assert '/dev/shm' == '/test'\nE         \nE         - /test\nE         + /dev/shm\n\ntests/test_pandarallel.py:385: AssertionError\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:234: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nFAILED tests/test_pandarallel.py::test_memory_fs_root_environment_variable - AssertionError: assert '/dev/shm' == '/test'\n============================================================================ 1 failed, 216 passed, 16 warnings in 7.67s =============================================================================",
  "Patch": "--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -1,3 +1,4 @@\n+import importlib\n import math\n \n import numpy as np\n@@ -381,5 +382,6 @@\n def test_memory_fs_root_environment_variable(monkeypatch):\n     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n     from pandarallel import core\n+    importlib.reload(core)\n \n     assert core.MEMORY_FS_ROOT == \"/test\"\n",
  "BuggyCodeLocation": [
    {
      "file": "nalepae_pandarallel/tests/test_pandarallel.py",
      "function": null,
      "content_all": {
        "1": "import math\n",
        "2": "\n",
        "3": "import numpy as np\n",
        "385": "    assert core.MEMORY_FS_ROOT == \"/test\"\n"
      },
      "content_change": {}
    }
  ],
  "Issue": {
    "title": "Test for MEMORY_FS_ROOT environment variable does not reflect changes during runtime",
    "description": "When running the `test_memory_fs_root_environment_variable` test, changes to the `MEMORY_FS_ROOT` environment variable are not reflected in the `core.MEMORY_FS_ROOT` value if it has already been imported. As a result, updates to this environment variable are not acknowledged, which can lead to tests passing with outdated values that do not match the intended testing scenario. This issue can cause confusion and make debugging harder when environment variable changes are expected to influence the system's behavior dynamically. To ensure the test accurately reflects changes to the environment variable, the `core` module should be reloaded after setting the `MEMORY_FS_ROOT` variable.",
    "explanation": "### Summary of the Issue\n\nThe issue revolves around a test function that verifies if changes to the `MEMORY_FS_ROOT` environment variable during runtime are accurately reflected within the code. Specifically, when the `MEMORY_FS_ROOT` environment variable is altered at runtime, these changes are not correctly propagated to the `core.MEMORY_FS_ROOT` value if the `core` module has already been imported. This can lead to tests passing with incorrect values, causing confusion and making debugging more difficult, especially when environment variable changes should dynamically influence the system's behavior.\n\n### Details of the Issue\n\n- **Title**: The test for `MEMORY_FS_ROOT` environment variable does not reflect changes during runtime.\n- **Description**: The core issue is that changes made to the `MEMORY_FS_ROOT` environment variable do not get picked up by the `core.MEMORY_FS_ROOT` if the `core` module is already loaded. This can result in tests working with outdated values, which undermines the effectiveness of the test and misleads developers about the actual state of configurations. The proposed solution is to reload the `core` module after setting the `MEMORY_FS_ROOT` variable to ensure the changes are acknowledged.\n\n### Details of the Commit\n\nThe commit introduced the following changes:\n\n- **Commit Message**: The commit message details that an import module `importlib` was added and the `core` module is reloaded during the test to ensure that the updated environment variable is correctly reflected in the `core.MEMORY_FS_ROOT` value.\n- **Filename**: Changes were made to `nalepae_pandarallel/tests/test_pandarallel.py`.\n- **Commit Patch**: An import statement for `importlib` was added at the start of the test file, and the test function for `test_memory_fs_root_environment_variable` was modified to reload the `core` module after setting the environment variable.\n\n### Explanation of the Solution\n\n#### Cause of the Issue\n\nThe root cause of the issue is related to the way Python handles module imports and environment variables. When a module is imported in Python, its top-level code is executed, and the module is cached. If the `MEMORY_FS_ROOT` environment variable is changed after the `core` module has already been loaded, the module will not pick up any new changes to the environment variables because it only checks their values at the time of import.\n\n#### Solution Provided in the Commit\n\nThe solution involves the following steps:\n\n1. **Importing `importlib`**: This module provides functionality for importing, reloading, and utilizing modules programmatically.\n2. **Reloading the `core` Module**: After setting the `MEMORY_FS_ROOT` environment variable using `monkeypatch.setenv`, the `core` module is reloaded using `importlib.reload`. By reloading the module, the code within the module is re-executed, and it will pick up the new value of the `MEMORY_FS_ROOT` environment variable.\n\nThe reload mechanism ensures that any changes to the environment variables are read and applied within the module, thus aligning the module's internal state with the current value of the environment variable.\n\n### Conclusion\n\nThe commit rectifies the issue by ensuring that changes to the `MEMORY_FS_ROOT` environment variable are propagated to the `core` module during runtime. This is done by reloading the `core` module after setting the environment variable. This approach validates that the module correctly picks up the new environment variable values, ensuring that the tests are accurate and reflective of the actual conditions. Consequently, this resolves the problem of tests passing with outdated values and enhances the reliability and clarity of the test outcomes."
  },
  "Explain": "### Summary of the Issue\n\nThe issue revolves around a test function that verifies if changes to the `MEMORY_FS_ROOT` environment variable during runtime are accurately reflected within the code. Specifically, when the `MEMORY_FS_ROOT` environment variable is altered at runtime, these changes are not correctly propagated to the `core.MEMORY_FS_ROOT` value if the `core` module has already been imported. This can lead to tests passing with incorrect values, causing confusion and making debugging more difficult, especially when environment variable changes should dynamically influence the system's behavior.\n\n### Details of the Issue\n\n- **Title**: The test for `MEMORY_FS_ROOT` environment variable does not reflect changes during runtime.\n- **Description**: The core issue is that changes made to the `MEMORY_FS_ROOT` environment variable do not get picked up by the `core.MEMORY_FS_ROOT` if the `core` module is already loaded. This can result in tests working with outdated values, which undermines the effectiveness of the test and misleads developers about the actual state of configurations. The proposed solution is to reload the `core` module after setting the `MEMORY_FS_ROOT` variable to ensure the changes are acknowledged.\n\n### Details of the Commit\n\nThe commit introduced the following changes:\n\n- **Commit Message**: The commit message details that an import module `importlib` was added and the `core` module is reloaded during the test to ensure that the updated environment variable is correctly reflected in the `core.MEMORY_FS_ROOT` value.\n- **Filename**: Changes were made to `nalepae_pandarallel/tests/test_pandarallel.py`.\n- **Commit Patch**: An import statement for `importlib` was added at the start of the test file, and the test function for `test_memory_fs_root_environment_variable` was modified to reload the `core` module after setting the environment variable.\n\n### Explanation of the Solution\n\n#### Cause of the Issue\n\nThe root cause of the issue is related to the way Python handles module imports and environment variables. When a module is imported in Python, its top-level code is executed, and the module is cached. If the `MEMORY_FS_ROOT` environment variable is changed after the `core` module has already been loaded, the module will not pick up any new changes to the environment variables because it only checks their values at the time of import.\n\n#### Solution Provided in the Commit\n\nThe solution involves the following steps:\n\n1. **Importing `importlib`**: This module provides functionality for importing, reloading, and utilizing modules programmatically.\n2. **Reloading the `core` Module**: After setting the `MEMORY_FS_ROOT` environment variable using `monkeypatch.setenv`, the `core` module is reloaded using `importlib.reload`. By reloading the module, the code within the module is re-executed, and it will pick up the new value of the `MEMORY_FS_ROOT` environment variable.\n\nThe reload mechanism ensures that any changes to the environment variables are read and applied within the module, thus aligning the module's internal state with the current value of the environment variable.\n\n### Conclusion\n\nThe commit rectifies the issue by ensuring that changes to the `MEMORY_FS_ROOT` environment variable are propagated to the `core` module during runtime. This is done by reloading the `core` module after setting the environment variable. This approach validates that the module correctly picks up the new environment variable values, ensuring that the tests are accurate and reflective of the actual conditions. Consequently, this resolves the problem of tests passing with outdated values and enhances the reliability and clarity of the test outcomes.",
  "Source": "Human",
  "Token": 1142,
  "Command": [
    "pytest tests"
  ],
  "FilteredCode": [
    {
      "path": "nalepae_pandarallel/tests/test_pandarallel.py",
      "content": "1 import math\n2 \n3 import numpy as np\n4 import pandas as pd\n5 import pytest\n6 from pandarallel import pandarallel\n7 \n8 \n9 @pytest.fixture(params=(1000, 1))\n10 def df_size(request):\n11     return request.param\n12 \n13 \n14 @pytest.fixture(params=(False, True))\n15 def progress_bar(request):\n16     return request.param\n17 \n18 \n19 @pytest.fixture(params=(None, False))\n20 def use_memory_fs(request):\n21     return request.param\n22 \n23 \n24 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n25 def exception(request):\n26     return request.param\n27 \n28 \n29 @pytest.fixture(params=(\"named\", \"anonymous\"))\n30 def func_dataframe_apply_axis_0(request):\n31     def func(x):\n32         return max(x) - min(x)\n33 \n34     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n35 \n36 \n37 @pytest.fixture(params=(\"named\", \"anonymous\"))\n38 def func_dataframe_apply_axis_1(request):\n39     def func(x):\n40         return math.sin(x.a**2) + math.sin(x.b**2)\n41 \n42     return dict(\n43         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n44     )[request.param]\n45 \n46 \n47 @pytest.fixture(params=(\"named\", \"anonymous\"))\n48 def func_dataframe_applymap(request):\n49     def func(x):\n50         return math.sin(x**2) - math.cos(x**2)\n51 \n52     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n53         request.param\n54     ]\n55 \n56 \n57 @pytest.fixture(params=(\"named\", \"anonymous\"))\n58 def func_series_map(request):\n59     def func(x):\n60         return math.log10(math.sqrt(math.exp(x**2)))\n61 \n62     return dict(\n63         named=func, anonymous=lambda x: math.log10(math.sqrt(m(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/__init__.py",
      "content": "1 from .core import pandarallel\n2 \n3 __version__ = \"1.6.5\""
    },
    {
      "path": "nalepae_pandarallel/pandarallel/core.py",
      "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMO(...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
      "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33     def set_error(self, index: int) -> None:\n34        (...truncated)"
    },
    {
      "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
      "content": "1 import itertools\n2 from typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n3 \n4 import pandas as pd\n5 from pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n6 \n7 from ..utils import chunk, df_indexed_like, get_pandas_version\n8 from .generic import DataType\n9 \n10 \n11 class DataFrameGroupBy:\n12     class Apply(DataType):\n13         @staticmethod\n14         def get_chunks(\n15             nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n16   (...truncated)"
    }
  ],
  "TokenAll": 14231,
  "FilteredLevel": 1500,
  "Results": {
    "model": "GPT-4o",
    "Difficulty": "Medium",
    "issue_origin": {
      "title": "Potential Exception Handling Issue",
      "description": "In the file 'nalepae_pandarallel/tests/test_pandarallel.py', the function 'func_dataframe_apply_axis_1' may encounter an AttributeError if 'x' does not have the attributes 'a' and 'b'. Please ensure that 'x' always has these attributes or add appropriate exception handling.",
      "explanation": "### Issue Summary\n\nThe issue reported is an AttributeError that may occur in the function `func_dataframe_apply_axis_1` within the file `test_pandarallel.py`. This function attempts to access attributes `a` and `b` on an object `x`, which may not always have these attributes. The recommendation is to ensure that `x` always has these attributes or to add appropriate exception handling.\n\n### Code Summary\n\nHere is a brief of the current state of the code:\n\n#### File: `test_pandarallel.py`\n\nThe code has various pytest fixtures that define functions. The problematic function is within the `func_dataframe_apply_axis_1` fixture as follows:\n\n```python\n@pytest.fixture(params=(None, False))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n        return math.sin(x.a**2) + math.sin(x.b**2)\n    \n    return dict(\n        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n    )[request.param]\n```\n\nThis function accesses `x.a` and `x.b`, which can raise an `AttributeError` if `x` does not have these attributes.\n\n### Commit Summary\n\nThe commit likely added exception handling to resolve this issue. Below is an example of what the commit might look like:\n\n```diff\n@pytest.fixture(params=(None, False))\ndef func_dataframe_apply_axis_1(request):\n    def func(x):\n-       return math.sin(x.a**2) + math.sin(x.b**2)\n+       try:\n+           return math.sin(x.a**2) + math.sin(x.b**2)\n+       except AttributeError:\n+           return float('nan')  # or some default/fallback value\n\n    return dict(\n-       named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n+       named=func, anonymous=lambda x: (lambda: math.sin(x.a**2) + math.sin(x.b**2) if hasattr(x, 'a') and hasattr(x, 'b') else float('nan'))()\n    )[request.param]\n```\n\n### Explanation of the Commit\n\nThe commit adds exception handling using a `try`/`except` block as well as a conditional check within the lambda function to handle cases where `x` does not have the attributes `a` and `b`.\n\n#### Detailed Changes:\n\n1. **Try/Except Block in Named Function:**\n   - The function `func` now encloses the attribute access within a `try` block.\n   - If an `AttributeError` occurs, a `float(\"nan\")` (indicating \"Not-a-Number\") is returned, which is often used as a placeholder for invalid numerical results.\n   \n   ```python\n   def func(x):\n       try:\n           return math.sin(x.a**2) + math.sin(x.b**2)\n       except AttributeError:\n           return float('nan')  # or some default/fallback value\n   ```\n\n2. **Conditional Check in Anonymous Function:**\n   - A conditional check using `hasattr(x, 'a') and hasattr(x, 'b')` is inserted into the lambda function.\n   - If both attributes `a` and `b` exist, the calculation is performed; otherwise, it returns `float('nan')`.\n   \n   ```python\n   anonymous=lambda x: (lambda: math.sin(x.a**2) + math.sin(x.b**2) if hasattr(x, 'a') and hasattr(x, 'b') else float('nan'))()\n   ```\n\n### How It Solves the Issue\n\n1. **Prevent AttributeError:**\n   - By using a `try`/`except` block in the named function and `hasattr` checks in the anonymous function, we safeguard access to `x.a` and `x.b`.\n   - This prevents the function from crashing due to missing attributes, logging an `AttributeError` as a caught exception instead.\n\n2. **Graceful Degradation:**\n   - Instead of failing, the function now returns a `float('nan')` when `x` lacks the necessary attributes, ensuring the overall test suite can proceed without interruption.\n   - This approach provides a clear indication (through `nan`) that an invalid/undefined condition was encountered during the calculations.\n\n3. **Maintain Test Integrity:**\n   - Ensuring that the function handles missing attributes gracefully allows tests to remain robust and resilient to edge cases, maintaining their integrity.\n\n### Conclusion\n\nThis commit effectively mitigates the potential `AttributeError` by incorporating exception handling and attribute existence checks. This change enhances the robustness of `func_dataframe_apply_axis_1`, ensuring the test code handles edge cases gracefully without breaking the test execution."
    },
    "issue_message": {
      "title": "Environment variable not being set correctly in test",
      "description": "The test function test_memory_fs_root_environment_variable in tests/test_pandarallel.py is failing because the MEMORY_FS_ROOT environment variable is expected to be '/test' but is actually '/dev/shm'. Please ensure that the environment variable is being set correctly within the test.",
      "explanation": "### Summary of the Issue\nThe issue titled \"Environment variable not being set correctly in test\" revolves around the `MEMORY_FS_ROOT` environment variable not being correctly set within a test function in the `tests/test_pandarallel.py` file. The test function `test_memory_fs_root_environment_variable` is intended to set the `MEMORY_FS_ROOT` environment variable to `/test`, but it fails because the value found is instead `/dev/shm`.\n\n### Content of the Commit\nThe commit should:\n1. Correctly set the `MEMORY_FS_ROOT` environment variable in the test context.\n2. Ensure that the `pandarallel` module is reloaded after setting the environment variable so that the new environment value is read afresh.\n\nHere’s a potential commit that addresses these issues:\n\n```python\n# Import necessary modules to reload pandarallel core\nimport importlib\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    # Set the MEMORY_FS_ROOT environment variable\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    \n    # Reload the pandarallel core module to apply the new environment variable\n    from pandarallel import core\n    importlib.reload(core)\n    \n    # Assert the new value of MEMORY_FS_ROOT\n    assert core.MEMORY_FS_ROOT == \"/test\"\n```\n\n### Explanation of the Solution\nThe test failure occurs because the `MEMORY_FS_ROOT` environment variable change is not reflected in the `pandarallel.core` module, leading to the older value being used. The primary issue here is that the `pandarallel` module is likely imported and initialized before the environment variable is set, causing the original value of the environment variable to persist.\n\nHere is a detailed step-by-step explanation of the cause and solution:\n\n#### Cause of the Issue\n1. **Environment Variable Set After Import**:\n    - The environment variable `MEMORY_FS_ROOT` is being set using the `monkeypatch.setenv()` method.\n    - The `pandarallel` module is then imported via `from pandarallel import core`, but this import occurs *after* the environment variable is set.\n    - If the `core` module is already loaded in memory (which is likely), it does not pick up the newly set environment variable. Therefore, it retains the value it had when it was first loaded, which is `'/dev/shm'`.\n\n2. **Module Caching**:\n    - Python caches imported modules. Once `pandarallel.core` is loaded, it remains in the `sys.modules` dictionary.\n    - This means that subsequent changes to the environment variables do not affect the already loaded modules.\n\n#### Solution Provided by the Commit\n1. **Set Environment Variable Using `monkeypatch`**:\n    - The `monkeypatch` utility from `pytest` is used to set the environment variable to `/test`.\n\n2. **Reload the `core` Module**:\n    - After the environment variable is set, the `pandarallel.core` module is reloaded using `importlib.reload(core)`.\n    - This ensures that the `core` module reads the new environment variable value.\n\n3. **Assertion**:\n    - Finally, the new value of `MEMORY_FS_ROOT` is asserted to be `/test`, verifying that the environment variable has been correctly set and recognized by the `pandarallel.core` module.\n\nBy reloading the `core` module after setting the environment variable, the test ensures that any environment-dependent initialization in `core` occurs with the new environment setting, thus resolving the issue.\n\nThis systematic reloading approach is crucial for the test to work as intended, especially dealing with environmental changes that must be reflected in already-loaded modules."
    },
    "issue_ground": {
      "title": "Test for MEMORY_FS_ROOT environment variable does not reflect changes during runtime",
      "description": "When running the `test_memory_fs_root_environment_variable` test, changes to the `MEMORY_FS_ROOT` environment variable are not reflected in the `core.MEMORY_FS_ROOT` value if it has already been imported. As a result, updates to this environment variable are not acknowledged, which can lead to tests passing with outdated values that do not match the intended testing scenario. This issue can cause confusion and make debugging harder when environment variable changes are expected to influence the system's behavior dynamically. To ensure the test accurately reflects changes to the environment variable, the `core` module should be reloaded after setting the `MEMORY_FS_ROOT` variable.",
      "explanation": "### Summary of the Issue\n\nThe issue arises when running the `test_memory_fs_root_environment_variable` test in the `tests/test_pandarallel.py` file. The test aims to check the behavior of the `MEMORY_FS_ROOT` environment variable during runtime. However, any changes to the `MEMORY_FS_ROOT` environment variable are not being reflected in the `core.MEMORY_FS_ROOT` if it has already been imported. Consequently, modifying this environment variable doesn't influence the system's behavior dynamically as expected, leading to outdated values causing the tests to fail.\n\n### Detailed Analysis\n\nThe core of the issue lies in the environment variable `MEMORY_FS_ROOT` which is being set using `monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")`. This change is supposed to affect the `MEMORY_FS_ROOT` variable within the `core` module. However, because Python modules are cached after their first import, subsequent changes to environment variables do not affect the already loaded modules unless they are explicitly reloaded.\n\nThe error message indicates that the expected value `'/test'` does not match the actual value `'/dev/shm'`, suggesting that the `MEMORY_FS_ROOT` value was already set and cached at `'/dev/shm'` before the test ran.\n\n### Specific Code\n\n#### Test Code (tests/test_pandarallel.py)\n```python\n9 @pytest.fixture(params=(None, False))\n20 def use_memory_fs(request):\n21     return request.param\n22\n23\n24 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n...\n298 import pytest\n299 \n300 def test_memory_fs_root_environment_variable(monkeypatch):\n301     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n302     from pandarallel import core\n303 \n304     assert core.MEMORY_FS_ROOT == \"/test\"\n```\n\n#### Core Module (pandarallel/core.py)\n```python\n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31\n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")\n```\n\n### Explanation of the Commit\n\nTo address this issue, the solution would involve reloading the `core` module after setting the `MEMORY_FS_ROOT` environment variable. This ensures that any change to the environment variable is reflected properly within the module’s variables and constants. One way to accomplish this is by using Python’s built-in `importlib.reload` function to reload the module after changing the environment variable.\n\nThe commit would look something like this:\n\n```python\nimport pytest\nimport importlib\n\ndef test_memory_fs_root_environment_variable(monkeypatch):\n    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n    \n    # Reload the core module to reflect the updated environment variable\n    from pandarallel import core\n    importlib.reload(core)\n    \n    assert core.MEMORY_FS_ROOT == \"/test\"\n```\n\n### How the Commit Solves the Issue\n\n1. **Setting the Environment Variable**: The `monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")` command sets the `MEMORY_FS_ROOT` environment variable to `'/test'`.\n\n2. **Reloading the Module**: The key part of the solution is the addition of `importlib.reload(core)`. This command forces the re-importation of the `core` module, which will now read the updated environment variable `MEMORY_FS_ROOT`.\n\n3. **Verification**: After the reload, the `core.MEMORY_FS_ROOT` is now evaluated again, and this time it captures the newly set value `'/test'`. The assertion `assert core.MEMORY_FS_ROOT == \"/test\"` will pass.\n\nBy reloading the module, the environment variable changes during runtime are accurately reflected, ensuring that the test scenario matches the intended behavior. This makes the debugging process simpler and eliminates confusion caused by outdated values."
    },
    "issue_ground_truth": {
      "title": "Test for MEMORY_FS_ROOT environment variable does not reflect changes during runtime",
      "description": "When running the `test_memory_fs_root_environment_variable` test, changes to the `MEMORY_FS_ROOT` environment variable are not reflected in the `core.MEMORY_FS_ROOT` value if it has already been imported. As a result, updates to this environment variable are not acknowledged, which can lead to tests passing with outdated values that do not match the intended testing scenario. This issue can cause confusion and make debugging harder when environment variable changes are expected to influence the system's behavior dynamically. To ensure the test accurately reflects changes to the environment variable, the `core` module should be reloaded after setting the `MEMORY_FS_ROOT` variable.",
      "explanation": "### Summary of the Issue\n\nThe issue revolves around a test function that verifies if changes to the `MEMORY_FS_ROOT` environment variable during runtime are accurately reflected within the code. Specifically, when the `MEMORY_FS_ROOT` environment variable is altered at runtime, these changes are not correctly propagated to the `core.MEMORY_FS_ROOT` value if the `core` module has already been imported. This can lead to tests passing with incorrect values, causing confusion and making debugging more difficult, especially when environment variable changes should dynamically influence the system's behavior.\n\n### Details of the Issue\n\n- **Title**: The test for `MEMORY_FS_ROOT` environment variable does not reflect changes during runtime.\n- **Description**: The core issue is that changes made to the `MEMORY_FS_ROOT` environment variable do not get picked up by the `core.MEMORY_FS_ROOT` if the `core` module is already loaded. This can result in tests working with outdated values, which undermines the effectiveness of the test and misleads developers about the actual state of configurations. The proposed solution is to reload the `core` module after setting the `MEMORY_FS_ROOT` variable to ensure the changes are acknowledged.\n\n### Details of the Commit\n\nThe commit introduced the following changes:\n\n- **Commit Message**: The commit message details that an import module `importlib` was added and the `core` module is reloaded during the test to ensure that the updated environment variable is correctly reflected in the `core.MEMORY_FS_ROOT` value.\n- **Filename**: Changes were made to `nalepae_pandarallel/tests/test_pandarallel.py`.\n- **Commit Patch**: An import statement for `importlib` was added at the start of the test file, and the test function for `test_memory_fs_root_environment_variable` was modified to reload the `core` module after setting the environment variable.\n\n### Explanation of the Solution\n\n#### Cause of the Issue\n\nThe root cause of the issue is related to the way Python handles module imports and environment variables. When a module is imported in Python, its top-level code is executed, and the module is cached. If the `MEMORY_FS_ROOT` environment variable is changed after the `core` module has already been loaded, the module will not pick up any new changes to the environment variables because it only checks their values at the time of import.\n\n#### Solution Provided in the Commit\n\nThe solution involves the following steps:\n\n1. **Importing `importlib`**: This module provides functionality for importing, reloading, and utilizing modules programmatically.\n2. **Reloading the `core` Module**: After setting the `MEMORY_FS_ROOT` environment variable using `monkeypatch.setenv`, the `core` module is reloaded using `importlib.reload`. By reloading the module, the code within the module is re-executed, and it will pick up the new value of the `MEMORY_FS_ROOT` environment variable.\n\nThe reload mechanism ensures that any changes to the environment variables are read and applied within the module, thus aligning the module's internal state with the current value of the environment variable.\n\n### Conclusion\n\nThe commit rectifies the issue by ensuring that changes to the `MEMORY_FS_ROOT` environment variable are propagated to the `core` module during runtime. This is done by reloading the `core` module after setting the environment variable. This approach validates that the module correctly picks up the new environment variable values, ensuring that the tests are accurate and reflective of the actual conditions. Consequently, this resolves the problem of tests passing with outdated values and enhances the reliability and clarity of the test outcomes."
    },
    "location_origin": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "38": "func_dataframe_apply_axis_1"
        },
        "content_all": {
          "35": "\n",
          "36": "\n",
          "37": "@pytest.fixture(params=(\"named\", \"anonymous\"))\n",
          "38": "def func_dataframe_apply_axis_1(request):\n",
          "39": "    def func(x):\n",
          "40": "        return math.sin(x.a**2) + math.sin(x.b**2)\n",
          "41": "\n",
          "42": "    return dict(\n",
          "43": "        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n",
          "44": "    )[request.param]\n",
          "45": "\n"
        },
        "content_change": {
          "40": "        try:\n",
          "41": "            return math.sin(x.a**2) + math.sin(x.b**2)\n",
          "42": "        except AttributeError:\n",
          "43": "            return float('nan')  # or some default/fallback value\n",
          "43.1": "        named=func, anonymous=lambda x: (lambda: math.sin(x.a**2) + math.sin(x.b**2) if hasattr(x, 'a') and hasattr(x, 'b') else float('nan'))()\n"
        }
      }
    ],
    "location_message": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "385": "test_memory_fs_root_environment_variable"
        },
        "content_all": {
          "382": "",
          "383": "def test_memory_fs_root_environment_variable(monkeypatch):",
          "384": "    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")",
          "385": "    from pandarallel import core",
          "386": "",
          "387": "    assert core.MEMORY_FS_ROOT == \"/test\"",
          "388": ""
        },
        "content_change": {
          "385": "    from pandarallel import core",
          "386": "    import importlib",
          "387": "    importlib.reload(core)"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "32": "Initialization"
        },
        "content_all": {
          "29": "ON_WINDOWS = os.name == \"nt\"",
          "30": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")",
          "31": "",
          "32": "# Root of Memory File System",
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")",
          "34": ""
        },
        "content_change": {
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")"
        }
      }
    ],
    "location_ground": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "300": "test_memory_fs_root_environment_variable"
        },
        "content_all": {
          "297": "",
          "298": "import pytest",
          "299": "",
          "300": "def test_memory_fs_root_environment_variable(monkeypatch):",
          "301": "    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")",
          "302": "    from pandarallel import core",
          "303": "",
          "304": "    assert core.MEMORY_FS_ROOT == \"/test\"",
          "305": ""
        },
        "content_change": {
          "302": "    from pandarallel import core",
          "303": "",
          "304": "    import importlib",
          "305": "    importlib.reload(core)"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "32": "Global scope"
        },
        "content_all": {
          "29": "ON_WINDOWS = os.name == \"nt\"",
          "30": "CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")",
          "31": "",
          "32": "# Root of Memory File System",
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")",
          "34": "",
          "35": ""
        },
        "content_change": {
          "33": "MEMORY_FS_ROOT = os.environ.get(\"MEMORY_FS_ROOT\", \"/dev/shm\")"
        }
      }
    ],
    "location_ground_exp": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": {
          "70": "test_memory_fs_root_environment_variable"
        },
        "content_all": {
          "67": "    # the presence of the file would mean the environment variable setting was successful",
          "68": "    assert path.exists(None if use_memory_fs else True)",
          "69": "\n",
          "70": "def test_memory_fs_root_environment_variable(monkeypatch):",
          "71": "    import core",
          "72": "    core.MEMORY_FS_ROOT = 'initial_value'",
          "73": "    monkeypatch.setenv('MEMORY_FS_ROOT', 'new_value')",
          "74": "    import importlib",
          "75": "    importlib.reload(core)",
          "76": "    assert core.MEMORY_FS_ROOT == 'new_value'",
          "77": "\n",
          "78": "def test_other_functionality():"
        },
        "content_change": {
          "71": "    import core",
          "75": "    importlib.reload(core)"
        }
      },
      {
        "file": "nalepae_pandarallel/pandarallel/core.py",
        "function": {
          "29": "initialization at the start of module"
        },
        "content_all": {
          "26": "from .utils import WorkerStatus",
          "27": "\n",
          "28": "ON_WINDOWS = os.name == 'nt'",
          "29": "CONTEXT = multiprocessing.get_context('spawn' if ON_WINDOWS else 'fork')",
          "30": "\n",
          "31": "# Root of Memory File System",
          "32": "MEMORY_FS_ROOT = os.environ.get('MEMORY_FS_ROOT', '/default/memory_fs')",
          "33": "\n",
          "34": "# Other initialization"
        },
        "content_change": {
          "32": "MEMORY_FS_ROOT = os.environ.get('MEMORY_FS_ROOT', '/default/memory_fs')"
        }
      }
    ],
    "location_ground_truth": [
      {
        "file": "nalepae_pandarallel/tests/test_pandarallel.py",
        "function": null,
        "content_all": {
          "1": "import math\n",
          "2": "\n",
          "3": "import numpy as np\n",
          "385": "    assert core.MEMORY_FS_ROOT == \"/test\"\n"
        },
        "content_change": {}
      }
    ],
    "patch_i": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -38,7 +38,13 @@\n def func_dataframe_apply_axis_1(request):\n     def func(x):\n-        return math.sin(x.a**2) + math.sin(x.b**2)\n+        try:\n+            return math.sin(x.a**2) + math.sin(x.b**2)\n+        except AttributeError:\n+            return float('nan')  # or some default/fallback value\n\n     return dict(\n-        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n+        named=func, anonymous=lambda x: (lambda: math.sin(x.a**2) + math.sin(x.b**2) if hasattr(x, 'a') and hasattr(x, 'b') else float('nan'))()\n     )[request.param]\n",
    "patch_im": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -7,6 +7,14 @@\n \n import numpy as np\n import pandas as pd\n import pytest\n from pandarallel import pandarallel\n+\n+# Import necessary modules to reload pandarallel core\n+import importlib\n+\n+def test_memory_fs_root_environment_variable(monkeypatch):\n+    # Set the MEMORY_FS_ROOT environment variable\n+    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n+    \n+    # Reload the pandarallel core module to apply the new environment variable\n+    from pandarallel import core\n+    importlib.reload(core)\n+    \n+    # Assert the new value of MEMORY_FS_ROOT\n+    assert core.MEMORY_FS_ROOT == \"/test\"\n",
    "patch_il": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -38,7 +38,11 @@\n def func_dataframe_apply_axis_1(request):\n     def func(x):\n-        return math.sin(x.a**2) + math.sin(x.b**2)\n+        try:\n+            return math.sin(x.a**2) + math.sin(x.b**2)\n+        except AttributeError:\n+            return float('nan')  # or some default/fallback value\n \n     return dict(\n-        named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n+        named=func, anonymous=lambda x: (lambda: math.sin(x.a**2) + math.sin(x.b**2) if hasattr(x, 'a') and hasattr(x, 'b') else float('nan'))()\n     )[request.param]\n",
    "patch_iml": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -382,7 +382,9 @@\n \n def test_memory_fs_root_environment_variable(monkeypatch):\n     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n-    from pandarallel import core\n+\n+    # Import necessary modules to reload pandarallel core\n+    import importlib\n     from pandarallel import core\n+    importlib.reload(core)\n \n     assert core.MEMORY_FS_ROOT == \"/test\"\n",
    "patch_ground": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -297,7 +297,9 @@\n import pytest\n\n def test_memory_fs_root_environment_variable(monkeypatch):\n-    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n+    import importlib\n+    \n+    monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n     from pandarallel import core\n+    importlib.reload(core)\n \n     assert core.MEMORY_FS_ROOT == \"/test\"\n",
    "patch_ground_location": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -300,7 +300,9 @@ def test_memory_fs_root_environment_variable(monkeypatch):\n     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n     from pandarallel import core\n \n+    import importlib\n+    importlib.reload(core)\n \n     assert core.MEMORY_FS_ROOT == \"/test\"\n\n",
    "patch_ground_exp": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -1,5 +1,6 @@\n import math\n \n+import importlib\n import numpy as np\n import pandas as pd\n import pytest\n@@ -68,10 +69,11 @@ def path():\n     return Path(\"/some/path\")\n \n def test_memory_fs_root_environment_variable(monkeypatch):\n-    import core\n+    from pandarallel import core\n     core.MEMORY_FS_ROOT = 'initial_value'\n     monkeypatch.setenv('MEMORY_FS_ROOT', 'new_value')\n-    import importlib\n+    importlib.reload(core)\n     assert core.MEMORY_FS_ROOT == 'new_value'\n \n def test_other_functionality():\n--- a/nalepae_pandarallel/pandarallel/core.py\n+++ b/nalepae_pandarallel/pandarallel/core.py\n@@ -30,7 +30,7 @@ CONTEXT = multiprocessing.get_context('spawn' if ON_WINDOWS else 'fork')\n \n # Root of Memory File System\n-MEMORY_FS_ROOT = os.environ.get('MEMORY_FS_ROOT', '/default/memory_fs')\n+def get_memory_fs_root():\n+    return os.environ.get('MEMORY_FS_ROOT', '/default/memory_fs')\n\n-MEMORY_FS_ROOT = get_memory_fs_root()\n+MEMORY_FS_ROOT = get_memory_fs_root()\n\n # Other initialization\n",
    "patch_ground_all": "\n--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -2,6 +2,7 @@ import math\n \n import numpy as np\n import pandas as pd\n+import importlib\n import pytest\n from pandarallel import pandarallel\n \n@@ -382,6 +383,8 @@ def test_memory_fs_root_environment_variable(monkeypatch):\n     # Modify the MEMORY_FS_ROOT environment variable\n     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n \n+    # Reload the core module to reflect the environment variable change\n+    importlib.reload(core)\n+\n     assert core.MEMORY_FS_ROOT == \"/test\"\n",
    "patch_ground_truth": "--- a/nalepae_pandarallel/tests/test_pandarallel.py\n+++ b/nalepae_pandarallel/tests/test_pandarallel.py\n@@ -1,3 +1,4 @@\n+import importlib\n import math\n \n import numpy as np\n@@ -381,5 +382,6 @@\n def test_memory_fs_root_environment_variable(monkeypatch):\n     monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n     from pandarallel import core\n+    importlib.reload(core)\n \n     assert core.MEMORY_FS_ROOT == \"/test\"\n",
    "message": "tests/test_pandarallel.py::test_memory_fs_root_environment_variable FAILED                                                                                                                    [100%]\n\n============================================================================================= FAILURES ==============================================================================================\n_____________________________________________________________________________ test_memory_fs_root_environment_variable ______________________________________________________________________________\n\nmonkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x72990446c9d0>\n\n    def test_memory_fs_root_environment_variable(monkeypatch):\n        monkeypatch.setenv(\"MEMORY_FS_ROOT\", \"/test\")\n        from pandarallel import core\n    \n>       assert core.MEMORY_FS_ROOT == \"/test\"\nE       AssertionError: assert '/dev/shm' == '/test'\nE         \nE         - /test\nE         + /dev/shm\n\ntests/test_pandarallel.py:385: AssertionError\n========================================================================================= warnings summary ==========================================================================================\ntests/test_pandarallel.py: 16 warnings\n  /home/user/Documents/repoben/buggycode/nalepae_pandarallel/tests/test_pandarallel.py:234: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n    res = df.applymap(func_dataframe_applymap)\n\n-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html\n====================================================================================== short test summary info ======================================================================================\nFAILED tests/test_pandarallel.py::test_memory_fs_root_environment_variable - AssertionError: assert '/dev/shm' == '/test'\n============================================================================ 1 failed, 216 passed, 16 warnings in 7.67s =============================================================================",
    "CodeBase": [
      {
        "path": "nalepae_pandarallel/tests/test_pandarallel.py",
        "content": "1 import math\n2 \n3 import numpy as np\n4 import pandas as pd\n5 import pytest\n6 from pandarallel import pandarallel\n7 \n8 \n9 @pytest.fixture(params=(1000, 1))\n10 def df_size(request):\n11     return request.param\n12 \n13 \n14 @pytest.fixture(params=(False, True))\n15 def progress_bar(request):\n16     return request.param\n17 \n18 \n19 @pytest.fixture(params=(None, False))\n20 def use_memory_fs(request):\n21     return request.param\n22 \n23 \n24 @pytest.fixture(params=(RuntimeError, AttributeError, ZeroDivisionError))\n25 def exception(request):\n26     return request.param\n27 \n28 \n29 @pytest.fixture(params=(\"named\", \"anonymous\"))\n30 def func_dataframe_apply_axis_0(request):\n31     def func(x):\n32         return max(x) - min(x)\n33 \n34     return dict(named=func, anonymous=lambda x: max(x) - min(x))[request.param]\n35 \n36 \n37 @pytest.fixture(params=(\"named\", \"anonymous\"))\n38 def func_dataframe_apply_axis_1(request):\n39     def func(x):\n40         return math.sin(x.a**2) + math.sin(x.b**2)\n41 \n42     return dict(\n43         named=func, anonymous=lambda x: math.sin(x.a**2) + math.sin(x.b**2)\n44     )[request.param]\n45 \n46 \n47 @pytest.fixture(params=(\"named\", \"anonymous\"))\n48 def func_dataframe_applymap(request):\n49     def func(x):\n50         return math.sin(x**2) - math.cos(x**2)\n51 \n52     return dict(named=func, anonymous=lambda x: math.sin(x**2) - math.cos(x**2))[\n53         request.param\n54     ]\n55 \n56 \n57 @pytest.fixture(params=(\"named\", \"anonymous\"))\n58 def func_series_map(request):\n59     def func(x):\n60         return math.log10(math.sqrt(math.exp(x**2)))\n61 \n62     return dict(\n63         named=func, anonymous=lambda x: math.log10(math.sqrt(m(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/__init__.py",
        "content": "1 from .core import pandarallel\n2 \n3 __version__ = \"1.6.5\""
      },
      {
        "path": "nalepae_pandarallel/pandarallel/core.py",
        "content": "1 import multiprocessing\n2 import os\n3 import pickle\n4 from itertools import count\n5 from multiprocessing.managers import SyncManager\n6 from pathlib import Path\n7 from tempfile import NamedTemporaryFile\n8 from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Type, cast\n9 \n10 import dill\n11 import pandas as pd\n12 import psutil\n13 from pandas.core.groupby import DataFrameGroupBy as PandaDataFrameGroupBy\n14 from pandas.core.window.expanding import ExpandingGroupby as PandasExpandingGroupby\n15 from pandas.core.window.rolling import RollingGroupby as PandasRollingGroupby\n16 \n17 from .data_types import (\n18     DataFrame,\n19     DataFrameGroupBy,\n20     DataType,\n21     ExpandingGroupBy,\n22     RollingGroupBy,\n23     Series,\n24     SeriesRolling,\n25 )\n26 from .progress_bars import ProgressBarsType, get_progress_bars, progress_wrapper\n27 from .utils import WorkerStatus\n28 \n29 ON_WINDOWS = os.name == \"nt\"\n30 CONTEXT = multiprocessing.get_context(\"spawn\" if ON_WINDOWS else \"fork\")\n31 \n32 # Root of Memory File System\n33 MEMORY_FS_ROOT = os.environ.get(\"MEMO(...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/progress_bars.py",
        "content": "1 import multiprocessing\n2 import os\n3 import shutil\n4 import sys\n5 from abc import ABC, abstractmethod\n6 from enum import Enum\n7 from itertools import count\n8 from time import time_ns\n9 from typing import Callable, List, Union\n10 \n11 from .utils import WorkerStatus\n12 \n13 INTERVAL_NS = 250_000_000  # 0.25 sec\n14 MINIMUM_TERMINAL_WIDTH = 72\n15 \n16 \n17 class ProgressBarsType(int, Enum):\n18     No = 0\n19     InUserDefinedFunction = 1\n20     InUserDefinedFunctionMultiplyByNumberOfColumns = 2\n21     InWorkFunction = 3\n22 \n23 \n24 class ProgressBars(ABC):\n25     @abstractmethod\n26     def __init__(self, maxs: List[int], show: bool) -> None:\n27         ...\n28 \n29     @abstractmethod\n30     def update(self, values: List[int]) -> None:\n31         ...\n32 \n33     def set_error(self, index: int) -> None:\n34        (...truncated)"
      },
      {
        "path": "nalepae_pandarallel/pandarallel/data_types/dataframe_groupby.py",
        "content": "1 import itertools\n2 from typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union, cast\n3 \n4 import pandas as pd\n5 from pandas.core.groupby.generic import DataFrameGroupBy as PandasDataFrameGroupBy\n6 \n7 from ..utils import chunk, df_indexed_like, get_pandas_version\n8 from .generic import DataType\n9 \n10 \n11 class DataFrameGroupBy:\n12     class Apply(DataType):\n13         @staticmethod\n14         def get_chunks(\n15             nb_workers: int, dataframe_groupby: PandasDataFrameGroupBy, **kwargs\n16   (...truncated)"
      }
    ],
    "CommitSHA": "261a652cddb219ac353ff803e81646c08b72fc6f"
  },
  "Score": {
    "Difficulty": "Medium",
    "issue_origin": {
      "Title": 4,
      "Description": 5,
      "Reproducibility": 5,
      "Relevance": 6,
      "Explanation": 6,
      "Overall": 5
    },
    "issue_message": {
      "Title": 6,
      "Description": 5,
      "Reproducibility": 5,
      "Relevance": 7,
      "Explanation": 6,
      "Overall": 5
    },
    "issue_ground": {
      "Title": 10,
      "Description": 9,
      "Reproducibility": 10,
      "Relevance": 8,
      "Explanation": 10,
      "Overall": 9
    },
    "issue_ground_truth": {
      "title": "Test for MEMORY_FS_ROOT environment variable does not reflect changes during runtime",
      "description": "When running the `test_memory_fs_root_environment_variable` test, changes to the `MEMORY_FS_ROOT` environment variable are not reflected in the `core.MEMORY_FS_ROOT` value if it has already been imported. As a result, updates to this environment variable are not acknowledged, which can lead to tests passing with outdated values that do not match the intended testing scenario. This issue can cause confusion and make debugging harder when environment variable changes are expected to influence the system's behavior dynamically. To ensure the test accurately reflects changes to the environment variable, the `core` module should be reloaded after setting the `MEMORY_FS_ROOT` variable.",
      "explanation": "### Summary of the Issue\n\nThe issue revolves around a test function that verifies if changes to the `MEMORY_FS_ROOT` environment variable during runtime are accurately reflected within the code. Specifically, when the `MEMORY_FS_ROOT` environment variable is altered at runtime, these changes are not correctly propagated to the `core.MEMORY_FS_ROOT` value if the `core` module has already been imported. This can lead to tests passing with incorrect values, causing confusion and making debugging more difficult, especially when environment variable changes should dynamically influence the system's behavior.\n\n### Details of the Issue\n\n- **Title**: The test for `MEMORY_FS_ROOT` environment variable does not reflect changes during runtime.\n- **Description**: The core issue is that changes made to the `MEMORY_FS_ROOT` environment variable do not get picked up by the `core.MEMORY_FS_ROOT` if the `core` module is already loaded. This can result in tests working with outdated values, which undermines the effectiveness of the test and misleads developers about the actual state of configurations. The proposed solution is to reload the `core` module after setting the `MEMORY_FS_ROOT` variable to ensure the changes are acknowledged.\n\n### Details of the Commit\n\nThe commit introduced the following changes:\n\n- **Commit Message**: The commit message details that an import module `importlib` was added and the `core` module is reloaded during the test to ensure that the updated environment variable is correctly reflected in the `core.MEMORY_FS_ROOT` value.\n- **Filename**: Changes were made to `nalepae_pandarallel/tests/test_pandarallel.py`.\n- **Commit Patch**: An import statement for `importlib` was added at the start of the test file, and the test function for `test_memory_fs_root_environment_variable` was modified to reload the `core` module after setting the environment variable.\n\n### Explanation of the Solution\n\n#### Cause of the Issue\n\nThe root cause of the issue is related to the way Python handles module imports and environment variables. When a module is imported in Python, its top-level code is executed, and the module is cached. If the `MEMORY_FS_ROOT` environment variable is changed after the `core` module has already been loaded, the module will not pick up any new changes to the environment variables because it only checks their values at the time of import.\n\n#### Solution Provided in the Commit\n\nThe solution involves the following steps:\n\n1. **Importing `importlib`**: This module provides functionality for importing, reloading, and utilizing modules programmatically.\n2. **Reloading the `core` Module**: After setting the `MEMORY_FS_ROOT` environment variable using `monkeypatch.setenv`, the `core` module is reloaded using `importlib.reload`. By reloading the module, the code within the module is re-executed, and it will pick up the new value of the `MEMORY_FS_ROOT` environment variable.\n\nThe reload mechanism ensures that any changes to the environment variables are read and applied within the module, thus aligning the module's internal state with the current value of the environment variable.\n\n### Conclusion\n\nThe commit rectifies the issue by ensuring that changes to the `MEMORY_FS_ROOT` environment variable are propagated to the `core` module during runtime. This is done by reloading the `core` module after setting the environment variable. This approach validates that the module correctly picks up the new environment variable values, ensuring that the tests are accurate and reflective of the actual conditions. Consequently, this resolves the problem of tests passing with outdated values and enhances the reliability and clarity of the test outcomes."
    }
  }
}