<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <!-- Document metadata -->
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>pygho.hodata.ParallelPreprocess &mdash; PyGHO 0.0.1 documentation</title>

  <!-- Stylesheets: syntax-highlighting palette first, then the theme -->
  <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />

  <!-- HTML5 element shim, loaded only by Internet Explorer older than 9 -->
  <!--[if lt IE 9]>
    <script src="../../../_static/js/html5shiv.min.js"></script>
  <![endif]-->

  <!-- Scripts are loaded synchronously, in this order. The inline script at the
       end of the body calls jQuery and SphinxRtdTheme, which are presumably
       defined by jquery.js and js/theme.js, so keep these blocking and ordered. -->
  <script src="../../../_static/jquery.js?v=5d32c60e"></script>
  <script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="../../../_static/documentation_options.js?v=d45e8c67"></script>
  <script src="../../../_static/doctools.js?v=888ff710"></script>
  <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../../../_static/js/theme.js"></script>

  <!-- Link relations to the general index and the search page -->
  <link rel="index" title="Index" href="../../../genindex.html" />
  <link rel="search" title="Search" href="../../../search.html" />
</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">

        <!-- Sidebar header: project home link and the search form -->
        <div class="wy-side-nav-search">
          <a href="../../../index.html" class="icon icon-home">
            PyGHO
          </a>
          <div role="search">
            <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
              <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
              <input type="hidden" name="check_keywords" value="yes" />
              <input type="hidden" name="area" value="default" />
            </form>
          </div>
        </div>

        <!-- Global table of contents, one captioned list per documentation part -->
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
          <p class="caption" role="heading"><span class="caption-text">Notes</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/installation.html">Installation</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/miniexample.html">Minimal Example</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/datastructure.html">Refined Basic Data Structure</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/hodata.html">Efficient High Order Data Processing</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/operator.html">Operators</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Advanced Tutorial</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../../notes/multtensor.html">Multiple Tensor</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Package Reference</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../../modules/backend.html">pygho.backend package</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../modules/hodata.html">pygho.hodata package</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../../modules/honn.html">pygho.honn package</a></li>
          </ul>
        </div>

      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../../../index.html">PyGHO</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
            <!-- Breadcrumb trail: home / module code index / current module -->
            <ul class="wy-breadcrumbs">
              <li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
              <li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
              <li class="breadcrumb-item active">pygho.hodata.ParallelPreprocess</li>
              <li class="wy-breadcrumbs-aside">
              </li>
            </ul>
            <hr/>
          </div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             <!-- Highlighted listing of the module source. Each "viewcode-block" div wraps one
                  documented object, and its "[docs]" link targets that object's anchor in
                  modules/hodata.html. Everything inside the pre element is whitespace-sensitive:
                  do not re-indent it or insert line breaks there. -->
             
  <h1>Source code for pygho.hodata.ParallelPreprocess</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">torch</span>
<span class="kn">from</span> <span class="nn">torch_geometric.data</span> <span class="kn">import</span> <span class="n">InMemoryDataset</span><span class="p">,</span> <span class="n">Data</span> <span class="k">as</span> <span class="n">PygData</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Iterable</span>
<span class="kn">from</span> <span class="nn">multiprocessing</span> <span class="kn">import</span> <span class="n">Pool</span>
<span class="kn">from</span> <span class="nn">pqdm.processes</span> <span class="kn">import</span> <span class="n">pqdm</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<span class="kn">from</span> <span class="nn">.Wrapper</span> <span class="kn">import</span> <span class="n">_repr</span>
<span class="kn">import</span> <span class="nn">os.path</span> <span class="k">as</span> <span class="nn">osp</span>


<div class="viewcode-block" id="ParallelPreprocessDataset">
<a class="viewcode-back" href="../../../modules/hodata.html#pygho.hodata.ParallelPreprocess.ParallelPreprocessDataset">[docs]</a>
<span class="k">class</span> <span class="nc">ParallelPreprocessDataset</span><span class="p">(</span><span class="n">InMemoryDataset</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&#39;&#39;&#39;</span>
<span class="sd">    Parallelly transform a PyG dataset.</span>

<span class="sd">    This dataset class allows parallel preprocessing of a list of PyGData or PyGDataset instances.</span>

<span class="sd">    Args:</span>
<span class="sd">    </span>
<span class="sd">    - root (str): The directory to save processed data.</span>
<span class="sd">    - data_list (Iterable[PygData]): A list of PygData or PygDataset instances.</span>
<span class="sd">    - pre_transform (Callable[[PygData], PygData]): A function that maps PygData to PygData. It is executed only once for all data and is typically a tuple sampler.</span>
<span class="sd">    - num_worker (int): The number of processes for parallel preprocessing. It can be set to the number of available CPU cores.</span>
<span class="sd">    - processedname (Optional[str]): The name to save the processed data. If None, the name will be a hash of the pre_transform function.</span>
<span class="sd">    - transform (Optional[Callable[[PygData], PygData]]): A function to dynamically transform data during data loading.</span>
<span class="sd">    &#39;&#39;&#39;</span>

    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
                 <span class="n">root</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
                 <span class="n">data_list</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">PygData</span><span class="p">],</span>
                 <span class="n">pre_transform</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">PygData</span><span class="p">],</span> <span class="n">PygData</span><span class="p">],</span>
                 <span class="n">num_worker</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
                 <span class="n">processedname</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
                 <span class="n">transform</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="n">PygData</span><span class="p">],</span> <span class="n">PygData</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">tmp_data_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">data_list</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">num_worker</span> <span class="o">=</span> <span class="n">num_worker</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">processedname</span> <span class="o">=</span> <span class="n">processedname</span>
        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">root</span><span class="p">,</span>
                         <span class="n">pre_transform</span><span class="o">=</span><span class="n">pre_transform</span><span class="p">,</span>
                         <span class="n">transform</span><span class="o">=</span><span class="n">transform</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slices</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">processed_paths</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>

    <span class="nd">@property</span>
    <span class="k">def</span> <span class="nf">processed_file_names</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">return</span> <span class="s1">&#39;data.pt&#39;</span>

    <span class="nd">@property</span>
    <span class="k">def</span> <span class="nf">processed_dir</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">processedname</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="k">return</span> <span class="n">osp</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">root</span><span class="p">,</span>
                <span class="sa">f</span><span class="s1">&#39;processed__</span><span class="si">{</span><span class="n">_repr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_transform</span><span class="p">)</span><span class="si">}</span><span class="s1">__</span><span class="si">{</span><span class="n">_repr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_filter</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span>
            <span class="p">)</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="k">return</span> <span class="n">osp</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">root</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;processed__</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">processedname</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>

<div class="viewcode-block" id="ParallelPreprocessDataset.process">
<a class="viewcode-back" href="../../../modules/hodata.html#pygho.hodata.ParallelPreprocess.ParallelPreprocessDataset.process">[docs]</a>
    <span class="k">def</span> <span class="nf">process</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_worker</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">data_list</span> <span class="o">=</span> <span class="n">pqdm</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmp_data_list</span><span class="p">,</span>
                             <span class="bp">self</span><span class="o">.</span><span class="n">pre_transform</span><span class="p">,</span>
                             <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_worker</span><span class="p">)</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">data_list</span> <span class="o">=</span> <span class="p">[</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">pre_transform</span><span class="p">(</span><span class="n">_</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmp_data_list</span><span class="p">)</span>
            <span class="p">]</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">collate</span><span class="p">(</span><span class="n">data_list</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">processed_paths</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span></div>
</div>

</pre></div>

           </div>
          </div>
          <footer>
            <hr/>

            <!-- Copyright notice -->
            <div role="contentinfo">
              <p>&#169; Copyright 2023.</p>
            </div>

            <!-- Toolchain attribution -->
            Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
            <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
            provided by <a href="https://readthedocs.org">Read the Docs</a>.
          </footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      // Passing a function to jQuery() registers it as a DOM-ready callback, so the
      // theme navigation is switched on only after the document has been parsed.
      // NOTE(review): the `true` argument presumably turns on the sticky sidebar;
      // confirm against js/theme.js.
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script> 

</body>
</html>