  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  <!-- NOTE(review): this legacy doctype has no system identifier, which selects quirks mode;
       re-check the layout before replacing it with the HTML5 doctype. -->
  <html lang="en">

  <!-- ======================================================================= -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js" type="text/javascript"></script>
  <style type="text/css">
    /* Page-wide text defaults; the body spans the full viewport width. */
    body {
      font-family: "Crimson Text","HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
      font-weight:300;
      font-size:18px;
      margin-left: auto;
      margin-right: auto;
      width: 100%;
    }

    /* Code listings: light grey panel with inner spacing. */
    pre {
      background-color: #f6f8fa;
      padding: 16px;
    }

    /* NOTE(review): overflow does not apply to inline boxes, so it only takes effect
       if code elements are given a block-level display - confirm intent. */
    code {
      font-family: "SFMono-Regular","Consolas","Liberation Mono","Menlo",monospace;
      overflow: scroll;
    }

    /* Text blocks under a section heading are capped at 1000px. */
    .description {
      max-width: 1000px;
      padding: 0px;
    }

    /* A generic fallback keeps headings sans-serif when the web font is unavailable. */
    h1 {
      font-family: "Source Sans Pro", sans-serif;
      font-weight:300;
    }

    /* Applies to every div on the page, nested ones included: centered, padded,
       and never wider than 95% of its parent unless a more specific rule overrides it. */
    div {
      max-width: 95%;
      margin:auto;
      padding: 10px;
    }

    /* Wrapping, horizontally centered flex row used in place of layout tables. */
    .table-like {
      display: flex;
      flex-flow: row wrap;
      justify-content: center;
    }

    /* One cell of a two-column row. */
    .box {
      width: 50%;
      padding: 0;
      text-align: center;
    }

    /* Below 1280px of viewport width the cells stack into a single column. */
    @media screen and (max-width: 1279px) {
      .box {
        width: 100%;
      }
    }

    /* Inside a flex row an hr is an invisible full-width item that forces a line break. */
    .table-like hr {
      flex-basis: 100%;
      width: 100%;
      height: 0;
      margin: 0;
      border: 0;
    }

    /* Light grey rounded panel. The standard border-radius property is sufficient;
       the old -moz-/-webkit- prefixed copies were dropped. */
    .disclaimerbox {
      background-color: #eee;
      border: 1px solid #eeeeee;
      border-radius: 10px;
      padding: 20px;
    }

    /* Fixed-height, rounded, outlined header video. */
    video.header-vid {
      height: 140px;
      border: 1px solid black;
      border-radius: 10px;
    }

    /* Images are centered blocks that never exceed their container. */
    img {
      padding: 0;
      display: block;
      margin: 0 auto;
      max-height: 100%;
      max-width: 100%;
    }

    /* Frames and videos shrink with their container. The videos in this page carry
       fixed width attributes (400/800) and would otherwise overflow narrow viewports. */
    iframe, video {
      max-width: 100%;
    }

    /* Fixed-height, rounded, outlined header image. */
    img.header-img {
      height: 140px;
      border: 1px solid black;
      border-radius: 10px;
    }

    /* Rounded image with a faint border. */
    img.rounded {
      border: 1px solid #eeeeee;
      border-radius: 10px;
    }

    /* Links: blue without underline, whether visited or not. */
    a:link,
    a:visited {
      text-decoration: none;
      color: #1367a7;
    }

    /* Any hovered anchor turns teal. */
    a:hover { color: #208799; }

    /* Tall, centered table cell with larger text. */
    td.dl-link {
      font-size: 22px;
      text-align: center;
      height: 160px;
    }

    /* Stacked-paper effect built from offset box shadows: each white shadow is a sheet and
       the translucent shadow at the same offset is its outline. This variant draws six sheets. */
    .layered-paper-big { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
      box-shadow:
              0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
              5px 5px 0 0px #fff, /* The second layer */
              5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
              10px 10px 0 0px #fff, /* The third layer */
              10px 10px 1px 1px rgba(0,0,0,0.35), /* The third layer shadow */
              15px 15px 0 0px #fff, /* The fourth layer */
              15px 15px 1px 1px rgba(0,0,0,0.35), /* The fourth layer shadow */
              20px 20px 0 0px #fff, /* The fifth layer */
              20px 20px 1px 1px rgba(0,0,0,0.35), /* The fifth layer shadow */
              25px 25px 0 0px #fff, /* The sixth layer */
              25px 25px 1px 1px rgba(0,0,0,0.35); /* The sixth layer shadow */
      margin-left: 10px;
      margin-right: 45px;
    }


    /* Smaller variant of the same effect with three sheets. */
    .layered-paper { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
      box-shadow:
              0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
              5px 5px 0 0px #fff, /* The second layer */
              5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
              10px 10px 0 0px #fff, /* The third layer */
              10px 10px 1px 1px rgba(0,0,0,0.35); /* The third layer shadow */
      margin-top: 5px;
      margin-left: 10px;
      margin-right: 30px;
      margin-bottom: 5px;
    }

    /* Shifts an element down by half its container and back up by half its own height. */
    .vert-cent {
      position: relative;
      top: 50%;
      transform: translateY(-50%);
    }

    /* Section divider: a 1px line that fades out towards both ends. */
    hr {
      max-width: 1100px;
      height: 1px;
      border: 0;
      background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
    }

    /* Vertical spacing for cells under the element with id "authors". */
    #authors td {
      padding-top: 30px;
      padding-bottom: 5px;
    }

    /* Slides start hidden; the slideshow script sets the current one to display:block. */
    .mySlides { display: none; }

    /* Positioning context for the absolutely placed arrows. */
    .slideshow-container {
      position: relative;
      margin: auto;
      max-width: 1280px;
    }

    /* Shared look of the previous/next arrows. */
    .prev,
    .next {
      position: absolute;
      top: 50%;
      margin-top: -22px;
      width: auto;
      padding: 16px;
      color: #000;
      font-size: 25px;
      font-weight: bold;
      cursor: pointer;
      user-select: none;
      border-radius: 0 3px 3px 0;
      transition: 0.6s ease;
    }

    /* The "next" arrow is pinned to the right edge with mirrored corner rounding. */
    .next {
      right: 0;
      border-radius: 3px 0 0 3px;
    }

    /* Hovering an arrow gives it a mostly opaque black background. */
    .prev:hover,
    .next:hover {
      background-color: rgba(0,0,0,0.8);
    }

    /* Caption text: centered, full-width line above a video or slide. */
    .caption {
      color: #000000;
      /* Explicit unit: a bare "25" is not a valid CSS length and is only accepted
         by the quirks-mode unitless-length behaviour. */
      font-size: 25px;
      width: 100%;
      text-align: center;
      padding: 0px;
    }

    /* Slide counter ("1 / 3") pinned to the top of its slide. */
    .numbertext {
      position: absolute;
      top: 0;
      padding: 8px 12px;
      font-size: 18px;
      color: #000;
    }

    /* Round indicator dots, one per slide. */
    .dot {
      display: inline-block;
      width: 15px;
      height: 15px;
      margin: 0 2px;
      border-radius: 50%;
      background-color: #bbb;
      cursor: pointer;
      transition: background-color 0.6s ease;
    }

    /* Darker fill for the active dot and for any hovered dot. */
    .active,
    .dot:hover {
      background-color: #717171;
    }

    /* Fade-in played by elements carrying the "fade" class. */
    .fade {
      animation-duration: 1.5s;
      animation-name: fade;
    }

    @keyframes fade {
      0% { opacity: 0.4; }
      100% { opacity: 1; }
    }

    /* On very narrow screens (300px or less), shrink the arrow and caption text.
       NOTE(review): the selector list used to target only ".text", a class no element in
       this page carries; ".caption" is added on the assumption that the captions were the
       intended target - confirm. ".text" is kept for backward compatibility. */
    @media only screen and (max-width: 300px) {
      .prev, .next, .text, .caption {font-size: 11px}
    }
  </style>

  <!-- ES6 polyfill bundle, loaded once. It is served from the cdnjs mirror because the
       polyfill.io domain changed ownership and was reported to serve malicious code in 2024. -->
  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
  <!-- Web fonts named in the font-family declarations of the stylesheet. -->
  <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:ital,wght@0,400;0,600;1,400&display=swap" rel="stylesheet">
  <link href="https://fonts.googleapis.com/css2?family=Crimson+Text:ital,wght@0,400;0,600;1,400&display=swap" rel="stylesheet">
  <!-- MathJax 3, loaded once. Only the tex-mml-chtml component is kept; the page previously
       also loaded tex-chtml under the same element id.
       NOTE(review): no math markup is visible in this page - confirm MathJax is still needed. -->
  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>

  <!-- Document metadata. The head element is already open here: the parser starts it at the
       first script element near the top of the file, so no second head start tag is written.
       No flow content (such as a div) may appear in this part: it would end the head early
       and push the metadata, and the rest of the page, into that element inside the body.
       NOTE(review): the charset declaration should ideally sit within the first 1024 bytes
       of the file. -->
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <title>Self-Improving Robots: End-to-End Autonomous Visuomotor Reinforcement Learning</title>
  <meta name="HandheldFriendly" content="True">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="referrer" content="no-referrer-when-downgrade">
</head>

<body>

      <br>
      <!-- Page title as a real heading so assistive technology and outline tools can find it.
           The inline style keeps the original size, weight and font; margin:0 cancels the
           default heading margins so the spacing matches the former inline span. -->
      <h1 style="margin:0;text-align:center;font-size:44px;font-weight:bold;font-family:Source Sans Pro;">Self-Improving Robots:<br> End-to-End Autonomous Visuomotor Reinforcement Learning</h1><br>
      <!-- Four empty flex rows. The hr inside a .table-like row is styled with zero height and
           no border, so each row contributes only its own padding as vertical space.
           NOTE(review): presumably placeholders for removed header content - confirm. -->
      <div class="table-like" style="justify-content:space-evenly;max-width:900px;margin:auto;"><hr></div>
      <div class="table-like" style="justify-content:space-evenly;max-width:900px;margin:auto;"><hr></div>
      <div class="table-like" style="justify-content:space-evenly;max-width:900px;margin:auto;"><hr></div>

      <div class="table-like" style="max-width:900px;"><hr></div>

      <!-- Overview: summary paragraph, overview video and a one-line guide to the page. -->
      <div style="max-width:900px">
        <hr>
        <center><h1>Overview</h1></center>
        <p>
          <b>Summary.</b> In imitation and reinforcement learning, the cost of
          human supervision limits the amount of data that robots can be
          trained on. An aspirational goal is to construct self-improving
          robots: robots that can learn and improve on their own, from
          autonomous interaction with minimal human supervision or
          oversight. Such robots could collect and train on much larger
          datasets, and thus learn more robust and performant policies.
          MEDAL++ is an autonomous reinforcement learning algorithm that trains a forward policy to do the task,
          and a backward policy to undo the task towards states visited by an expert. Starting with a small set
          of demonstrations collected by an expert, the forward and backward policy interact with the environment
          in a cyclic fashion, switching control after a fixed number of steps. Chaining the forward and backward
          policies allows the robot to self-improve, minimizing the need for humans to reset the environment after
          every episode. Importantly, MEDAL++ learns end-to-end from high-dimensional visual inputs and learns the
          reward function from the expert demonstrations, bypassing the need for reward engineering. In contrast to
          prior work, this allows MEDAL++ to be applied in the real world, improving the success rate by 30-70%
          over behavior cloning policies in practice. Overall, MEDAL++ takes a step towards simple and general
          self-improving robotic systems.
        </p>

        <div class="table-like" style="justify-content:space-evenly;margin:auto;padding:0px;">
          <div>
            <!-- playsinline lets the muted autoplay video start inline on iPhone browsers. -->
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/overview.mp4" type="video/mp4">
            </video>
          </div>
        </div>

        <p>
          This website features autonomous training and evaluation videos of MEDAL++ on three manipulation tasks using the Franka Panda arm: cloth hanging, peg insertion and bowl covering.
        </p>
      </div>

      <!-- Cloth Hanging: task description, two training videos and a three-slide MEDAL++ vs. BC
           comparison. The slides and dots are driven by the global slideshow3 controller, which
           looks up the "cloth-hook-container" and "cloth-hook-dots" ids.
           Fixes in this section: valid inline styles (unit on width, ":" in text-align, ";"
           between justify-content and width), properly closed h3 headings that are no longer
           nested in a p, and playsinline on every autoplay video. -->
      <div style="width:1280px; margin:0 auto; text-align:left">
        <hr>

        <center id="videos"><h1>Cloth Hanging</h1></center>
        <div class="description">
          <p style="text-align:center">
            This task requires the robot to grasp a cloth and hang it on a fixed hook. The cloth itself can be in any location and arbitrary shape.
          </p>
        </div>

        <div class="table-like" style="justify-content:space-evenly;margin:auto;padding:0px;">
          <div class="caption">Annotated Training Video</div>
          <p>
            An annotated segment of training using MEDAL++, showing how forward and backward policies interact to enable the robot to practice autonomously.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/cloth_hook/cloth_detailed.mp4" type="video/mp4">
            </video>
          </div>
          <div class="caption">Training Timelapse</div>
          <p>
            A timelapse of training, showing the diverse set of states visited by the robot, and the failures and successes of the policy.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/cloth_hook/timelapse_compressed.mp4" type="video/mp4">
            </video>
          </div>
        </div>

        <div class="slideshow-container" id="cloth-hook-container">
          <div class="mySlides fade">
            <div class="numbertext">1 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/medal/s1.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/bc/f2.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">2 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/medal/s2.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/bc/f1.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">3 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/medal/s3.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/cloth_hook/bc/f3.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <a class="prev" onclick="slideshow3.plusSlides(-1)">❮</a>
          <a class="next" onclick="slideshow3.plusSlides(1)">❯</a>
        </div>
        <br>
        <div id="cloth-hook-dots" style="text-align:center">
          <span class="dot" onclick="slideshow3.currentSlide(1)"></span>
          <span class="dot" onclick="slideshow3.currentSlide(2)"></span>
          <span class="dot" onclick="slideshow3.currentSlide(3)"></span>
        </div>
      </div>

      <!-- Peg Insertion: task description, two training videos and a three-slide MEDAL++ vs. BC
           comparison. The slides and dots are driven by the global slideshow1 controller, which
           looks up the "peg-insertion-container" and "peg-insertion-dots" ids.
           Fixes in this section: a document-unique id on the heading wrapper (it used to reuse
           "videos"), valid inline styles (unit on width, ":" in text-align, ";" between
           justify-content and width), properly closed h3 headings that are no longer nested
           in a p, and playsinline on every autoplay video. -->
      <div style="width:1280px; margin:0 auto; text-align:left">
        <hr>

        <center id="videos-peg-insertion"><h1>Peg Insertion</h1></center>
        <div class="description">
          <p style="text-align:center">
            This task requires the robot to insert a peg into the goal location, marked by a green boundary.
          </p>
        </div>

        <div class="table-like" style="justify-content:space-evenly;margin:auto;padding:0px;">
          <div class="caption">Annotated Training Video</div>
          <p>
            An annotated segment of training using MEDAL++, showing how forward and backward policies interact to enable the robot to practice autonomously.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/peg_insertion/peg_detailed.mp4" type="video/mp4">
            </video>
          </div>
          <div class="caption">Training Timelapse</div>
          <p>
            A timelapse of training, showing the diverse set of states visited by the robot, and the failures and successes of the policy.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/peg_insertion/timelapse_final.mp4" type="video/mp4">
            </video>
          </div>
        </div>

        <div class="slideshow-container" id="peg-insertion-container">
          <div class="mySlides fade">
            <div class="numbertext">1 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/medal/s1.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/bc/f3.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">2 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/medal/s2.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/bc/fail1_fast.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">3 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/medal/s3.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/peg_insertion/bc/fail2.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <a class="prev" onclick="slideshow1.plusSlides(-1)">❮</a>
          <a class="next" onclick="slideshow1.plusSlides(1)">❯</a>
        </div>
        <br>
        <div id="peg-insertion-dots" style="text-align:center">
          <span class="dot" onclick="slideshow1.currentSlide(1)"></span>
          <span class="dot" onclick="slideshow1.currentSlide(2)"></span>
          <span class="dot" onclick="slideshow1.currentSlide(3)"></span>
        </div>
      </div>

      <!-- Bowl Cover: task description, two training videos and a three-slide MEDAL++ vs. BC
           comparison. The slides and dots are driven by the global slideshow2 controller, which
           looks up the "bowl-cover-container" and "bowl-cover-dots" ids.
           Fixes in this section: a document-unique id on the heading wrapper (it used to reuse
           "videos"), valid inline styles (unit on width, ":" in text-align, ";" between
           justify-content and width), properly closed h3 headings that are no longer nested
           in a p, and playsinline on every autoplay video. -->
      <div style="width:1280px; margin:0 auto; text-align:left">
        <hr>

        <center id="videos-bowl-cover"><h1>Bowl Cover</h1></center>
        <div class="description">
          <p style="text-align:center">
            This task requires the robot to cover a bowl using a cloth. The cloth itself can be in any location and arbitrary shape.
          </p>
        </div>

        <div class="table-like" style="justify-content:space-evenly;margin:auto;padding:0px;">
          <div class="caption">Annotated Training Video</div>
          <p>
            An annotated segment of training using MEDAL++, showing how forward and backward policies interact to enable the robot to practice autonomously.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/bowl_cover/bowl_detailed.mp4" type="video/mp4">
            </video>
          </div>
          <div class="caption">Training Timelapse</div>
          <p>
            A timelapse of training, showing the diverse set of states visited by the robot, and the failures and successes of the policy.
          </p>
          <div>
            <video width="800" autoplay muted loop playsinline>
              <source src="resources/bowl_cover/bowl_cover_timelapse_final.mp4" type="video/mp4">
            </video>
          </div>
        </div>

        <div class="slideshow-container" id="bowl-cover-container">
          <div class="mySlides fade">
            <div class="numbertext">1 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/medal/s3.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/bc/f1.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">2 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/medal/s2.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/bc/f2.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <div class="mySlides fade">
            <div class="numbertext">3 / 3</div>
            <div class="caption">Evaluation Videos</div>
            <div class="table-like" style="justify-content:space-evenly;width:100%;margin:auto;padding:0px;">
              <div class="box">
                <center><h3>MEDAL++</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/medal/s1.mp4" type="video/mp4">
                </video>
              </div>

              <div class="box">
                <center><h3>BC</h3></center>
                <video width="400" autoplay muted loop playsinline>
                  <source src="resources/bowl_cover/bc/f3.mp4" type="video/mp4">
                </video>
              </div>
            </div>
          </div>

          <a class="prev" onclick="slideshow2.plusSlides(-1)">❮</a>
          <a class="next" onclick="slideshow2.plusSlides(1)">❯</a>
        </div>
        <br>
        <div id="bowl-cover-dots" style="text-align:center">
          <span class="dot" onclick="slideshow2.currentSlide(1)"></span>
          <span class="dot" onclick="slideshow2.currentSlide(2)"></span>
          <span class="dot" onclick="slideshow2.currentSlide(3)"></span>
        </div>
      </div>

      <!-- Closing divider, 800px wide and centered. -->
      <div align="center" style="width:800px; margin:0 auto"><hr></div>

      <script>
        /**
         * Prototype for a click-through slide carousel.
         *
         * An instance is made with Object.create(Slideshow) and given a `name`. The instance
         * then controls the elements with class "mySlides" inside the element whose id is
         * `name + "-container"`, and the elements with class "dot" inside the element whose
         * id is `name + "-dots"`.
         *
         * slideIndex is the 1-based index of the visible slide.
         */
        const Slideshow = {
          slideIndex: 1,
          name: "",

          /** Move forward (n > 0) or backward (n < 0) by n slides, wrapping at either end. */
          plusSlides(n) {
            this.showSlides(this.slideIndex += n);
          },

          /** Jump to the slide with 1-based index n. */
          currentSlide(n) {
            this.showSlides(this.slideIndex = n);
          },

          /**
           * Hide every slide, clear every dot, then show the current slide and mark its dot.
           * An n above the slide count wraps slideIndex to 1; an n below 1 wraps it to the
           * last slide. Does nothing when the container or its slides are missing, and
           * skips the dot update when no matching dot exists, instead of throwing.
           */
          showSlides(n) {
            const container = document.getElementById(this.name + "-container");
            if (!container) { return; }
            const slides = container.getElementsByClassName("mySlides");
            if (slides.length === 0) { return; }
            const dotsHost = document.getElementById(this.name + "-dots");
            const dots = dotsHost ? dotsHost.getElementsByClassName("dot") : [];

            if (n > slides.length) { this.slideIndex = 1; }
            if (n < 1) { this.slideIndex = slides.length; }

            for (let i = 0; i < slides.length; i++) {
              slides[i].style.display = "none";
            }
            for (let i = 0; i < dots.length; i++) {
              // classList removes the token wherever it sits in the class attribute.
              dots[i].classList.remove("active");
            }

            slides[this.slideIndex - 1].style.display = "block";
            const activeDot = dots[this.slideIndex - 1];
            if (activeDot) { activeDot.classList.add("active"); }
          }
        };

        // One carousel controller per task section; each name selects the matching
        // "<name>-container" and "<name>-dots" elements. The controllers are declared with
        // `var` (instead of being implicit globals) so they remain global properties: the
        // inline onclick handlers in the markup refer to slideshow1/2/3 by name.
        var slideshow1 = Object.create(Slideshow);
        slideshow1.name = "peg-insertion";
        slideshow1.showSlides(slideshow1.slideIndex);

        var slideshow2 = Object.create(Slideshow);
        slideshow2.name = "bowl-cover";
        slideshow2.showSlides(slideshow2.slideIndex);

        var slideshow3 = Object.create(Slideshow);
        slideshow3.name = "cloth-hook";
        slideshow3.showSlides(slideshow3.slideIndex);

      </script>
</body>
</html>
