{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>.container { width:100% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "copy\n",
      "0\n",
      "('copy D B Y s r C y Y Q g J H S y w r X', 'D B Y s r C y Y Q g J H S y w r X')\n",
      "('copy O S t j i G i P K m I n e r v G m x G S k m u D J I', 'O S t j i G i P K m I n e r v G m x G S k m u D J I')\n",
      "('copy W A t c W A C T K o o c x Z F j P', 'W A t c W A C T K o o c x Z F j P')\n",
      "('copy o C X B h R p Z J L y S N S X P p l G b u D M J j T S', 'o C X B h R p Z J L y S N S X P p l G b u D M J j T S')\n",
      "('copy F u D B J e h Q y B F G o Z n q D m w U G S l Z o u j Q r', 'F u D B J e h Q y B F G o Z n q D m w U G S l Z o u j Q r')\n",
      "('copy k R Z C w f F Q z o i F P v', 'k R Z C w f F Q z o i F P v')\n",
      "('copy t s h Y', 't s h Y')\n",
      "('copy U D R h S i D d g j s l s j E Y k X S p p e h W q d U r t', 'U D R h S i D d g j s l s j E Y k X S p p e h W q d U r t')\n",
      "('copy R V D w', 'R V D w')\n",
      "('copy G Y w v v', 'G Y w v v')\n",
      "('copy d h J g I B b D k P p', 'd h J g I B b D k P p')\n",
      "('copy H B l y S i n A', 'H B l y S i n A')\n",
      "('copy J v T N R l D J h I B Y B w V p f R g', 'J v T N R l D J h I B Y B w V p f R g')\n",
      "('copy p y T I R F u k C L H f O D g q L M J m D b V U', 'p y T I R F u k C L H f O D g q L M J m D b V U')\n",
      "('copy z i q h K P u H s R p h n o T W C s o P g d L z R u J K', 'z i q h K P u H s R p h n o T W C s o P g d L z R u J K')\n",
      "('copy S k y T P y a L E E F T H V v N U i f M a', 'S k y T P y a L E E F T H V v N U i f M a')\n",
      "('copy x s F t G I W k o Y y b v w q f', 'x s F t G I W k o Y y b v w q f')\n",
      "('copy z Z L E c', 'z Z L E c')\n",
      "('copy z b X o K G p x t w s B A g Y d z P e S U S U S q D', 'z b X o K G p x t w s B A g Y d z P e S U S U S q D')\n",
      "('copy r E F l i j S m D h v h I u k Q U I N u a a q e f I e f h h', 'r E F l i j S m D h v h I u k Q U I N u a a q e f I e f h h')\n",
      "\n",
      "reverse\n",
      "0\n",
      "('reverse J q R Q G t l K Z t M i S W n P H', 'H P n W S i M t Z K l t G Q R q J')\n",
      "('reverse K E i O t S S U g o r E', 'E r o g U S S t O i E K')\n",
      "('reverse f Z o e W', 'W e o Z f')\n",
      "('reverse H e l k I o i d z k A D t', 't D A k z d i o I k l e H')\n",
      "('reverse V O R i y s g V Q p N F N C D B T p', 'p T B D C N F N p Q V g s y i R O V')\n",
      "('reverse r A c j W p j j D K H V w m Z V', 'V Z m w V H K D j j p W j c A r')\n",
      "('reverse s Z j A B h v s d r q G N D d m y y t l o t d W k J U', 'U J k W d t o l t y y m d D N G q r d s v h B A j Z s')\n",
      "('reverse I s z r b B Z k z w c W w E v c U k a', 'a k U c v E w W c w z k Z B b r z s I')\n",
      "('reverse O t N M S y', 'y S M N t O')\n",
      "('reverse R p x t L g t D G I W', 'W I G D t g L t x p R')\n",
      "('reverse d l S C N H U o w l g r p p C t T A x n', 'n x A T t C p p r g l w o U H N C S l d')\n",
      "('reverse m k D C q e q I z q H i r L', 'L r i H q z I q e q C D k m')\n",
      "('reverse q G n M a n k o h E M e A v m L D X l U L H l v d X T v S S', 'S S v T X d v l H L U l X D L m v A e M E h o k n a M n G q')\n",
      "('reverse l Q S F W I T a x S R M b F', 'F b M R S x a T I W F S Q l')\n",
      "('reverse w P o Y v t i M Z C r k b M P o o o z K i G Y Y z x B', 'B x z Y Y G i K z o o o P M b k r C Z M i t v Y o P w')\n",
      "('reverse I M R R y e c Y o T N o c K S k m', 'm k S K c o N T o Y c e y R R M I')\n",
      "('reverse w T k B', 'B k T w')\n",
      "('reverse f L e s u c A x m R t Y m b w w e e J M p M g Y p b', 'b p Y g M p M J e e w w b m Y t R m x A c u s e L f')\n",
      "('reverse q u b a H w c H R z d u J a', 'a J u d z R H c w H a b u q')\n",
      "('reverse X Z j n A Y X F c O', 'O c F X Y A n j Z X')\n",
      "\n",
      "set\n",
      "0\n",
      "('set s s s V C H e', 's V C H e')\n",
      "('set X O L i d J E L r V g N h U S t U t h O g a E O z g n t W p', 'X O L i d J E r V g N h U S t a z n W p')\n",
      "('set P H P H P P H H H', 'P H')\n",
      "('set v B G L y t M l c R U I R a m c M l d y l Y c c Q y U a', 'v B G L y t M l c R U I a m d Y Q')\n",
      "('set t G t Q Q G Z Z Z C G Z C R G C R G G R Q R R G', 't G Q Z C R')\n",
      "('set U t F F t B f t a U t t t f t a F a U', 'U t F B f a')\n",
      "('set L W W o u u u S o S u u u S o W o L W W S o o o o o o o', 'L W o u S')\n",
      "('set a a a a a a a a a a a a a a a', 'a')\n",
      "('set g R B x s L', 'g R B x s L')\n",
      "('set m B o m', 'm B o')\n",
      "('set u F t R h W o F N s B D z D q y f t R r T J V t s V V Q h', 'u F t R h W o N s B D z q y f r T J V Q')\n",
      "('set W P g c h Z C F T k k w', 'W P g c h Z C F T k w')\n",
      "('set H H T H C T y T C', 'H T C y')\n",
      "('set x z x X X O N M M x x X g O V X N N z Q Q Q Q e', 'x z X O N M g V Q e')\n",
      "('set R t t Q q Z l', 'R t Q q Z l')\n",
      "('set B r E j u y d e W V E F', 'B r E j u y d e W V F')\n",
      "('set h N a H Z a S M Y u H N H e M a Z v u a M S a', 'h N a H Z S M Y u e v')\n",
      "('set K K K K K', 'K')\n",
      "('set C L y i X b b I K N B X g', 'C L y i X b I K N B g')\n",
      "('set l a H t n f a Z k d L B W i p K a e n', 'l a H t n f Z k d L B W i p K e')\n",
      "\n",
      "first_char\n",
      "0\n",
      "('first_char c Y M s B j j U Z O t O K L i V y b i', 'c')\n",
      "('first_char W k n G G E V O G x t V s Y n b H a C', 'W')\n",
      "('first_char R f q u M b I T I x E j h e d J U d', 'R')\n",
      "('first_char i h t n u', 'i')\n",
      "('first_char e s A J g', 'e')\n",
      "('first_char j H V Z v D Y P K z O y r N y Y K s a T X U s H A U', 'j')\n",
      "('first_char R G F Q N a p p I m Z O H O U u W D a X c', 'R')\n",
      "('first_char M k P j R f p b R i p W', 'M')\n",
      "('first_char R f t W b f O c v p i v r r d F G Q i j C a W', 'R')\n",
      "('first_char T z d D r B g T z I v J a a p P h s M f', 'T')\n",
      "('first_char e h R d T', 'e')\n",
      "('first_char N l E J c T f K A O Z v b E', 'N')\n",
      "('first_char R Y c Z q Q', 'R')\n",
      "('first_char O c G Z G B a v', 'O')\n",
      "('first_char h r C y F W r H u P z H N S B V E u b U U t j s', 'h')\n",
      "('first_char M A h f p m a h b W', 'M')\n",
      "('first_char q k z E Y t Q G S l', 'q')\n",
      "('first_char G E n T x e B n E F X B e r c G T N D k l X', 'G')\n",
      "('first_char v R L x l N g w l a P W C c W J j V x A A S X k w T k t A u', 'v')\n",
      "('first_char Z U X S U B E R s p D J i M c N W', 'Z')\n",
      "\n",
      "last_char\n",
      "0\n",
      "('last_char w P V w M H z l o R u z o g w L X', 'X')\n",
      "('last_char a J L J H c h b R G P U Z d H n J F t', 't')\n",
      "('last_char h b C J t V L I g g', 'g')\n",
      "('last_char v j z P Z t j N P C l N o K c w O D T p m n T', 'T')\n",
      "('last_char e a h X h N n I f m P o m b a K E A p b m t u Y E', 'E')\n",
      "('last_char g W B A o n i s r m C J F v i M e p P N G j d z j K g Y F', 'F')\n",
      "('last_char S q q N Z R S R L S E m D U N h w S i l U w u p H m f n', 'n')\n",
      "('last_char x A I b r K d d v', 'v')\n",
      "('last_char A v J l', 'l')\n",
      "('last_char g S j a z X i T', 'T')\n",
      "('last_char V D s G h t P E e m m Z k j i t a Z J Q K e W t T b P A', 'A')\n",
      "('last_char V s s k G z', 'z')\n",
      "('last_char i i Q B U l Z Q M D s Z x f p b a s m o n j', 'j')\n",
      "('last_char y T w f i Z L S b d N U T d J n c B T C B n O J', 'J')\n",
      "('last_char B g O n P w a T O X a t y Y R U K t x K j z d f Y', 'Y')\n",
      "('last_char K K y E P p I A J i E a d E H o D p f m I i c x F V F', 'F')\n",
      "('last_char Y D m f R g C P y n e b P M x B B k', 'k')\n",
      "('last_char N n g A I A D m A k u V u Y y k e v p c q', 'q')\n",
      "('last_char y F Z Q U W x y X Z d e a p k U', 'U')\n",
      "('last_char u d H a H y m g F f k w o y b l u R c f b M H', 'H')\n",
      "\n",
      "deduplicate\n",
      "0\n",
      "('deduplicate T T l l s s M M S S S s s S i x', 'T l s M S s S i x')\n",
      "('deduplicate r Z Z Z c c', 'r Z c')\n",
      "('deduplicate L L L f f f f K K', 'L f K')\n",
      "('deduplicate Y y o o a a n T J e a W w N f q j', 'Y y o a n T J e a W w N f q j')\n",
      "('deduplicate j t t f E F F F G G G m m m m i n n n n n n n n n t', 'j t f E F G m i n t')\n",
      "('deduplicate T T T T v v B B B B B v v v v v v v U U U U x x x V V V I', 'T v B v U x V I')\n",
      "('deduplicate j j j O v v v v v v v', 'j O v')\n",
      "('deduplicate B B B B B B', 'B')\n",
      "('deduplicate l l l I s a a a a l m m d h h j w C s x x g g l i p p p I', 'l I s a l m d h j w C s x g l i p I')\n",
      "('deduplicate c c c c E E E E i i', 'c E i')\n",
      "('deduplicate s w U O e e H F F d d Z Z Z Z P N v v r b b v v v y', 's w U O e H F d Z P N v r b v y')\n",
      "('deduplicate f f f f f f f f f', 'f')\n",
      "('deduplicate P P P P P P P P P P L L L L L L L L L L c c c', 'P L c')\n",
      "('deduplicate m N y Z B j w w V V R Z Z', 'm N y Z B j w V R Z')\n",
      "('deduplicate A A P C C', 'A P C')\n",
      "('deduplicate Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y', 'Y')\n",
      "('deduplicate Y Y Y Y Z Z w w w H H H H J J', 'Y Z w H J')\n",
      "('deduplicate w I Q Q w w w w S S K N N A A L P', 'w I Q w S K N A L P')\n",
      "('deduplicate h U d d d C', 'h U d C')\n",
      "('deduplicate A A A A A A w w w w w w N N N N J J J J J J J J J J J', 'A w N J')\n",
      "\n",
      "length\n",
      "0\n",
      "('length R E j a x r B', '7')\n",
      "('length K a a C', '4')\n",
      "('length O w s e e G r M X K P s p C j W k D f K J E w H W t B G A', '2 9')\n",
      "('length M Z f W J J G N x w l t f G L r c Z Q N K T j N C V c g k I', '3 0')\n",
      "('length Z R U Y g F d J t r r T j S M E Y o U K K U W O z H E P', '2 8')\n",
      "('length Q j F H D a z', '7')\n",
      "('length q z P E d W R N a T l v a h P x s M z o', '2 0')\n",
      "('length n A Q C p v Z b t P x l w o q B C I o n W W n t F K M D A', '2 9')\n",
      "('length L y h H c v P s e r Q Z r I h T p I R L N j r t w X i S v', '2 9')\n",
      "('length h k g c i', '5')\n",
      "('length K O k w Q V T r C B Y C E D T N i l N h k L e a a s j r', '2 8')\n",
      "('length B T i S I Z a l m m Z p I I E N E F a H U W t', '2 3')\n",
      "('length H u V Q n P', '6')\n",
      "('length m f H H o M X t U r V C J I U E b a J C p Z F', '2 3')\n",
      "('length H A e d k l n i Q G i U a A p N k r R G', '2 0')\n",
      "('length C d Y P A w g d t W A m O z w l T q', '1 8')\n",
      "('length v R w m S D K M W n j e', '1 2')\n",
      "('length e Z Q i A o I P k', '9')\n",
      "('length u f P d P B K R z Q G u x R l E V I m S N v l B R p F O', '2 8')\n",
      "('length G e z P Y k F k I D K d q r E m u', '1 7')\n",
      "\n",
      "longest_word\n",
      "0\n",
      "@input = longest_word F , N J , S , o U , h , S , S , f , M , P , M , u , e , a\n",
      "output = N J\n",
      "@input = longest_word R r z n , l t C , Z a i\n",
      "output = R r z n\n",
      "@input = longest_word J , u s , k\n",
      "output = u s\n",
      "@input = longest_word a , b f , H T , y A , F , l , Q , y , Y , N F I , v t , u O\n",
      "output = N F I\n",
      "@input = longest_word C n a T , l , J h o F E o Z L , n y C B C N u M\n",
      "output = J h o F E o Z L\n",
      "@input = longest_word K b u b l , t N G Z U g , z S , e H b k e , P I g\n",
      "output = t N G Z U g\n",
      "@input = longest_word I w Z L , k W W D f , k s v L V , R A T m A k o s N\n",
      "output = R A T m A k o s N\n",
      "@input = longest_word K , Q K , F , G\n",
      "output = Q K\n",
      "@input = longest_word N , i , Q , k , v , v , h , E , B , y , O , i l , a , D\n",
      "output = i l\n",
      "@input = longest_word H X Q j o S w w E M D G\n",
      "output = H X Q j o S w w E M D G\n",
      "@input = longest_word y , A F u d , T z O , x\n",
      "output = A F u d\n",
      "@input = longest_word F , j , F , t , v , K , p , A , k , f , o , g , s , H\n",
      "output = F\n",
      "@input = longest_word O , h o , P S , n g r , r , f y , U , W Z , i , U v\n",
      "output = n g r\n",
      "@input = longest_word v V S , q , n r , n , a , w v , o j , M I h , t , o , c\n",
      "output = v V S\n",
      "@input = longest_word X i h r A t y j j M o g h h i L Z b c f Q R r l U U p o\n",
      "output = X i h r A t y j j M o g h h i L Z b c f Q R r l U U p o\n",
      "@input = longest_word L , R s , z , j K Z m , e , X , W , W\n",
      "output = j K Z m\n",
      "@input = longest_word B W b y , j y H\n",
      "output = B W b y\n",
      "@input = longest_word j , T , k , o H , t\n",
      "output = o H\n",
      "@input = longest_word f , k , G , l , p , C , M m , C Z , t , w , P , m , Q Y M\n",
      "output = Q Y M\n",
      "@input = longest_word D , o f , O b , D W , E Y , Y Q q , g y , c w , V N , Q , U\n",
      "output = Y Q q\n",
      "\n",
      "duplicate\n",
      "0\n",
      "('duplicate C A t L g S J M o p A T c m m F O P W U i k', 'C C A A t t L L g g S S J J M M o o p p A A T T c c m m m m F F O O P P W W U U i i k k')\n",
      "('duplicate g L g r V g b e r A I h c X v y H I R z z k f', 'g g L L g g r r V V g g b b e e r r A A I I h h c c X X v v y y H H I I R R z z z z k k f f')\n",
      "('duplicate r n l i K R T J g z L w w V l h s K O S S K j W h j o M x U', 'r r n n l l i i K K R R T T J J g g z z L L w w w w V V l l h h s s K K O O S S S S K K j j W W h h j j o o M M x x U U')\n",
      "('duplicate x I l M s v m j l F W t', 'x x I I l l M M s s v v m m j j l l F F W W t t')\n",
      "('duplicate X y m u F R M I C q O H G k z', 'X X y y m m u u F F R R M M I I C C q q O O H H G G k k z z')\n",
      "('duplicate e P m Z A N X X R U J l l p P s V V R g l J', 'e e P P m m Z Z A A N N X X X X R R U U J J l l l l p p P P s s V V V V R R g g l l J J')\n",
      "('duplicate f S T U U t Y A x Y E w z C S F R r r Q U W R k K a X u K', 'f f S S T T U U U U t t Y Y A A x x Y Y E E w w z z C C S S F F R R r r r r Q Q U U W W R R k k K K a a X X u u K K')\n",
      "('duplicate B R c S H m A g', 'B B R R c c S S H H m m A A g g')\n",
      "('duplicate c a M w p Y c Y S', 'c c a a M M w w p p Y Y c c Y Y S S')\n",
      "('duplicate m K m K i R N n z w l P F y c z R C u s A', 'm m K K m m K K i i R R N N n n z z w w l l P P F F y y c c z z R R C C u u s s A A')\n",
      "('duplicate u Y N a u v T L R H t m Q V R I b g h y S C c Y', 'u u Y Y N N a a u u v v T T L L R R H H t t m m Q Q V V R R I I b b g g h h y y S S C C c c Y Y')\n",
      "('duplicate k j x t B', 'k k j j x x t t B B')\n",
      "('duplicate l o F x g u o w a T G X Y w n n N z d l H o', 'l l o o F F x x g g u u o o w w a a T T G G X X Y Y w w n n n n N N z z d d l l H H o o')\n",
      "('duplicate Q A Z I m A Z M L V M j D g B t l A Z I f g D j g T t X u A', 'Q Q A A Z Z I I m m A A Z Z M M L L V V M M j j D D g g B B t t l l A A Z Z I I f f g g D D j j g g T T t t X X u u A A')\n",
      "('duplicate Z b R M Y c V J Z a H C c W V x O z m', 'Z Z b b R R M M Y Y c c V V J J Z Z a a H H C C c c W W V V x x O O z z m m')\n",
      "('duplicate F s t E a y s s G p u A T B M b p L i d g G x b w', 'F F s s t t E E a a y y s s s s G G p p u u A A T T B B M M b b p p L L i i d d g g G G x x b b w w')\n",
      "('duplicate d M i d f J O L N z', 'd d M M i i d d f f J J O O L L N N z z')\n",
      "('duplicate h J i i Z s u d U z s M m a s h i B J E j x x', 'h h J J i i i i Z Z s s u u d d U U z z s s M M m m a a s s h h i i B B J J E E j j x x x x')\n",
      "('duplicate H o A t E n d Z P H T v Y C', 'H H o o A A t t E E n n d d Z Z P P H H T T v v Y Y C C')\n",
      "('duplicate x F L U m', 'x x F F L L U U m m')\n",
      "\n",
      "count\n",
      "0\n",
      "('count j o i a M x I N I X M g M M M M M P <sep> M', '7')\n",
      "('count r r r y a z T b r T r u r t r z J r Y a l c <sep> r', '8')\n",
      "('count Y E C Z E k E M R p <sep> E', '3')\n",
      "('count l v x O m u p z A a G M C J T C s z f m r A n e M U w x <sep> o', '0')\n",
      "('count J J J J J J J Q J J <sep> J', '9')\n",
      "('count D D p D D G d D n D d D D u J D x D <sep> D', '1 0')\n",
      "('count h N g r c N <sep> D', '0')\n",
      "('count J T J J J J J T J x J J J V X J u V T J S Y J k J J <sep> J', '1 5')\n",
      "('count D D d D Z E K H D G <sep> D', '4')\n",
      "('count O O O O I <sep> O', '4')\n",
      "('count Z c U U y I F n U t U U Q U U z q e Q v o J U U <sep> U', '9')\n",
      "('count M O W X W n g W W y g F K L v l c U H W y A <sep> W', '5')\n",
      "('count n D D V X D D D D V D D U D h t o D D D D D D <sep> D', '1 5')\n",
      "('count H H H H h H H H H H H H H t H H H H H H <sep> H', '1 8')\n",
      "('count h h h h h h h h h N I h k e h h h q h D s q T D <sep> h', '1 4')\n",
      "('count f f f <sep> f', '3')\n",
      "('count P P P P P M a P P P N P P P P F P P P P P P P P P s P z <sep> P', '2 2')\n",
      "('count y h h C h h Y h B h p <sep> h', '6')\n",
      "('count h Q L x h h f d g h j h h y <sep> h', '6')\n",
      "('count r q T b w i Z M U S j M j x H h Z <sep> f', '0')\n",
      "\n",
      "delete\n",
      "0\n",
      "('delete k D w t <sep> e Z j g', 'k D w t')\n",
      "('delete H H H H Y H H H H H H H H H H <sep> H', 'H H H Y H H H H H H H H H H')\n",
      "('delete C n J g Z M y F G C l T x a C l s x Y <sep> G C l T x', 'C n J g Z M y F a C l s x Y')\n",
      "('delete U R J C W r x W r x L Z W r x O i N t W r x W r x E <sep> W r x', 'U R J C W r x L Z W r x O i N t W r x W r x E')\n",
      "('delete F g F U H M P p h e t t f d X j K F Z O s t <sep> h e t', 'F g F U H M P p t f d X j K F Z O s t')\n",
      "('delete G R H f k D U C G R H f k G R H f k D m <sep> G R H f k', 'D U C G R H f k G R H f k D m')\n",
      "('delete D L g M C l f u k e V <sep> o J w m', 'D L g M C l f u k e V')\n",
      "('delete c U i b K Q d d j y M Z v G N Z N n s <sep> Q d d j y M', 'c U i b K Z v G N Z N n s')\n",
      "('delete u o m u a y f k h a c c z <sep> Z t s c G s i', 'u o m u a y f k h a c c z')\n",
      "('delete N X H U s w Z n k P f n d J f j <sep> k P', 'N X H U s w Z n f n d J f j')\n",
      "('delete Z R W a R I e V K F Z T T L <sep> a R I', 'Z R W e V K F Z T T L')\n",
      "('delete l q F B M M C S m Q g t Z d Y h V Y k W P <sep> M M C S m Q g t', 'l q F B Z d Y h V Y k W P')\n",
      "('delete t y i j E v O I E l E q R B <sep> v O I E l E', 't y i j E q R B')\n",
      "('delete f k H w k H w W k H w f n k H w w m k H w J P <sep> k H w', 'f k H w W k H w f n k H w w m k H w J P')\n",
      "('delete T d p <sep> p', 'T d')\n",
      "('delete p n f E D z U p <sep> P m K v r R c H', 'p n f E D z U p')\n",
      "('delete j H j H j H j H j H j H j H j H O j H j H j H j H <sep> j H', 'j H j H j H j H j H j H j H O j H j H j H j H')\n",
      "('delete f k U Q v N D j k U Q v N D j Z <sep> k U Q v N D j', 'f k U Q v N D j Z')\n",
      "('delete h G w c j J L x X K v N i I W I c u h f <sep> L x X K v N i I W', 'h G w c j J I c u h f')\n",
      "('delete Y C C C C I f t C C C C C C C C w R <sep> C', 'Y C C C I f t C C C C C C C C w R')\n",
      "\n",
      "filter\n",
      "0\n",
      "('filter c q U s l <sep> q U', 'c s l')\n",
      "('filter N H c O I p o c O I x u z <sep> c O I', 'N H p o x u z')\n",
      "('filter i z M n N B W d H j o u s T G I T n d <sep> N B W d H j o u', 'i z M n s T G I T n d')\n",
      "('filter L f L L L <sep> L', 'f')\n",
      "('filter k I p Y j T y t b I H k <sep> V y W T', 'k I p Y j T y t b I H k')\n",
      "('filter H d y h N M t H d y h N M t <sep> H d y h N M t', '')\n",
      "('filter F M U s F M U F M U F M U <sep> F M U', 's')\n",
      "('filter c X c X c X c X c X <sep> c X', '')\n",
      "('filter o B u k j B Y x s o B u k j B Y x s <sep> o B u k j B Y x s', '')\n",
      "('filter x w O t d F t d F r L O R <sep> t d F', 'x w O r L O R')\n",
      "('filter Y F u Y r Y R c Y w c Y Y X d P Y Y Y z Y Y H K <sep> Y', 'F u r R c w c X d P z H K')\n",
      "('filter u S S Z b F o f I H q n E b H g h X S P <sep> F o f I H q n E b', 'u S S Z b H g h X S P')\n",
      "('filter F G Y G T s D a B F G Y G T s D <sep> F G Y G T s D', 'a B')\n",
      "('filter c c P c c c P w F c c P h c c P X I M J l I <sep> c c P', 'c w F h X I M J l I')\n",
      "('filter b I Z v p c i O z C Z r f <sep> k s p Y E V', 'b I Z v p c i O z C Z r f')\n",
      "('filter Z l h r v d Z l h r v d C <sep> Z l h r v d', 'C')\n",
      "('filter T I F I p f O i Z T I y u I P Q w m D <sep> I', 'T F p f O i Z T y u P Q w m D')\n",
      "('filter v g x G x D g x G x D t J J g x G x D <sep> g x G x D', 'v t J J')\n",
      "('filter u I f h z b u I f h z b k u I f h z b Q <sep> u I f h z b', 'k Q')\n",
      "('filter Z v X A q W F X I C R c i h U D <sep> c l D i', 'Z v X A q W F X I C R c i h U D')\n",
      "\n",
      "get_index\n",
      "0\n",
      "('get_index p Z j p Z j p Z j p Z j p Z j p Z j p Z j <sep> p Z j', '0')\n",
      "('get_index F M m p d k g e z F M m p d k g w <sep> F M m p d k g', '0')\n",
      "('get_index E M G P M G P I <sep> M G P', '1')\n",
      "('get_index S c j V Y T b p T A <sep> j V Y', '2')\n",
      "('get_index A N U i r P J r s N J Z L c y r Y I L b S k I <sep> r', '4')\n",
      "('get_index l M J b q h X C g X <sep> q', '4')\n",
      "('get_index J J J J J J N J J J J J J J J J <sep> J', '0')\n",
      "('get_index o q R x R i k t u t O j t a <sep> m z A A', '- 1')\n",
      "('get_index b t K q U q O J S i g m B z <sep> q U q O J', '3')\n",
      "('get_index y A N V C z a w Z v y P a c w c <sep> C z a w Z v y P', '4')\n",
      "('get_index k e Q W P f P W S n p M K a k <sep> U v v z Z d G c k o X o O', '- 1')\n",
      "('get_index c Q g Q g Q g <sep> Q g', '1')\n",
      "('get_index b Z t G d l z l i y u y y a t o B i F R <sep> i y u y y a t o', '8')\n",
      "('get_index M H w n c K H w n H w n H H w n H w n T H w n q j <sep> H w n', '1')\n",
      "('get_index S o z l R K s S o z l R K s S o z l R K s C <sep> S o z l R K s', '0')\n",
      "('get_index G I t G I t G I t <sep> G I t', '0')\n",
      "('get_index i t L p t L m i t L L F <sep> t L', '1')\n",
      "('get_index R s X m Q K s X m s X m L D Q S h s X m e X <sep> s X m', '1')\n",
      "('get_index A p p Q p C E p p p B p p i <sep> p', '1')\n",
      "('get_index T J H N f J M w P Q C <sep> J M', '5')\n",
      "\n",
      "search\n",
      "0\n",
      "('search t c o U E P o L J G y q s T c q m p g <sep> o L J G y q', 'y e s')\n",
      "('search M s J R k h N u s J R k h N u <sep> s J R k h N u', 'y e s')\n",
      "('search C k b N T P r T f O l D T o v <sep> T P r T', 'y e s')\n",
      "('search V C I c V u i n d H f u w g a k q f <sep> d H f u w g a', 'y e s')\n",
      "('search E e b P M u s b F C o B M D P T <sep> K k L j u x Y P v O K A x', 'n o')\n",
      "('search Q N z o I q i Y n M X U Y h V n Z u x <sep> q i', 'y e s')\n",
      "('search l u c W L e u c W Y u c W o W u c W <sep> u c W', 'y e s')\n",
      "('search X X X X X X X X X X X w X X X X <sep> X', 'y e s')\n",
      "('search c f l J b v H g M E b J E s u o a b S n A <sep> b', 'y e s')\n",
      "('search d U W u w q Q B b n f V z U L d K v <sep> Q B b n f V z U', 'y e s')\n",
      "('search A u x D Q Z v W O p d J f c M U I E p J G J M <sep> p d J f c', 'y e s')\n",
      "('search g d N X G d s G r d n <sep> X G d s G', 'y e s')\n",
      "('search S k z B r r S k z B r h Z S k z B r S k z B r <sep> S k z B r', 'y e s')\n",
      "('search l c X w C b Y O d <sep> o M Y d L D', 'n o')\n",
      "('search K g B R I S Q T w x A R s X m <sep> I S Q T w x', 'y e s')\n",
      "('search W X z q Z U <sep> M D X V', 'n o')\n",
      "('search X O a c j v C z L l d <sep> y x m i s y x R I', 'n o')\n",
      "('search F X a a F e I W S q <sep> a Y u U', 'n o')\n",
      "('search J A q i o V x F M A w T E <sep> V x F M A w', 'y e s')\n",
      "('search Z P I W P g R b I W P g R d I h <sep> I W P g R', 'y e s')\n",
      "\n",
      "sort\n",
      "0\n",
      "('sort X o e d F o o S H S d H o X X H <sep> G e w d S W H Q X o F u', 'e d d S S H H H X X X o o o o F')\n",
      "('sort Y H Y Y H q q H Y q q q q H q q Y q Y Y q <sep> W q Y H Y', 'q q q q q q q q q q H H H H Y Y Y Y Y Y Y')\n",
      "('sort F z F F z F e z F e F F e e <sep> F W e z', 'F F F F F F F e e e e z z z')\n",
      "('sort E I I I p M I p l h M u z <sep> D q I l H z M p u E h', 'I I I I l z M M p p u E h')\n",
      "('sort x V P P d D P O x j x j D M x <sep> v P O x j M V x L d p D', 'P P P O j j M V x x x x d D D')\n",
      "('sort F E H H w F H F F E w H F E <sep> r P w E H F', 'w w E E E H H H H F F F F F')\n",
      "('sort r r Z E Z X h E V W E r z <sep> z r Z V W h z r h X E', 'Z Z V W z r r r h X E E E')\n",
      "('sort L X B S s X s S L s X <sep> L N J I X S B u s', 'L L X X X S S B s s s')\n",
      "('sort H H C Z S S N Z L V H o w S V <sep> w V N o Z C L S H S A', 'w V V N o Z Z C L H H H S S S')\n",
      "('sort b g m b m g m b g m g g m b b b g g <sep> K g m b s L', 'g g g g g g g m m m m m b b b b b b')\n",
      "('sort o H H o o W a o o a H W H H x H W H x <sep> x H o h W h V a', 'x x H H H H H H H o o o o o W W W a a')\n",
      "('sort e Z Q c c A e e W l c p Z <sep> Z N e y l W A c X G Q p', 'Z Z e e e l W A c c c Q p')\n",
      "('sort T T T z V T z T T T z <sep> f z T E V', 'z z z T T T T T T T V')\n",
      "('sort n M o M o Q X X M X n t E u X E <sep> X M u Q t x t T E n h o', 'X X X X M M M u Q t E E n n o o')\n",
      "('sort o o o o <sep> o Z', 'o o o o')\n",
      "('sort r R R C h e b R g H H R x <sep> x e b g O R d C H F r h H', 'x e b g R R R R C r h H H')\n",
      "('sort G h f h n P h N N <sep> G d f h h n N P u', 'G f h h h n N N P')\n",
      "('sort L L L L n n L L L d n L n L L L d L <sep> L d n L', 'd d n n n n L L L L L L L L L L L L')\n",
      "('sort y u g o o u g <sep> g V o u B J y', 'g g o o u u y')\n",
      "('sort Z y y Z Z N y N d Z y d y y y y <sep> N e Z d a k y O', 'N N Z Z Z Z d d y y y y y y y y')\n",
      "\n",
      "replace\n",
      "0\n",
      "('replace c b <sep> b t', 'c t')\n",
      "('replace X u u x V z R j c X z O p d k S c A <sep> z w', 'X u u x V w R j c X w O p d k S c A')\n",
      "('replace W <sep> W g', 'g')\n",
      "('replace w <sep> w x', 'x')\n",
      "('replace d T I B E q Y Z Z Q h E X J M b E Y <sep> Y V', 'd T I B E q V Z Z Q h E X J M b E V')\n",
      "('replace u D n h c U F o S F D R <sep> u Z', 'Z D n h c U F o S F D R')\n",
      "('replace K A z w t t y R l p P c E d w B L F E C Q D b D o e <sep> e h', 'K A z w t t y R l p P c E d w B L F E C Q D b D o h')\n",
      "('replace u p P W E F l T i i y z m Y H g s t V r y V N I <sep> T Y', 'u p P W E F l Y i i y z m Y H g s t V r y V N I')\n",
      "('replace K O m z J x P h H V G s I o w A r B D K t u k q e U <sep> O q', 'K q m z J x P h H V G s I o w A r B D K t u k q e U')\n",
      "('replace x C r h l O k U C b n J i M p J c B E <sep> E o', 'x C r h l O k U C b n J i M p J c B o')\n",
      "('replace a G s o q S Q z I N N S q E i k o C K O L O T F <sep> S M', 'a G s o q M Q z I N N M q E i k o C K O L O T F')\n",
      "('replace Z r U g B j g c O H L E y m G B x U <sep> r f', 'Z f U g B j g c O H L E y m G B x U')\n",
      "('replace q h k X R b f c i h f T L q p n B s F y T J L w <sep> k D', 'q h D X R b f c i h f T L q p n B s F y T J L w')\n",
      "('replace g F p l G G c q U <sep> F c', 'g c p l G G c q U')\n",
      "('replace T o u B h w G G O y m N f G A f i e I <sep> i Y', 'T o u B h w G G O y m N f G A f Y e I')\n",
      "('replace H t Z G g S N G T H Z B O s Q y K <sep> T h', 'H t Z G g S N G h H Z B O s Q y K')\n",
      "('replace s Z T k O P E R n d T q x x Q h N v p <sep> d c', 's Z T k O P E R n c T q x x Q h N v p')\n",
      "('replace A D l w W F m l j a b O M e u a n R <sep> w W', 'A D l W W F m l j a b O M e u a n R')\n",
      "('replace u R Z P <sep> P K', 'u R Z K')\n",
      "('replace r Y H d O V Y A p X <sep> d p', 'r Y H p O V Y A p X')\n",
      "\n",
      "replace_many\n",
      "0\n",
      "('replace_many d u O h z t F <sep> z V d S t H O d u s', 'S s d h V H F')\n",
      "('replace_many D L U g v T M b x J Q G Q G <sep> b p Q h g L U d G w D h', 'h L d L v T M p x J h w h w')\n",
      "('replace_many F T f l z v Q <sep> v Y z v l Q f Y Q T F G', 'G T Y Q v Y T')\n",
      "('replace_many G T Q d S <sep> G H T P d b Q t', 'H P t b S')\n",
      "('replace_many Y U q U n z <sep> q G z q n C', 'Y U G U C q')\n",
      "('replace_many f f O a b W k l v R Y W V k v <sep> R j f S V e a B', 'S S O B b W k l v j Y W e k v')\n",
      "('replace_many K a P u v i a M q p P f m y L D Q R U w E u <sep> p v M I', 'K a P u v i a I q v P f m y L D Q R U w E u')\n",
      "('replace_many K f f q a <sep> q a a H f T K D', 'D T T a H')\n",
      "('replace_many Q d N G H <sep> G x H s', 'Q d N x s')\n",
      "('replace_many u f H N j J q t x r V t l Z u <sep> Z Y H v t x l R r l', 'u f v N j J q x x l V x R Y u')\n",
      "('replace_many H F x y b z k I <sep> F Y k e y u H p I h b T', 'p Y x u T z e h')\n",
      "('replace_many g p F R f p V C L <sep> V Z g W f o L Q', 'W p F R o p Z C Q')\n",
      "('replace_many T f s X z Z L D f e c q d m I M b Q C O h <sep> C O h K X V Q N', 'T f s V z Z L D f e c q d m I M b N O O K')\n",
      "('replace_many m E c f s o W k a g J <sep> m K a h g F E d', 'K d c f s o W k h F J')\n",
      "('replace_many y n y l w X J O j t t t M o I <sep> j F o D M D I Z n Q l B X a', 'y Q y B w a J O F t t t D D Z')\n",
      "('replace_many K w d j d g m q Z x y R v D <sep> R W y H', 'K w d j d g m q Z x H W v D')\n",
      "('replace_many F K L w t M Z T X T <sep> w X L I Z i K W t G F X X F T e M S', 'X W I X G S i e F e')\n",
      "('replace_many e a w o e K M z b D G M s Z w u N <sep> N T u b', 'e a w o e K M z b D G M s Z w b T')\n",
      "('replace_many W D F Y g u I v w l <sep> Y F l W g I u w W L w z F S', 'L D S F I w I v z W')\n",
      "('replace_many a H h K j t G d Y H y n R g s b Y c U y k b U j <sep> H x j x', 'a x h K x t G d Y x y n R g s b Y c U y k b U x')\n",
      "\n",
      "union\n",
      "0\n",
      "('union m I c <sep> K K C L A', 'c C I m K L A')\n",
      "('union L w J f d X S <sep> R s x', 'S x J s X w d R f L')\n",
      "('union <sep> M P H I a y', ' H y M I a P')\n",
      "('union h q a V h X R i d K q U q d a J f B c <sep> W V a j f', 'B J i W V d R h q f K X j c a U')\n",
      "('union K W n Y f Q <sep> d V', 'd n Q f Y W V K')\n",
      "('union i P k R a H <sep>', 'H R k i a P')\n",
      "('union d z Q n D o X a U <sep> I I', 'I X z d Q n o D a U')\n",
      "('union f l p <sep> g S', 'l S g f p')\n",
      "('union d O <sep> C q m Q i w z K w', 'i m w z d Q q O C K')\n",
      "('union <sep> n X x i m M W A', ' x i X m W n M A')\n",
      "('union Z <sep> W B W v U z H c P', 'H B v W z P c Z U')\n",
      "('union j T z S y O i C c <sep> g W L Q', 'S g y i W z T j c Q O C L')\n",
      "('union j S l F V U J u A <sep> h w Q', 'l S J w V j F u Q h A U')\n",
      "('union J L p c <sep> J P r f y T T Z N N p a h F H', 'H J N Z h f r y P T c F a L p')\n",
      "('union h J z o t x h q W <sep> Y J', 'x J W t z o h q Y')\n",
      "('union b K j <sep> q r b W S', 'S r b W j q K')\n",
      "('union z E C N p <sep> r M q Y L v I p R u h U H X k I w', 'H k v w N z R E h q U C M r I X u Y L p')\n",
      "('union P p f y T T <sep> C N S t T V Z C x', 'S y x t P T N V Z C f p')\n",
      "('union O a q z F y <sep> D u W E i g h H t O q i Z d', 'H i W z d E h q O Z g y t u F D a')\n",
      "('union z <sep> f T C l W P X w s w q A u w W', 'l s X W w z T P u q C f A')\n",
      "\n",
      "intersect\n",
      "0\n",
      "('intersect J k B d k Y G <sep> U y', '')\n",
      "('intersect Q y q d R F j Z U u m d S F f l X t R <sep> o Z u S J U o', 'S Z U u')\n",
      "('intersect e N k X P I j z l G U d c <sep> x M P j O R g a G U o l R Z g', 'l G P j U')\n",
      "('intersect e f N r f q <sep> z I Y C y d I O J l k s F D P G f R K R', 'f')\n",
      "('intersect I E j z P p H Y n f s P h <sep> n Z D T j', 'n j')\n",
      "('intersect i Y t p A y <sep> q R N p N', 'p')\n",
      "('intersect N Z r Q R u O <sep> Z T p H c', 'Z')\n",
      "('intersect Q H B <sep> X u d b', '')\n",
      "('intersect E y m F d D q C o P o j O u M q J <sep> m V u r c k h J T u R', 'u m J')\n",
      "('intersect B M <sep> o r', '')\n",
      "('intersect j T l C i L m o j k v u L <sep> t S X K z n G', '')\n",
      "('intersect b I v m D D O b h a q j I t <sep> r n v c H E A', 'v')\n",
      "('intersect f Y N i b M <sep> j i G R l Z', 'i')\n",
      "('intersect c B a L <sep> q R E a v w V i R V n Y', 'a')\n",
      "('intersect K D y h m H J Q R p D I <sep> V j O', '')\n",
      "('intersect <sep> H H v H l', '')\n",
      "('intersect j <sep> X h h k H N', '')\n",
      "('intersect u H E u E h u X U C w b R m t z G z <sep>', '')\n",
      "('intersect M e S d m E b w c F <sep> e i J c r C W W T O X', 'c e')\n",
      "('intersect s y <sep> W f m W X V E p K Q h r b Y i d t I u p V', '')\n",
      "\n",
      "set_1_minus_2\n",
      "0\n",
      "('set_1_minus_2 R m k <sep> R', 'm k')\n",
      "('set_1_minus_2 <sep> L W M', '')\n",
      "('set_1_minus_2 a j M g <sep> O u C R', 'a g M j')\n",
      "('set_1_minus_2 G F Z J s b R C <sep> B o l u x n', 'b J s G F R Z C')\n",
      "('set_1_minus_2 c n F x S T F v h Q k l o W W n S O z c h K V R i <sep> s S F f', 'l k x i v W V z T c R n Q o h O K')\n",
      "('set_1_minus_2 M c R R g a j r w c t Z q r T n N G w U Z <sep> W e L K', 'g r w t T j c R N n G Z q a M U')\n",
      "('set_1_minus_2 C F p N V g Q <sep> S v Z t G p K i n I t T j d Y Z V F', 'C Q g N')\n",
      "('set_1_minus_2 Y p P L s <sep> p u W N', 's Y L P')\n",
      "('set_1_minus_2 z V o p <sep> f l e i', 'o V z p')\n",
      "('set_1_minus_2 b N t r K v L E <sep> f n e H M P Z A X e m e p', 'b v N t E K L r')\n",
      "('set_1_minus_2 i w X e p g w j Y P r J <sep> e x X', 'g J i w p P j Y r')\n",
      "('set_1_minus_2 v D <sep> x D k I X s u O C O M d y j G C g', 'v')\n",
      "('set_1_minus_2 D d G K H <sep> S', 'H G d D K')\n",
      "('set_1_minus_2 M R F r H V <sep>', 'H V F R M r')\n",
      "('set_1_minus_2 p Q W c S <sep>', 'S W c Q p')\n",
      "('set_1_minus_2 u d t L Z j A P o N c K <sep> s S Y k n N z s K Z', 't P j d u c o L A')\n",
      "('set_1_minus_2 a V <sep> F n T O e e a w W M D O h', 'V')\n",
      "('set_1_minus_2 W e F v j u e j H w S S n r <sep> f e w', 'H S v W j F u n r')\n",
      "('set_1_minus_2 <sep> j X z M J Y h A W', '')\n",
      "('set_1_minus_2 R U U A l w b r n N e V s f X q i m m <sep>', 'b i w N V R n q A f e r l s X m U')\n",
      "\n",
      "set_2_minus_1\n",
      "0\n",
      "('set_2_minus_1 T T G w n <sep> c T F S k H y N g e', 'H S g k y N c F e')\n",
      "('set_2_minus_1 i G d h U L D Q m N t R i X <sep> P i Y E s', 's Y E P')\n",
      "('set_2_minus_1 l R o <sep> x e r a z s I u P C o z J A U g h d i E r h', 'J i z d E h U C e r g x I s P u a A')\n",
      "('set_2_minus_1 l u <sep> c S G A W g K m b J w l Q q G s', 'S g b J s m G W w c Q q K A')\n",
      "('set_2_minus_1 D <sep> W y I B N l Z n Q i T o r G z Z c g O Y x M p U F Y P', 'B i W G N z n Z O U M r l g y x I P T F c Q o Y p')\n",
      "('set_2_minus_1 S N x i i B D f N m D t Z m w D h <sep>', '')\n",
      "('set_2_minus_1 i S W d <sep> y C', 'C y')\n",
      "('set_2_minus_1 T m Y j C <sep> a H T F M i C Q R', 'H i F R Q a M')\n",
      "('set_2_minus_1 O j H W k <sep> x F K N y', 'y x N F K')\n",
      "('set_2_minus_1 E v Q <sep> t h I x h n', 'x I t n h')\n",
      "('set_2_minus_1 F C m T <sep> R', 'R')\n",
      "('set_2_minus_1 O C h r A V c y Q s I V M S P Q u i l <sep> Y X p p u', 'Y X p')\n",
      "('set_2_minus_1 s O u <sep> W Q r o', 'Q o W r')\n",
      "('set_2_minus_1 <sep> a C s w N e j c b H T v C i n z e q e g v', 'H b i v w N z n q C e g s T j c a')\n",
      "('set_2_minus_1 Z V R K j t g i t U c x f O m k w W K Y q G M a <sep> S n I l', 'I l S n')\n",
      "('set_2_minus_1 R R <sep> D Y e D W', 'Y e W D')\n",
      "('set_2_minus_1 o X N L v O g A O O A v <sep> C', 'C')\n",
      "('set_2_minus_1 <sep> V l B', 'l B V')\n",
      "('set_2_minus_1 Q W Z W c y E I d T <sep>', '')\n",
      "('set_2_minus_1 z v J I R Y S <sep> U e l F', 'F l e U')\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "from IPython.core.display import display, HTML\n",
    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
    "import json\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from prototypical_tasks import *\n",
    "from prototypical_tasks import _data_to_str, _str_to_data\n",
    "\n",
    "\n",
    "def show_sample_examples(func, num_examples=10, length_range=(2, 30), char_set=UPPER_AND_LOWER_LETTERS):\n",
    "    for _ in range(num_examples):\n",
    "        print(func(length_range=length_range, char_set=char_set))\n",
    "        \n",
    "BASIC_TASKS = ['copy', 'reverse', 'set', 'first_char', 'last_char', 'deduplicate', 'length', \n",
    "       'longest_word', 'duplicate', 'count', 'delete', 'filter', \n",
    "       'get_index', 'search', 'sort', 'replace', 'replace_many', 'union', 'intersect', 'set_1_minus_2', 'set_2_minus_1']\n",
    "\n",
    "\n",
    "def show_task_examples(task, is_natural_language=False, num_examples=10):\n",
    "    for count in range(num_examples):\n",
    "        \n",
    "        basic_task_sampling_args = {'length_range': (4,30), 'char_set':UPPER_AND_LOWER_LETTERS}\n",
    "        example = generate_example(TASK_REGISTRY[task], is_natural_language=is_natural_language, **basic_task_sampling_args)\n",
    "        if count % 100_000==0:\n",
    "            print(count)\n",
    "        if task == 'longest_word':\n",
    "            print(f'@input = {example[0]}')\n",
    "            print(f'output = {example[1]}')\n",
    "        else:\n",
    "            print(example)\n",
    "#             if len(example[1]) == 0 or example[1].isspace():\n",
    "#                 print(example[1])\n",
    "#                 raise RuntimeError()\n",
    "\n",
    "for task in BASIC_TASKS:\n",
    "# for task in ['get_index']:\n",
    "    print(f'\\n{task}')\n",
    "    show_task_examples(task, num_examples=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "T.encode('yes')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len('   ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'    '.isspace()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for func in SAMPLING_FUNCTIONS:\n",
    "    print(f'\\n{func.__name__}')\n",
    "    show_sample_examples(func, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "replace_many\n",
      "0\n",
      "('replace_many p h b x h V e u m i o e k <sep> p m i d m T V L e d h T o d', 'm T b x T L d u T d d d k')\n",
      "('replace_many E F v x V K J i U X <sep> E W K J U Y F A i j J Q X g x i', 'W A v i V J Q j Y g')\n"
     ]
    }
   ],
   "source": [
    "# for task in BASIC_TASKS:\n",
    "for task in ['replace_many']:\n",
    "    print(f'\\n{task}')\n",
    "    show_task_examples(task, num_examples=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'h'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from prototypical_tasks import deduplicate\n",
    "deduplicate('h h h')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'v d V r q v d V r q'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deduplicate('v d V V r q v d V r q')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "100000\n",
      "200000\n",
      "300000\n",
      "400000\n",
      "500000\n",
      "600000\n",
      "700000\n",
      "800000\n",
      "900000\n"
     ]
    }
   ],
   "source": [
    "from prototypical_tasks import generate_batch_examples\n",
    "for task in [\"deduplicate\"]:\n",
    "    generate_batch_examples(task=TASK_REGISTRY[task], num_examples=1_000_000, is_natural_language=False, task_config_str=\"count2_30\", length_range=(2, 30), char_set=UPPER_AND_LOWER_LETTERS)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "('deduplicate L M b u', 'L M b u')\n",
      "0\n",
      "1\n",
      "('deduplicate s s s S S S S S R R R R R R Q Q Q Q Q C C C C C', 's S R Q C')\n",
      "2\n",
      "('deduplicate B B B N N N r r B B B P M M M M t t D', 'B N r B P M t D')\n",
      "3\n",
      "('deduplicate g g g g g g g g g g g g g g g g g g g g', 'g')\n",
      "4\n",
      "('deduplicate b b h V j k v B F p b b s h J a h P t f a v', 'b h V j k v B F p b s h J a h P t f a v')\n",
      "5\n",
      "('deduplicate B u Y S d l X h v A u', 'B u Y S d l X h v A u')\n",
      "6\n",
      "('deduplicate o o W x C C B x T H I j f f i m R', 'o W x C B x T H I j f i m R')\n",
      "7\n",
      "('deduplicate N s b M F Z i h F U U', 'N s b M F Z i h F U')\n",
      "8\n",
      "('deduplicate h h h h h h E E w w', 'h E w')\n",
      "9\n",
      "('deduplicate b b b b N N N N f f f f F X X X', 'b N f F X')\n",
      "10\n",
      "('deduplicate Y e O O Y V f f e j Y Y Y w f E K D', 'Y e O Y V f e j Y w f E K D')\n",
      "11\n",
      "('deduplicate D D D D D D D D R R R R R R R R R K K K K i i i i i i i', 'D R K i')\n",
      "12\n",
      "('deduplicate N N N N N N N M M M V V V V C C C C L L L L', 'N M V C L')\n",
      "13\n",
      "('deduplicate m m m m m m m m m m m m m m', 'm')\n",
      "14\n",
      "('deduplicate x x J J J J J J J J F F F F', 'x J F')\n",
      "15\n",
      "('deduplicate x x x x x', 'x')\n",
      "16\n",
      "('deduplicate y y y y y y E E E E E E E H H H H H h h h h h h D D D', 'y E H h D')\n",
      "17\n",
      "('deduplicate b b N S w I X X n d y w w Z Z Y Y', 'b N S w I X n d y w Z Y')\n",
      "18\n",
      "('deduplicate V I n J k', 'V I n J k')\n",
      "19\n",
      "('deduplicate d C C I J p p L s', 'd C I J p L s')\n",
      "20\n",
      "('deduplicate u u u u u u u u y y y y y y y y y y y y T T T T T T T T T', 'u y T')\n",
      "21\n",
      "('deduplicate R z z f f f Z T T O O O O C C g P F x K k', 'R z f Z T O C g P F x K k')\n",
      "22\n",
      "('deduplicate E E E E t t t t i i i i i Z Z', 'E t i Z')\n",
      "23\n",
      "('deduplicate o o w w T T W A a a A j j j V V Y Q X X d j l p p z', 'o w T W A a A j V Y Q X d j l p z')\n",
      "24\n",
      "('deduplicate s s F H Y', 's F H Y')\n",
      "25\n",
      "('deduplicate a U U j j W W W W b b b b b T T T H H', 'a U j W b T H')\n",
      "26\n",
      "('deduplicate t p G G h h C C M M u u K v v j q u E E r r h D K g G B B U', 't p G h C M u K v j q u E r h D K g G B U')\n",
      "27\n",
      "('deduplicate V x d M a W F D z b z I j Y Q d', 'V x d M a W F D z b z I j Y Q d')\n",
      "28\n",
      "('deduplicate l d d Z Z q p V t t R R b e J x m v', 'l d Z q p V t R b e J x m v')\n",
      "29\n",
      "('deduplicate d Z o u a N N T b i i O O f z z H C C K r t R n C', 'd Z o u a N T b i O f z H C K r t R n C')\n",
      "30\n",
      "('deduplicate A A N N N N N N N n n n W W F X X X Y p t t e h h h h', 'A N n W F X Y p t e h')\n",
      "31\n",
      "('deduplicate Q Q Q p p p p s s s a K K Q G W I I', 'Q p s a K Q G W I')\n",
      "32\n",
      "('deduplicate x x g g g g l y v v S O O F', 'x g l y v S O F')\n",
      "33\n",
      "('deduplicate c c c c c c c d d d d N N N t t t t g g g g g g g g g g', 'c d N t g')\n",
      "34\n",
      "('deduplicate j D f j t t R Q A B j Q T s F Y j t p F', 'j D f j t R Q A B j Q T s F Y j t p F')\n",
      "35\n",
      "('deduplicate e e e e e e e e e S S S S S S T T T T T', 'e S T')\n",
      "36\n",
      "('deduplicate B v n e k a W e Y H R f p F D C S m Q B B V Z H s Z V J l y', 'B v n e k a W e Y H R f p F D C S m Q B V Z H s Z V J l y')\n",
      "37\n",
      "('deduplicate P P P P P', 'P')\n",
      "38\n",
      "('deduplicate F F F P P P P P', 'F P')\n",
      "39\n",
      "('deduplicate D l o o o G T T M M M R P P N N N E f s', 'D l o G T M R P N E f s')\n",
      "40\n",
      "('deduplicate k k z w e N z F K a j j I A M V C e e S m m p l N y u L', 'k z w e N z F K a j I A M V C e S m p l N y u L')\n",
      "41\n",
      "('deduplicate c c a a a a n n n n n n Z Z', 'c a n Z')\n",
      "42\n",
      "('deduplicate k L A f t y g v y m h L f C C e V A m', 'k L A f t y g v y m h L f C e V A m')\n",
      "43\n",
      "('deduplicate B B m E E b b b b o o J J J x x D D k k', 'B m E b o J x D k')\n",
      "44\n",
      "('deduplicate w w w w N N N', 'w N')\n",
      "45\n",
      "('deduplicate P t O O O z z Y Y Y n', 'P t O z Y n')\n",
      "46\n",
      "('deduplicate x x x x d d d U U U U r r r r r r k k k k r r r r X X D D D', 'x d U r k r X D')\n",
      "47\n",
      "('deduplicate U z G v v Q L l G', 'U z G v Q L l G')\n",
      "48\n",
      "('deduplicate M c c I', 'M c I')\n",
      "49\n",
      "('deduplicate r r r r r V V Y Y Y m m m m m m S S S S g g g l l', 'r V Y m S g l')\n"
     ]
    }
   ],
   "source": [
    "for task in [\"deduplicate\"]:\n",
    "    generate_batch_examples(task=TASK_REGISTRY[task], num_examples=50, is_natural_language=False, task_config_str=\"count2_30\", length_range=(2, 30), char_set=UPPER_AND_LOWER_LETTERS)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "deduplicate L M b u\tL M b u\n",
      "deduplicate L M b u____________________L M b u\n",
      "1\n",
      "deduplicate s s s S S S S S R R R R R R Q Q Q Q Q C C C C C\ts S R Q C\n",
      "deduplicate s s s S S S S S R R R R R R Q Q Q Q Q C C C C C____________________s S R Q C\n",
      "2\n",
      "deduplicate B B B N N N r r B B B P M M M M t t D\tB N r B P M t D\n",
      "deduplicate B B B N N N r r B B B P M M M M t t D____________________B N r B P M t D\n",
      "3\n",
      "deduplicate g g g g g g g g g g g g g g g g g g g g\tg\n",
      "deduplicate g g g g g g g g g g g g g g g g g g g g____________________g\n",
      "4\n",
      "deduplicate b b h V j k v B F p b b s h J a h P t f a v\tb h V j k v B F p b s h J a h P t f a v\n",
      "deduplicate b b h V j k v B F p b b s h J a h P t f a v____________________b h V j k v B F p b s h J a h P t f a v\n",
      "5\n",
      "deduplicate B u Y S d l X h v A u\tB u Y S d l X h v A u\n",
      "deduplicate B u Y S d l X h v A u____________________B u Y S d l X h v A u\n",
      "6\n",
      "deduplicate o o W x C C B x T H I j f f i m R\to W x C B x T H I j f i m R\n",
      "deduplicate o o W x C C B x T H I j f f i m R____________________o W x C B x T H I j f i m R\n",
      "7\n",
      "deduplicate N s b M F Z i h F U U\tN s b M F Z i h F U\n",
      "deduplicate N s b M F Z i h F U U____________________N s b M F Z i h F U\n",
      "8\n",
      "deduplicate h h h h h h E E w w\th E w\n",
      "deduplicate h h h h h h E E w w____________________h E w\n",
      "9\n",
      "deduplicate b b b b N N N N f f f f F X X X\tb N f F X\n",
      "deduplicate b b b b N N N N f f f f F X X X____________________b N f F X\n",
      "10\n",
      "deduplicate Y e O O Y V f f e j Y Y Y w f E K D\tY e O Y V f e j Y w f E K D\n",
      "deduplicate Y e O O Y V f f e j Y Y Y w f E K D____________________Y e O Y V f e j Y w f E K D\n",
      "11\n",
      "deduplicate D D D D D D D D R R R R R R R R R K K K K i i i i i i i\tD R K i\n",
      "deduplicate D D D D D D D D R R R R R R R R R K K K K i i i i i i i____________________D R K i\n",
      "12\n",
      "deduplicate N N N N N N N M M M V V V V C C C C L L L L\tN M V C L\n",
      "deduplicate N N N N N N N M M M V V V V C C C C L L L L____________________N M V C L\n",
      "13\n",
      "deduplicate m m m m m m m m m m m m m m\tm\n",
      "deduplicate m m m m m m m m m m m m m m____________________m\n",
      "14\n",
      "deduplicate x x J J J J J J J J F F F F\tx J F\n",
      "deduplicate x x J J J J J J J J F F F F____________________x J F\n",
      "15\n",
      "deduplicate x x x x x\tx\n",
      "deduplicate x x x x x____________________x\n",
      "16\n",
      "deduplicate y y y y y y E E E E E E E H H H H H h h h h h h D D D\ty E H h D\n",
      "deduplicate y y y y y y E E E E E E E H H H H H h h h h h h D D D____________________y E H h D\n",
      "17\n",
      "deduplicate b b N S w I X X n d y w w Z Z Y Y\tb N S w I X n d y w Z Y\n",
      "deduplicate b b N S w I X X n d y w w Z Z Y Y____________________b N S w I X n d y w Z Y\n",
      "18\n",
      "deduplicate V I n J k\tV I n J k\n",
      "deduplicate V I n J k____________________V I n J k\n",
      "19\n",
      "deduplicate d C C I J p p L s\td C I J p L s\n",
      "deduplicate d C C I J p p L s____________________d C I J p L s\n",
      "20\n",
      "deduplicate u u u u u u u u y y y y y y y y y y y y T T T T T T T T T\tu y T\n",
      "deduplicate u u u u u u u u y y y y y y y y y y y y T T T T T T T T T____________________u y T\n",
      "21\n",
      "deduplicate R z z f f f Z T T O O O O C C g P F x K k\tR z f Z T O C g P F x K k\n",
      "deduplicate R z z f f f Z T T O O O O C C g P F x K k____________________R z f Z T O C g P F x K k\n",
      "22\n",
      "deduplicate E E E E t t t t i i i i i Z Z\tE t i Z\n",
      "deduplicate E E E E t t t t i i i i i Z Z____________________E t i Z\n",
      "23\n",
      "deduplicate o o w w T T W A a a A j j j V V Y Q X X d j l p p z\to w T W A a A j V Y Q X d j l p z\n",
      "deduplicate o o w w T T W A a a A j j j V V Y Q X X d j l p p z____________________o w T W A a A j V Y Q X d j l p z\n",
      "24\n",
      "deduplicate s s F H Y\ts F H Y\n",
      "deduplicate s s F H Y____________________s F H Y\n",
      "25\n",
      "deduplicate a U U j j W W W W b b b b b T T T H H\ta U j W b T H\n",
      "deduplicate a U U j j W W W W b b b b b T T T H H____________________a U j W b T H\n",
      "26\n",
      "deduplicate t p G G h h C C M M u u K v v j q u E E r r h D K g G B B U\tt p G h C M u K v j q u E r h D K g G B U\n",
      "deduplicate t p G G h h C C M M u u K v v j q u E E r r h D K g G B B U____________________t p G h C M u K v j q u E r h D K g G B U\n",
      "27\n",
      "deduplicate V x d M a W F D z b z I j Y Q d\tV x d M a W F D z b z I j Y Q d\n",
      "deduplicate V x d M a W F D z b z I j Y Q d____________________V x d M a W F D z b z I j Y Q d\n",
      "28\n",
      "deduplicate l d d Z Z q p V t t R R b e J x m v\tl d Z q p V t R b e J x m v\n",
      "deduplicate l d d Z Z q p V t t R R b e J x m v____________________l d Z q p V t R b e J x m v\n",
      "29\n",
      "deduplicate d Z o u a N N T b i i O O f z z H C C K r t R n C\td Z o u a N T b i O f z H C K r t R n C\n",
      "deduplicate d Z o u a N N T b i i O O f z z H C C K r t R n C____________________d Z o u a N T b i O f z H C K r t R n C\n",
      "30\n",
      "deduplicate A A N N N N N N N n n n W W F X X X Y p t t e h h h h\tA N n W F X Y p t e h\n",
      "deduplicate A A N N N N N N N n n n W W F X X X Y p t t e h h h h____________________A N n W F X Y p t e h\n",
      "31\n",
      "deduplicate Q Q Q p p p p s s s a K K Q G W I I\tQ p s a K Q G W I\n",
      "deduplicate Q Q Q p p p p s s s a K K Q G W I I____________________Q p s a K Q G W I\n",
      "32\n",
      "deduplicate x x g g g g l y v v S O O F\tx g l y v S O F\n",
      "deduplicate x x g g g g l y v v S O O F____________________x g l y v S O F\n",
      "33\n",
      "deduplicate c c c c c c c d d d d N N N t t t t g g g g g g g g g g\tc d N t g\n",
      "deduplicate c c c c c c c d d d d N N N t t t t g g g g g g g g g g____________________c d N t g\n",
      "34\n",
      "deduplicate j D f j t t R Q A B j Q T s F Y j t p F\tj D f j t R Q A B j Q T s F Y j t p F\n",
      "deduplicate j D f j t t R Q A B j Q T s F Y j t p F____________________j D f j t R Q A B j Q T s F Y j t p F\n",
      "35\n",
      "deduplicate e e e e e e e e e S S S S S S T T T T T\te S T\n",
      "deduplicate e e e e e e e e e S S S S S S T T T T T____________________e S T\n",
      "36\n",
      "deduplicate B v n e k a W e Y H R f p F D C S m Q B B V Z H s Z V J l y\tB v n e k a W e Y H R f p F D C S m Q B V Z H s Z V J l y\n",
      "deduplicate B v n e k a W e Y H R f p F D C S m Q B B V Z H s Z V J l y____________________B v n e k a W e Y H R f p F D C S m Q B V Z H s Z V J l y\n",
      "37\n",
      "deduplicate P P P P P\tP\n",
      "deduplicate P P P P P____________________P\n",
      "38\n",
      "deduplicate F F F P P P P P\tF P\n",
      "deduplicate F F F P P P P P____________________F P\n",
      "39\n",
      "deduplicate D l o o o G T T M M M R P P N N N E f s\tD l o G T M R P N E f s\n",
      "deduplicate D l o o o G T T M M M R P P N N N E f s____________________D l o G T M R P N E f s\n",
      "40\n",
      "deduplicate k k z w e N z F K a j j I A M V C e e S m m p l N y u L\tk z w e N z F K a j I A M V C e S m p l N y u L\n",
      "deduplicate k k z w e N z F K a j j I A M V C e e S m m p l N y u L____________________k z w e N z F K a j I A M V C e S m p l N y u L\n",
      "41\n",
      "deduplicate c c a a a a n n n n n n Z Z\tc a n Z\n",
      "deduplicate c c a a a a n n n n n n Z Z____________________c a n Z\n",
      "42\n",
      "deduplicate k L A f t y g v y m h L f C C e V A m\tk L A f t y g v y m h L f C e V A m\n",
      "deduplicate k L A f t y g v y m h L f C C e V A m____________________k L A f t y g v y m h L f C e V A m\n",
      "43\n",
      "deduplicate B B m E E b b b b o o J J J x x D D k k\tB m E b o J x D k\n",
      "deduplicate B B m E E b b b b o o J J J x x D D k k____________________B m E b o J x D k\n",
      "44\n",
      "deduplicate w w w w N N N\tw N\n",
      "deduplicate w w w w N N N____________________w N\n",
      "45\n",
      "deduplicate P t O O O z z Y Y Y n\tP t O z Y n\n",
      "deduplicate P t O O O z z Y Y Y n____________________P t O z Y n\n",
      "46\n",
      "deduplicate x x x x d d d U U U U r r r r r r k k k k r r r r X X D D D\tx d U r k r X D\n",
      "deduplicate x x x x d d d U U U U r r r r r r k k k k r r r r X X D D D____________________x d U r k r X D\n",
      "47\n",
      "deduplicate U z G v v Q L l G\tU z G v Q L l G\n",
      "deduplicate U z G v v Q L l G____________________U z G v Q L l G\n",
      "48\n",
      "deduplicate M c c I\tM c I\n",
      "deduplicate M c c I____________________M c I\n",
      "49\n",
      "deduplicate r r r r r V V Y Y Y m m m m m m S S S S g g g l l\tr V Y m S g l\n",
      "deduplicate r r r r r V V Y Y Y m m m m m m S S S S g g g l l____________________r V Y m S g l\n"
     ]
    }
   ],
   "source": [
    "with open('lime_datasets/deduplicate/count2_30_0M.txt', 'r') as f:\n",
    "    for c, l in enumerate(f):\n",
    "        print(c)\n",
    "        print(l[:-1])\n",
    "        print(l[:-1].replace('\\t', '____________________'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
