{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:100% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "from IPython.core.display import display, HTML\n",
    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
    "import json\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from prototypical_tasks import *\n",
    "from prototypical_tasks import _data_to_str, _str_to_data\n",
    "from prototypical_tasks import UPPER_AND_LOWER_LETTERS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Original paper code data gen params: \n",
    "    * mean_numsents=10 -> \"sampling uniformly from 7 to 13 sentences\"\n",
    "    * mean_sentlen=10 -> \"sampling uniformly from 5 to 15 words\" \n",
    "* My code additional params\n",
    "    * length_range\n",
    "        * Does not affect actual data sampling. Code just resamples until it gets data whose input has number of nonspace chars within length_range[0] and length_range[1] inclusive.\n",
    "        * Example: if length_range = (0,512), no generated data will be greater than 512 \n",
    "    * vocab_size, chars_per_word, char_set\n",
    "        * In original paper code, sentences consist of words from vocabulary of size (vocab_size=5000) with (chars_per_word=3) chars per word. The vocabulary is: \"first 5000 3-letter character combinations using the English alphabet in lexical order starting from the right (aaa, baa, caa, ..., aab, bab, ...)\"\n",
    "        * char_set = possible chars that words could consist of. \n",
    "        * In original paper code, words consist of lower case letters only\n",
    "    * In my code, if vocab_size chars_per_word and char_set are all not None, then the data is such that:\n",
    "        * Sentences consist of words from vocabulary of size (vocab_size) with (chars_per_word) chars per word\n",
    "        * Each word in the vocabulary is  made by randomly sampling chars from char_set\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "from prototypical_tasks import summarization_tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['summ_CopyKeywordOneSentence',\n",
       " 'summ_CopyKeywordMultipleSentences',\n",
       " 'summ_CopyKeywordMultipleSentencesShuffled',\n",
       " 'summ_CopyKeywordMultipleSentencesSorted',\n",
       " 'summ_CopyQuoted',\n",
       " 'summ_CopyBulleted',\n",
       " 'summ_ClassifyKeyword',\n",
       " 'summ_ReplaceClassKeyword',\n",
       " 'summ_MajorityKeyword',\n",
       " 'summ_TopicSegregation',\n",
       " 'summ_ThresholdNumber',\n",
       " 'summ_LargestNumber',\n",
       " 'summ_ParaphraseWords',\n",
       " 'summ_JoinClauses',\n",
       " 'summ_BreakClauses',\n",
       " 'summ_TruncateSentence']"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summarization_tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "CopyKeywordOneSentence\n",
      "___\n",
      "J k l y , L r F O , F M i k , A i l M , x o R n , X f e j , v r H Z , P D j T , i W s V , X A L S , T f R h , S z k N , . y g I V , i F E f , E k y K , E h a T , M J l Q , f P k E , q v N y , V t n a , . g u q A , p f S v , k O P k , n A i A , b U v B , i b L a , h F F P , z G Q y , i L t r , S V c v , l x U s , . J L b h , g P O G , t j v p , w S w t , X f p L , N A s j , . i O R S , d O p B , g m K s , i S t x , Q h h I , a d C m , Q Q f S , z H i B , n j x Z , y e A S , p K S y , . R O S G , i W s V , H t G r , T U x N , K w X s , n B P r , T R M u , V C s N , v c q e , l x U s , J b B W , t G M e , w h O c , F U f z , H Y Y s , . X a L F , R e o l , U b J a , _ _ d 3 _ _ k e y w o r d _ 2 _ _ , u V j B , a h m H , W S m P , k O P k , s H q t , X i E H , C Z i D , D O r K , a T l p , y S L b , . P D j T , Z b E j , T d A G , d s u I , u W G p , u Q V F , e L K y , b Y Y Y , Z n n Q , W S m P , S D y S , B l W O , X N g M , U W f Y , .\n",
      "___\n",
      "X a L F , R e o l , U b J a , _ _ d 3 _ _ k e y w o r d _ 2 _ _ , u V j B , a h m H , W S m P , k O P k , s H q t , X i E H , C Z i D , D O r K , a T l p , y S L b , .\n",
      "\n",
      "\n",
      "CopyKeywordMultipleSentences\n",
      "___\n",
      "H e D l , i L t r , x X Z E , X M N K , D D q w , u B K H , T U h v , q Z T k , E h a T , . q q t q , Y u V X , I f t Q , E h A z , B n a T , . f Z w S , E O y J , G E M T , P f D l , C W w Z , k W i e , N v I M , P n c S , d d d u , . j m y J , y g I V , H Y Y s , J L b h , h k S g , K q O o , u r r M , p x C P , D O O u , a N F l , Q j U Q , . n w F c , b P b M , t P X P , f T A n , T U h v , j m y J , o D l p , z h s n , E x G q , _ _ d 4 _ _ k e y w o r d _ 9 _ _ , i P I P , . k W i e , Z b E j , Z n n Q , u W G p , _ _ d 4 _ _ k e y w o r d _ 1 _ _ , . o v P V , q q t q , _ _ d 4 _ _ k e y w o r d _ 7 _ _ , x u S K , y I h V , U m M r , .\n",
      "___\n",
      "n w F c , b P b M , t P X P , f T A n , T U h v , j m y J , o D l p , z h s n , E x G q , _ _ d 4 _ _ k e y w o r d _ 9 _ _ , i P I P , . k W i e , Z b E j , Z n n Q , u W G p , _ _ d 4 _ _ k e y w o r d _ 1 _ _ , . o v P V , q q t q , _ _ d 4 _ _ k e y w o r d _ 7 _ _ , x u S K , y I h V , U m M r , .\n",
      "\n",
      "\n",
      "CopyKeywordMultipleSentencesShuffled\n",
      "___\n",
      "t L C N , Q k U B , d Y E U , m h N b , w o T K , i U b R , T Z G w , V D f I , k O P k , . d d d u , Y q p N , C p p O , E x G q , _ _ d 5 _ _ k e y w o r d _ 8 _ _ , X a L F , p q O G , U y L D , . I g S I , d s u I , K s b t , O s R c , g y W p , C f i c , C z G D , S E x P , p K S y , W r n J , _ _ d 5 _ _ k e y w o r d _ 3 _ _ , s s L y , . m T W D , z j S C , N J y y , t q G b , y i T r , X q L K , Z A E a , z S C a , C Q Y F , z j S C , N E K C , p K S y , E O y J , . b a i I , x M O F , Q w e A , Z m u X , I f t Q , p b L R , D O O u , q v N y , H C n K , Q C S y , f k u z , u l y z , b e z G , O k H D , A i l M , . g y W p , o K Y K , r X C G , B n O P , g L l A , h k S g , X r y F , _ _ d 5 _ _ k e y w o r d _ 8 _ _ , E x G q , . I g S I , P q C V , z u o O , K s b t , y M K u , z K h v , T C x O , z R a x , B x q i , w S w t , A l s m , P D j T , h h I X , .\n",
      "___\n",
      "d d d u , Y q p N , C p p O , E x G q , _ _ d 5 _ _ k e y w o r d _ 8 _ _ , X a L F , p q O G , U y L D , . I g S I , d s u I , K s b t , O s R c , g y W p , C f i c , C z G D , S E x P , p K S y , W r n J , _ _ d 5 _ _ k e y w o r d _ 3 _ _ , s s L y , . g y W p , o K Y K , r X C G , B n O P , g L l A , h k S g , X r y F , _ _ d 5 _ _ k e y w o r d _ 8 _ _ , E x G q , .\n",
      "\n",
      "\n",
      "CopyKeywordMultipleSentencesSorted\n",
      "___\n",
      "N V V g , _ _ d 6 _ _ k e y w o r d _ 6 _ _ , x h j B , k W i e , J k l y , . x M O F , o N T I , Z m u X , s G z k , _ _ d 6 _ _ k e y w o r d _ 8 _ _ , t q G b , J b B W , O s y z , N h v g , C R P N , . K B Q a , v Y p E , l x U s , h m D g , B n a T , D O O u , Y I I b , B C K o , . F q z c , t r G j , e y B d , X f e j , N T E J , x t K r , R d j W , Q C S y , C p p O , w P o j , L r F O , . F R Q w , u B K H , f W v b , c C Q j , t j v p , O C B S , l x U s , D G Z x , . M S d J , N v I M , C p p O , o K Y K , x r d a , v r H Z , _ _ d 6 _ _ k e y w o r d _ 5 _ _ , Q j U Q , X i E H , R u w b , R X H r , J z k w , B x q i , . y S L b , e O U D , g y W p , w j t W , O s y z , u r r M , C O N q , i P I P , q P E Q , .\n",
      "___\n",
      "M S d J , N v I M , C p p O , o K Y K , x r d a , v r H Z , _ _ d 6 _ _ k e y w o r d _ 5 _ _ , Q j U Q , X i E H , R u w b , R X H r , J z k w , B x q i , . N V V g , _ _ d 6 _ _ k e y w o r d _ 6 _ _ , x h j B , k W i e , J k l y , . x M O F , o N T I , Z m u X , s G z k , _ _ d 6 _ _ k e y w o r d _ 8 _ _ , t q G b , J b B W , O s y z , N h v g , C R P N , .\n",
      "\n",
      "\n",
      "CopyQuoted\n",
      "___\n",
      "Q h h I , G S d J , e y B d , h F F P , I g S I , T p E P , u B K H , P f D l , . V t n a , Z O t N , f K a A , x m t m , F f q f , i I U o , m s n d , o N T I , h F F P , O h I v , H r O r , W S m P , V D f I , _ _ d 7 _ _ b e g i n _ q u o t e _ _ , u B K H , _ _ d 7 _ _ e n d _ q u o t e _ _ , . P D j T , q P E Q , g m K s , b E R f , m T W D , p h c H , . X r y F , E k j T , h h I X , n x X Z , q u V J , Z n n Q , V B C s , n x X Z , d u N x , y g I V , I N W F , g k R p , T p E P , T R M u , . e W D u , S E x P , X q L K , L r F O , h h I X , a d C m , p q O G , E s D p , x t K r , S z k N , H m l Y , k c B p , . b n m p , Z m u X , K B Q a , q q t q , w V s V , . s t E H , X r y F , O t T j , J z k w , k c B p , k O P k , b O g U , n x X Z , O s y z , . h m D g , E E H x , e u B P , s w f L , x r d a , k c B p , . I b m o , k k j p , Z t x E , Q k U B , r m Y k , i L v Q , z S C a , U b J a , n w F c , W r n J , b O g U , t W s p , N v I M , .\n",
      "___\n",
      "u B K H , .\n",
      "\n",
      "\n",
      "CopyBulleted\n",
      "___\n",
      "I g Q T , S V c v , I z i a , a U d w , J I g V , C X m N , O t T j , O s R c , J F H a , T Z G w , q y M Y , U G v G , h m D g , . _ _ d 8 _ _ b u l l e t _ _ , X N g M , i P I P , a D b b , N h v g , U o R S , p f S v , W g h J , Z A E a , z p h L , O N l v , u G r t , C S U o , R p O H , p h c H , . T b A D , x U A h , p f S v , i L v Q , K u g x , T d A G , e I x G , Z O t N , M J l Q , Z O t N , q q t q , y I h V , s t E H , h F F P , . i U b R , H c h G , S E x P , N A s j , O D n X , l L t l , Z O t N , V G l L , k p P g , . S K T P , N u L X , b n m p , r x i Z , d B g a , . x m t m , x o R n , d d d u , h D o y , u r r M , K w X s , V G l L , H b E H , R e o l , q Y C g , R u w b , . i U b R , E k y K , d u N x , V B C s , a M P o , C L E x , L r F O , P f D l , I f t Q , w V s V , i s x t , u Q V F , . u G r t , w L p Q , S x m W , b r c a , q y M Y , J M G i , q Z T k , g P O G , T p E P , U o R S , T Z G w , D p G Q , .\n",
      "___\n",
      "X N g M , i P I P , a D b b , N h v g , U o R S , p f S v , W g h J , Z A E a , z p h L , O N l v , u G r t , C S U o , R p O H , p h c H , .\n",
      "\n",
      "\n",
      "ClassifyKeyword\n",
      "___\n",
      "Y o L m , _ _ d 1 0 _ _ k e y w o r d _ 3 _ _ , n L c d , r X C G , S E x P , e o i B , h p J H , e r o h , n K Z E , x h j B , . i c u q , x m t m , c g L S , S K T P , h D o y , . b x u d , P q C V , d s u I , E h a T , h F F P , I g Q T , i O R S , . f V G v , y e A S , i U K h , d s T x , H b E H , X i E H , W g h J , p q O G , t r G j , B C G g , T U x N , U W f Y , E h A z , . g k R p , R u w b , p U j Z , o N T I , y I h V , . C m z x , j f B B , v Y p E , N j W U , a N F l , . l L t l , x m t m , X f p L , d B g a , H C n K , v r H Z , f P k E , R y T q , x P b a , p b L R , . F R Q w , h h J J , k k j p , z h s n , B n O P , l L t l , C p p O , S c l T , .\n",
      "___\n",
      "t h e , k e y w o r d , w a s , p o s i t i v e , .\n",
      "\n",
      "\n",
      "ReplaceClassKeyword\n",
      "___\n",
      "p L h o , n Q Z K , x w M L , z g v O , T U h v , y C k o , Z q N w , t W s p , y i T r , I Q f H , j f B B , q q t q , H f r z , N j W U , . h D o y , W K u m , U o R S , D p G Q , e p l V , s w f L , Y m g G , s D z u , A l s m , J L b h , K k M x , n B P r , n A i A , p q O G , . C X m N , t P X P , V E G r , F q z c , l a f o , E E H x , e o i B , B l W O , C p p O , . Q w e L , Z b E j , v Y p E , B C G g , i E B q , t P X P , X r y F , . T p E P , Q h h I , z u o O , S z k N , X N g M , x u S K , H m l Y , n j x Z , X v V R , . x h j B , T U h v , X A L S , A l s m , p h l X , _ _ d 1 1 _ _ c l a s s _ 0 _ k w _ 2 _ _ , Q k U B , D D q w , s t E H , s G z k , X v V R , W S m P , R O S G , h m D g , . v v d e , L r F O , J Z V a , B u J v , d Y E U , X q L K , .\n",
      "___\n",
      "x h j B , T U h v , X A L S , A l s m , p h l X , _ _ d 1 1 _ _ c l a s s _ 0 _ _ , Q k U B , D D q w , s t E H , s G z k , X v V R , W S m P , R O S G , h m D g , .\n",
      "\n",
      "\n",
      "MajorityKeyword\n",
      "___\n",
      "T f R h , q s U J , _ _ d 1 2 _ _ k e y w o r d _ 1 _ _ , Z t x E , r x i Z , V U Z H , T L f H , t L C N , s b r h , s m j r , p h l X , I L Q G , v r H Z , e o V O , O V V V , . c t N M , S c t m , N A s j , y e A S , p h l X , _ _ d 1 2 _ _ k e y w o r d _ 1 _ _ , . p x C P , H f r z , d u N x , S E x P , _ _ d 1 2 _ _ k e y w o r d _ 1 _ _ , F J o v , . b R N W , g m K s , J M G i , _ _ d 1 2 _ _ k e y w o r d _ 2 _ _ , c g L S , e u m y , D p G Q , P T D V , t q G b , . z p h L , I N W F , E x G q , z G Q y , C f i c , s c N M , i W s V , X M N K , h p J H , Z O t N , A l s m , . t G M e , _ _ d 1 2 _ _ k e y w o r d _ 1 _ _ , v E A e , I f t Q , X a L F , h k S g , b x u d , . l w E b , t G M e , w o T K , R y T q , X S G T , c t N M , .\n",
      "___\n",
      "T h e , k e y w o r d , 1 , o c c u r r e d , m o r e , n u m b e r , o f , t i m e s , .\n",
      "\n",
      "\n",
      "TopicSegregation\n",
      "___\n",
      "y M K u , Z t x E , k K K n , O V V V , C X m N , J K D M , . z H i B , C E t T , g y W p , t P X P , i c u q , P f D l , u l y z , o K Y K , X a L F , X V Q K , J z z G , Z O t N , c l g D , J U G s , . i L v Q , L V F u , w n J F , p K S y , C X m N , V t n a , O s y z , _ _ d 1 3 _ _ c l a s s _ 2 _ k w _ 0 _ _ , B l W O , p x C P , d Y E U , m y u H , . p x C P , O D n X , d s T x , b G Z w , G a U E , N y V Q , s D z u , N A s j , . t L C N , L m x o , d B g a , t q G b , o X O S , . m p k K , t Q W l , F R Q w , _ _ d 1 3 _ _ c l a s s _ 0 _ k w _ 0 _ _ , I g Q T , s c N M , i c u q , Z O t N , F U f z , g M S s , . P r m U , Q w e L , b P b M , Z z I V , z g v O , X v V R , f K a A , G E M T , p q O G , h D q Y , S E x P , . b R N W , v v d e , W g h J , C Y Y y , T L f H , I z i a , b x s p , b x u d , . h d Z j , E O y J , V V z B , I b m o , W S m P , b L k Z , O k H D , Z b E j , p q O G , M S d J , . l W f Q , t Q W l , b G Z w , x U A h , l E x M , I g Q T , Q h h I , r I G N , e u B P , .\n",
      "___\n",
      "s e c t i o n , _ _ d 1 3 _ _ c l a s s _ 0 _ _ m p k K , t Q W l , F R Q w , _ _ d 1 3 _ _ c l a s s _ 0 _ k w _ 0 _ _ , I g Q T , s c N M , i c u q , Z O t N , F U f z , g M S s , . s e c t i o n , _ _ d 1 3 _ _ c l a s s _ 1 _ _ s e c t i o n , _ _ d 1 3 _ _ c l a s s _ 2 _ _ i L v Q , L V F u , w n J F , p K S y , C X m N , V t n a , O s y z , _ _ d 1 3 _ _ c l a s s _ 2 _ k w _ 0 _ _ , B l W O , p x C P , d Y E U , m y u H , .\n",
      "\n",
      "\n",
      "ThresholdNumber\n",
      "___\n",
      "F f q f , I L Q G , I V k o , h f n x , o X O S , . T b A D , D I v c , Z A E a , t G M e , I L Q G , u G r t , . L m x o , n L c d , V E G r , o D l p , C S U o , N j W U , z H i B , w V s V , X r y F , i s x t , b x u d , O k H D , . N v I M , b Y Y Y , _ _ d 1 4 _ _ 4 1 _ _ , i N A T , K w X s , N E K C , S V c v , b E R f , x o e I , F U f z , . p b L R , Y M g M , W O W N , f P k E , m J Q h , R O S G , X d O T , J b B W , j d U A , p q O G , . Z O t N , z Z c T , z u o O , h p J H , r Y j q , g P O G , y C k o , H m l Y , p q O G , D I v c , O V V V , C A O x , . w n J F , h R F E , U o R S , B C K o , U W f Y , n x X Z , X r y F , U S X L , h D q Y , I t Y m , F R Q w , S V c v , Z t x E , g I U u , y g I V , .\n",
      "___\n",
      "t h e , n u m b e r , w a s , b e l o w , t h r e s h o l d , .\n",
      "\n",
      "\n",
      "LargestNumber\n",
      "___\n",
      "o N T I , m B c V , p b L R , N u L X , v r H Z , . K P j W , w o T K , C z G D , M S d J , _ _ d 1 6 _ _ 3 1 _ _ , L k F T , . k k j p , k c B p , s c N M , E M h w , L V F u , b Y Y Y , s s L y , b G Z w , e W D u , i W s V , H e D l , Q C S y , x w M L , g m K s , . n p Q W , _ _ d 1 6 _ _ 6 3 _ _ , L r F O , A l s m , X i E H , t L C N , . o v P V , i L t r , b E R f , T L F h , C z g j , G S d J , . t j v p , e p l V , s c N M , a U d w , j m y J , Y u V X , o v P V , s b r h , A l s m , z G Q y , V E G r , p b C G , p h l X , e L K y , v r H Z , . D G Z x , _ _ d 1 6 _ _ 3 7 _ _ , L m x o , l w E b , f W v b , . h k S g , p K S y , g P O G , X i E H , V G l L , P D j T , k Z f E , V E G r , O V V V , V B C s , o v P V , K B Q a , a U d w , F R Q w , e O U D , . P g e Y , p U j Z , X A L S , D G Z x , e o V O , X r y F , i N A T , P D j T , n x X Z , X i E H , H c h G , v Y p E , K B Q a , .\n",
      "___\n",
      "T h e , l a r g e s t , n u m b e r , i s , 6 3 , .\n",
      "\n",
      "\n",
      "ParaphraseWords\n",
      "___\n",
      "h f n x , h D o y , k y Q W , G S d J , u W G p , r x i Z , . h F F P , g w u a , w o T K , w S w t , t G M e , d Y E U , X j T D , A l s m , p k Z O , b B b B , . H Y Y s , Y q p N , k p P g , Z m p I , O N l v , X j T D , e o V O , J k l y , . B T S Y , C z g j , q Z T k , Z b E j , s w f L , L m x o , . q p O O , b R N W , v c q e , V B C s , d u N x , . q O E y , w n J F , W t q R , T R M u , t W s p , I c s o , . z G Q y , i b L a , H t G r , H r O r , Z A E a , b r c a , o d u p , N T O p , u G F q , O k H D , J b B W , F U f z , i b L a , . B n a T , L C v f , _ _ d 1 8 _ _ s r c _ 1 2 _ _ , s Z K H , P g e Y , q N U p , w m O f , T U x N , Y o L m , i I U o , e L K y , y e A S , Y U p j , N J y y , H b E H , . z R a x , h F z A , x r d a , E s D p , q y M Y , C W w Z , b L k Z , J M G i , D O O u , Z m G g , . e u m y , C E t T , W K u m , G n p E , J M G i , u V j B , N A s j , g P O G , I g Q T , I Q f H , a d C m , B Y J d , . I Q f H , D G Z x , r l Y x , G S d J , b U v B , f Z w S , .\n",
      "___\n",
      "B n a T , L C v f , _ _ d 1 8 _ _ t a r g e t _ 1 2 _ 1 _ _ , s Z K H , P g e Y , q N U p , w m O f , T U x N , Y o L m , i I U o , e L K y , y e A S , Y U p j , N J y y , H b E H , .\n",
      "\n",
      "\n",
      "JoinClauses\n",
      "___\n",
      "e y B d , M g t q , k Z f E , b a i I , k U P O , . b U v B , f k u z , F q z c , f k u z , t P X P , g w u a , n W I O , y I h V , C O N q , . _ _ d 1 9 _ _ j o i n _ 1 _ _ , J z z G , Z z I V , k Z f E , V C s N , v Y p E , L k F T , a N F l , T d A G , v v d e , . X r y F , d s u I , P T D V , r x i Z , i U b R , U S X L , B C K o , o d u p , S z k N , k W i e , w P o j , . D O O u , I f t Q , r I G N , u l y z , C z g j , _ _ d 1 9 _ _ j o i n _ 3 _ _ , b x u d , . I b m o , j f B B , m h N b , g M S s , B n O P , T U h v , u B K H , k k j p , . i P I P , h m D g , D I v c , h d Z j , X v V R , _ _ d 1 9 _ _ j o i n _ 4 _ _ , X a L F , R y T q , R y T q , Q D K o , f T A n , y C p D , I N W F , . R e o l , a M P o , s b r h , b B b B , i c u q , X N g M , x M O F , J T i D , z R a x , . X i E H , o K Y K , x o R n , r J K a , r J K a , N J y y , X j T D , . Q A c J , Z n n Q , p R z c , h h J J , b O g U , X A L S , A l s m , . R O S G , s m j r , T L f H , x U A h , R u w b , u l y z , e I x G , .\n",
      "___\n",
      "T h e , c o m b i n e d , s e n t e n c e , i s , _ _ d 1 9 _ _ j o i n _ 1 _ _ , J z z G , Z z I V , k Z f E , V C s N , v Y p E , L k F T , a N F l , T d A G , v v d e , _ _ d 1 9 _ _ j o i n _ 3 _ _ , b x u d , _ _ d 1 9 _ _ j o i n _ 4 _ _ , X a L F , R y T q , R y T q , Q D K o , f T A n , y C p D , I N W F , .\n",
      "\n",
      "\n",
      "BreakClauses\n",
      "___\n",
      "q Z T k , X v l s , d u N x , C z g j , i L t r , . r X C G , F f q f , t r G j , K u g x , l x U s , . I Q f H , y i T r , q O E y , C A O x , a U d w , N V V g , p f S v , b B b B , H Y Y s , b L k Z , a h C c , z Z c T , Q C S y , i S t x , . h R F E , Q h h I , e I x G , r I G N , s s L y , . N u L X , G a U E , C R P N , m s n d , i O R S , p p W X , C S U o , . I c s o , D G Z x , E E H x , E M h w , T L f H , P f D l , D G Z x , C f i c , n W I O , f W v b , . X v V R , C z G D , Y u V X , X f e j , _ _ d 2 0 _ _ j o i n _ 1 _ _ , k Z f E , i c u q , f P k E , _ _ d 2 0 _ _ j o i n _ 4 _ _ , j d U A , V t n a , l w E b , G n p E , k k j p , . F o o c , Y I I b , q q t q , C L E x , y C k o , X A L S , g m K s , G a U E , I z i a , n Q Z K , S V c v , i U K h , . w N E w , Q w e L , V t n a , b n m p , F o o c , X S G T , k k j p , s D z u , O s y z , z G Q y , t P X P , e p l V , k y Q W , i L t r , f Z w S , .\n",
      "___\n",
      "X v V R , C z G D , Y u V X , X f e j , _ _ d 2 0 _ _ j o i n _ 1 _ _ , k Z f E , i c u q , f P k E , . X v V R , C z G D , Y u V X , X f e j , _ _ d 2 0 _ _ j o i n _ 4 _ _ , j d U A , V t n a , l w E b , G n p E , k k j p , .\n",
      "\n",
      "\n",
      "TruncateSentence\n",
      "___\n",
      "U q Y Z , M J l Q , E x G q , i U b R , z Z c T , W K u m , i s x t , m T W D , p R z c , q O E y , N j W U , v r H Z , Y o L m , i U b R , C R P N , . g k R p , X f p L , p f S v , B x q i , H M K U , W U h K , X v V R , u W G p , p U j Z , h t R g , O s R c , J L b h , b R N W , n A i A , . m h N b , n j x Z , U W f Y , Z z I V , V D f I , Q w e L , k y Q W , E g X v , K u g x , H t G r , V D f I , B n O P , F e F Y , v E A e , . h F F P , X v l s , w P o j , f V G v , m p k K , . u B K H , b x s p , D I v c , q v N y , z S C a , b x u d , G E M T , S K T P , . d Y E U , N V V g , Z m p I , o D l p , i O R S , T L F h , n j x Z , b r c a , B Y J d , O C B S , L m x o , . M S d J , R X H r , _ _ d 2 1 _ _ k e y w o r d _ 8 _ _ , u B K H , h D q Y , K w X s , b a i I , O V V V , T R M u , . y C k o , x u S K , T R M u , Q j U Q , m s n d , u Q V F , C Y Y y , h p J H , k K K n , W U h K , K s b t , h h I X , .\n",
      "___\n",
      "M S d J , R X H r , .\n"
     ]
    }
   ],
   "source": [
    "from summarization_tasks_datagen.ourtasks import gen_one_summ_data, GENERATOR, BaseGenerator\n",
    "GENERATOR = BaseGenerator()\n",
    "for t in summarization_tasks:\n",
    "    print('\\n')\n",
    "    print(t)\n",
    "#     d = gen_one_summ_data(t, (0,512), UPPER_AND_LOWER_LETTERS, 500, 4, 10, 10)\n",
    "    d = gen_one_summ_data(t, (0,512), None, None, None, 10, 10)\n",
    "\n",
    "    print('___')\n",
    "    print(d[0])\n",
    "    print('___')\n",
    "    print(d[1])\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ClpZ,jncf,URJE,zNvp,RxqJ,.NGvW,DvVP,__d21__keyword_5__,ClpZ,miRj,FmcO,mCMf,TdAz,obGn,.dgan,Xwvc,nOrq,IIJL,guZw,brLN,ZdfU,BrUX,HKUd,hxzU,.izIh,FjaO,pkPm,XCVp,KfXI,jxsl,.zilo,ADfx,ddDd,IcIV,QUEs,UsoP,mvdc,KfXI,glUs,SHgq,JByA,FYtq,jNql,Xwvc,kXSp,.CCtD,rMIo,bSob,wXMR,svMg,VxYg,.bVXo,RSuc,ubDq,svMg,kyaQ,idXm,TcII,eUjZ,.kXSp,zFuZ,zCmx,Jzhv,VSdd,zCmx,IadT,SEQm,dTeK,TaJE,XCVp,JByA,.'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d[0].replace(' ', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'m u c , j l a , y h d , h d f , d x a , s b b , w i g , x l d , . r w e , t a d , r v e , f d a , a o e , f w a , n e c , t b g , . z n g , l i d , g i b , r o e , e g a , y q f , o k e , y y e , u b f , . y r a , h k g , q k g , e f h , o i h , w q f , j w g , t k e , k s c , y a f , f h a , . q p d , g t e , l c c , w l g , g y b , a f c , d i a , w s c , z a h , t e d , c e c , i t g , p p b , d v g , v q e , . d a a , z d c , z a e , p r a , k s d , u j f , _ _ d 2 1 _ _ k e y w o r d _ 6 _ _ , l z a , d z b , f q c , h q a , . u q g , i l b , p w b , g r b , c h h , s r a , j h d , e a b , e z b , w q a , b j h , p u c , r a h , . x d c , d o d , r j a , v q c , n s e , u p b , . z o f , s x b , s b h , c l c , g l f , w w a , m w d , p m f , r v b , b q e , g x f , o x g , . n y a , m p f , u v g , u z g , o v c , c l b , . t i e , e v f , f n d , k a a , z l c , .'"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "' '.join(d[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
