Search.setIndex({docnames:["agents","code","experiments","index","mdp","overview","planning","tasks","tutorial","utils"],envversion:52,filenames:["agents.rst","code.rst","experiments.rst","index.rst","mdp.rst","overview.rst","planning.rst","tasks.rst","tutorial.rst","utils.rst"],objects:{"":{simple_rl:[5,0,0,"-"]},"simple_rl.agents":{AgentClass:[0,0,0,"-"],BeliefAgentClass:[0,0,0,"-"],DelayedQAgentClass:[0,0,0,"-"],DoubleQAgentClass:[0,0,0,"-"],FixedPolicyAgentClass:[0,0,0,"-"],QLearningAgentClass:[0,0,0,"-"],RMaxAgentClass:[0,0,0,"-"],RandomAgentClass:[0,0,0,"-"]},"simple_rl.agents.AgentClass":{Agent:[0,1,1,""]},"simple_rl.agents.AgentClass.Agent":{act:[0,2,1,""],end_of_episode:[0,2,1,""],get_name:[0,2,1,""],get_parameters:[0,2,1,""],policy:[0,2,1,""],reset:[0,2,1,""],set_name:[0,2,1,""]},"simple_rl.agents.BeliefAgentClass":{BeliefAgent:[0,1,1,""]},"simple_rl.agents.BeliefAgentClass.BeliefAgent":{act:[0,2,1,""],policy:[0,2,1,""]},"simple_rl.agents.DelayedQAgentClass":{DelayedQAgent:[0,1,1,""]},"simple_rl.agents.DelayedQAgentClass.DelayedQAgent":{act:[0,2,1,""],end_of_episode:[0,2,1,""],get_action_distr:[0,2,1,""],get_max_q_action:[0,2,1,""],get_max_q_value:[0,2,1,""],get_parameters:[0,2,1,""],get_q_value:[0,2,1,""],greedy_q_policy:[0,2,1,""],reset:[0,2,1,""],set_q_function:[0,2,1,""],set_vmax:[0,2,1,""],update:[0,2,1,""]},"simple_rl.agents.DoubleQAgentClass":{DoubleQAgent:[0,1,1,""]},"simple_rl.agents.DoubleQAgentClass.DoubleQAgent":{act:[0,2,1,""],get_avg_q_value:[0,2,1,""],get_max_q_action:[0,2,1,""],get_max_q_value:[0,2,1,""],get_q_value:[0,2,1,""],reset:[0,2,1,""],update:[0,2,1,""]},"simple_rl.agents.FixedPolicyAgentClass":{FixedPolicyAgent:[0,1,1,""]},"simple_rl.agents.FixedPolicyAgentClass.FixedPolicyAgent":{NAME:[0,3,1,""],act:[0,2,1,""],set_policy:[0,2,1,""]},"simple_rl.agents.QLearningAgentClass":{QLearningAgent:[0,1,1,""]},"simple_rl.agents.QLearningAgentClass.QLearningAgent":{act:[0,2,1,""],end_of_episode:[0,2,1,""],epsilon_greedy_q_policy:[0,2,1,""],get_action_distr:[0,2,1,""],get_max_q_action:[0,2,1,""],get_max_q_value:[0,2,1,""],get_parameters:[0,2,1,""],get_q_value:[0,2,1,""],get_value:[0,2,1,""],reset:[0,2,1,""],soft_max_policy:[0,2,1,""],update:[0,2,1,""]},"simple_rl.agents.RMaxAgentClass":{RMaxAgent:[0,1,1,""]},"simple_rl.agents.RMaxAgentClass.RMaxAgent":{act:[0,2,1,""],get_max_q_action:[0,2,1,""],get_max_q_value:[0,2,1,""],get_num_known_sa:[0,2,1,""],get_q_value:[0,2,1,""],is_known:[0,2,1,""],reset:[0,2,1,""],update:[0,2,1,""]},"simple_rl.agents.RandomAgentClass":{RandomAgent:[0,1,1,""]},"simple_rl.agents.RandomAgentClass.RandomAgent":{act:[0,2,1,""]},"simple_rl.experiments":{ExperimentClass:[2,0,0,"-"],ExperimentParametersClass:[2,0,0,"-"]},"simple_rl.experiments.ExperimentClass":{Experiment:[2,1,1,""]},"simple_rl.experiments.ExperimentClass.Experiment":{EXP_PARAM_FILE_NAME:[2,3,1,""],FULL_EXP_FILE_NAME:[2,3,1,""],RESULTS_DIR:[2,3,1,""],add_experience:[2,2,1,""],end_of_episode:[2,2,1,""],end_of_instance:[2,2,1,""],get_agent_avg_cumulative_rew:[2,2,1,""],make_plots:[2,2,1,""],write_datum_to_file:[2,2,1,""],write_exp_info_to_file:[2,2,1,""]},"simple_rl.experiments.ExperimentParametersClass":{ExperimentParameters:[2,1,1,""]},"simple_rl.mdp":{MDPClass:[4,0,0,"-"],MDPDistributionClass:[4,0,0,"-"],StateClass:[4,0,0,"-"]},"simple_rl.mdp.MDPClass":{MDP:[4,1,1,""]},"simple_rl.mdp.MDPClass.MDP":{end_of_instance:[4,2,1,""],execute_agent_action:[4,2,1,""],get_actions:[4,2,1,""],get_curr_state:[4,2,1,""],get_gamma:[4,2,1,""],get_init_state:[4,2,1,""],get_num_state_feats:[4,2,1,""],get_parameters:[4,2,1,""],get_reward_func:[4,2,1,""],get_slip_prob:[4,2,1,""],get_transition_func:[4,2,1,""],reset:[4,2,1,""],set_gamma:[4,2,1,""],set_slip_prob:[4,2,1,""],set_step_cost:[4,2,1,""]},"simple_rl.mdp.MDPDistributionClass":{MDPDistribution:[4,1,1,""],main:[4,4,1,""]},"simple_rl.mdp.MDPDistributionClass.MDPDistribution":{get_actions:[4,2,1,""],get_all_mdps:[4,2,1,""],get_average_reward_func:[4,2,1,""],get_gamma:[4,2,1,""],get_horizon:[4,2,1,""],get_init_state:[4,2,1,""],get_mdps:[4,2,1,""],get_num_mdps:[4,2,1,""],get_parameters:[4,2,1,""],get_prob_of_mdp:[4,2,1,""],get_reward_func:[4,2,1,""],remove_mdp:[4,2,1,""],remove_mdps:[4,2,1,""],sample:[4,2,1,""],set_gamma:[4,2,1,""]},"simple_rl.mdp.StateClass":{State:[4,1,1,""]},"simple_rl.mdp.StateClass.State":{features:[4,2,1,""],get_data:[4,2,1,""],get_num_feats:[4,2,1,""],is_terminal:[4,2,1,""],set_terminal:[4,2,1,""]},"simple_rl.planning":{BeliefSparseSamplingClass:[6,0,0,"-"],BoundedRTDPClass:[6,0,0,"-"],MCTSClass:[6,0,0,"-"],PlannerClass:[6,0,0,"-"],ValueIterationClass:[6,0,0,"-"]},"simple_rl.planning.BeliefSparseSamplingClass":{BeliefSparseSampling:[6,1,1,""]},"simple_rl.planning.BeliefSparseSamplingClass.BeliefSparseSampling":{plan_from_state:[6,2,1,""],run:[6,2,1,""]},"simple_rl.planning.BoundedRTDPClass":{BoundedRTDP:[6,1,1,""]},"simple_rl.planning.BoundedRTDPClass.BoundedRTDP":{plan:[6,2,1,""],policy:[6,2,1,""],run_sample_trial:[6,2,1,""]},"simple_rl.planning.MCTSClass":{MCTS:[6,1,1,""]},"simple_rl.planning.MCTSClass.MCTS":{plan:[6,2,1,""],policy:[6,2,1,""]},"simple_rl.planning.PlannerClass":{Planner:[6,1,1,""]},"simple_rl.planning.ValueIterationClass":{ValueIteration:[6,1,1,""]},"simple_rl.planning.ValueIterationClass.ValueIteration":{get_gamma:[6,2,1,""],get_max_q_actions:[6,2,1,""],get_num_backups_in_recent_run:[6,2,1,""],get_num_states:[6,2,1,""],get_q_value:[6,2,1,""],get_states:[6,2,1,""],get_value:[6,2,1,""],plan:[6,2,1,""],policy:[6,2,1,""],print_value_func:[6,2,1,""],run_vi:[6,2,1,""]},"simple_rl.run_experiments":{choose_mdp:[5,4,1,""],evaluate_agent:[5,4,1,""],main:[5,4,1,""],parse_args:[5,4,1,""],play_markov_game:[5,4,1,""],reproduce_from_exp_file:[5,4,1,""],run_agents_lifelong:[5,4,1,""],run_agents_on_mdp:[5,4,1,""],run_single_agent_on_mdp:[5,4,1,""],run_single_belief_agent_on_pomdp:[5,4,1,""]},"simple_rl.utils":{additional_datastructures:[9,0,0,"-"],chart_utils:[9,0,0,"-"],make_mdp:[9,0,0,"-"]},"simple_rl.utils.additional_datastructures":{SimpleRLStack:[9,1,1,""]},"simple_rl.utils.additional_datastructures.SimpleRLStack":{is_empty:[9,2,1,""],peek:[9,2,1,""],pop:[9,2,1,""],push:[9,2,1,""],size:[9,2,1,""]},"simple_rl.utils.chart_utils":{average_data:[9,4,1,""],compute_conf_intervals:[9,4,1,""],compute_single_conf_interval:[9,4,1,""],drange:[9,4,1,""],load_data:[9,4,1,""],main:[9,4,1,""],make_plots:[9,4,1,""],parse_args:[9,4,1,""],plot:[9,4,1,""]},"simple_rl.utils.make_mdp":{make_markov_game:[9,4,1,""],make_mdp:[9,4,1,""],make_mdp_distr:[9,4,1,""]},simple_rl:{agents:[0,0,0,"-"],experiments:[2,0,0,"-"],mdp:[4,0,0,"-"],planning:[6,0,0,"-"],run_experiments:[5,0,0,"-"],tasks:[7,0,0,"-"],utils:[9,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","attribute","Python attribute"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:attribute","4":"py:function"},terms:{"27th":5,"abstract":[0,4,5,6],"class":[0,2,4,5,6,9],"default":0,"float":[0,4,5,6,9],"function":[0,4,6,9],"int":[0,4,5,6,9],"new":2,"return":[0,4,5,6,9],"true":[0,2,4,5,6,9],"while":6,For:[0,6,9],Near:6,Not:4,The:[0,6],Used:[2,4],_format_titl:9,_get_agent_color:9,_get_agent_nam:9,_is_disc_reward:9,_is_episod:9,_list:9,abel:[0,5,9],about:2,abov:9,access:6,accord:[0,5],across:9,act:0,action:[0,2,4,6],action_ab:5,activ:2,actor:0,add:[2,5],add_experi:2,additional_datastructur:1,advanc:0,after:5,agent:[1,2,3,4,5,9],agent_color:2,agent_l:5,agent_list:5,agent_nam:5,agentclass:[1,5],algorithm:[0,6,9],all:[2,4,5,9],alpha:0,ani:2,anneal:0,apach:5,approxim:[0,4],arg:[0,2,4,5,6,9],argument:9,arriv:0,associ:0,assum:[0,6],asteroid:5,august:5,author:[0,5,9],auto:[2,3],automat:5,averag:[0,9],average_data:9,avg:[0,4,5,9],axi:9,back:0,bandit:0,bar:5,base:[0,2,4,5,6,9],basic:[0,6,9],been:9,befor:5,belief_ag:5,belief_st:0,beliefag:0,beliefagentclass:1,beliefmdpclass:5,beliefsparsesampl:6,beliefsparsesamplingclass:[1,5],beliefst:0,beliefstateclass:5,beliefupdaterclass:5,bellman:0,best:0,beta:0,between:4,bool:[5,9],both:6,bound:[0,6],boundedrtdp:6,boundedrtdpclass:1,brafman:0,brown:[5,9],brtdp:6,brun:9,bss:6,bundl:2,call:[0,5],can:[2,6],carlo:6,cartpol:5,central:0,chain:5,chainmdpclass:5,chainstateclass:5,chang:9,chart:9,chart_util:[1,2,5],choose_mdp:5,classic:0,clear_old_result:[2,5],code:[5,9],codomain:0,color:9,com:[0,9],come:3,command:9,comput:[0,9],compute_conf_interv:9,compute_single_conf_interv:9,conf_interv:9,confid:[5,9],config:0,contact:5,contain:[0,2,4,6,9],content:[1,3],contextu:0,core:4,correspond:0,cost:9,count_r_per_n_timestep:2,creat:[5,9],csv:[5,9],cumul:[5,9],cumulative_plot:[2,5],cur_stat:6,current:[0,2,6],custom:9,dabel:[2,5,9],data:[2,4,5,9],datum:[2,9],david:[0,5,9],david_abel:5,ddyuudd:0,decim:9,decis:[0,4,6],defaultdict:6,delai:0,delayedqag:0,delayedqagentclass:1,delta:6,denot:0,depend:6,depth:0,determin:[5,9],dict:[0,2,4],differ:4,dimens:9,dir:[5,9],dir_for_plot:[2,5],directori:[2,9],discount:[5,9],displai:9,distribut:[4,5,9],doc:2,document:2,don:6,doubl:0,doubleqag:0,doubleqagentclass:1,drang:9,dure:0,dynam:6,each:[0,4,5,9],earlier:5,easi:2,edu:[5,9],either:0,element:9,end:5,end_of_episod:[0,2],end_of_inst:[2,4],env_nam:5,environ:5,episod:[2,5,9],epsilon1:0,epsilon:0,epsilon_greedy_q_polici:0,equat:0,equival:5,error:9,estim:6,evaluate_ag:5,even:6,everyth:9,execute_agent_act:4,exist:5,exp_funct:2,exp_info:2,exp_nam:5,exp_param_file_nam:[2,9],experi:[1,3,5,9],experiment_ag:9,experiment_dir:9,experiment_nam:9,experiment_name_prefix:[2,5],experimentclass:[1,5],experimentparamet:[2,5],experimentparametersclass:1,explor:0,explore_param:6,extra_dir:2,extract:5,facilit:4,fals:[0,2,4,5,6,9],featur:4,file:[2,5,9],fill:6,finish:5,fix:0,fixedpolicyag:[0,6],fixedpolicyagentclass:1,forthcom:8,fraction:6,free:0,friend:5,from:[0,2,4,5,6,9],full:[3,6],full_exp_file_nam:2,full_experi:2,functiona:4,game:5,gamma:[0,4,5,6,9],gen_model:6,gener:[2,3,5,6,9],get_act:4,get_action_distr:0,get_agent_avg_cumulative_rew:2,get_all_mdp:4,get_average_reward_func:4,get_avg_q_valu:0,get_curr_st:4,get_data:4,get_gamma:[4,6],get_horizon:4,get_init_st:4,get_max_q_act:[0,6],get_max_q_valu:0,get_mdp:4,get_nam:0,get_num_backups_in_recent_run:6,get_num_feat:4,get_num_known_sa:0,get_num_mdp:4,get_num_st:6,get_num_state_feat:4,get_paramet:[0,4],get_prob_of_mdp:4,get_q_valu:[0,6],get_reward_func:4,get_slip_prob:4,get_stat:6,get_transition_func:4,get_valu:[0,6],github:5,given:[0,2,5,6,9],gmail:0,grab:9,greedy_q_polici:0,grid:[5,9],grid_dim:9,grid_gam:9,grid_world:5,gridworldmpdclass:5,gridworldstateclass:5,guarante:[4,6],gym:5,gym_env_nam:5,has:[5,6],hasselt:0,have:[4,6,9],helper:9,highest:0,horizon:[0,4,5,6,9],http:9,ignor:5,implement:[0,6,9],index:[0,3],indic:[0,2,9],info:2,inform:[0,2],init:4,init_q:0,init_st:4,initi:0,instanc:[2,4,5,9],instruct:5,interact:[4,5],intern:0,interv:[5,9],is_empti:9,is_episod:2,is_known:0,is_lifelong:2,is_markov_gam:2,is_term:4,is_termin:4,iter:[4,6],its:0,itself:9,jinnai:0,just:9,kearn:6,kei:[0,4],label:9,langford:0,larg:6,last:5,learn:[0,2,3,5],level:0,librari:3,licens:5,lifelong:5,like:5,line:[2,6,9],linear:0,linearqagentclass:0,linucbagentclass:0,list:[0,4,5,6,9],littman:0,load:9,load_data:9,look:9,loop:5,lower:6,lower_values_init:6,main:[4,5,6,9],maintain:[5,6],make:[2,5,9],make_markov_gam:9,make_mdp:[1,5],make_mdp_distr:9,make_plot:[2,9],maker:0,manual:9,margin:9,marker:9,markov:[4,5,6],markov_gam:5,markov_game_class:9,markov_game_mdp:5,markovgamemdp:5,markovgamemdpclass:5,mass:0,matplotlib:9,matrix:9,max:[0,6],max_iter:6,max_reward:6,mcmahan:6,mct:6,mctsclass:[1,5],mdp:[0,1,2,3,5,6,9],mdp_class:9,mdp_distr:5,mdp_list:4,mdp_name:5,mdp_prob_dict:4,mdp_visual:1,mdpclass:[1,5],mdpdistribut:[4,5,9],mdpdistributionclass:1,method:[0,4],model:[0,6],modul:[1,3],monoton:6,mont:6,naiiv:6,name:[0,5,6,9],neural:0,new_gamma:4,new_polici:0,new_step_cost:4,next_stat:[0,2],non:5,none:[0,2,5,6,9],note:[0,4,9],num:[5,6],num_rollouts_per_step:6,num_times_to_writ:2,number:[5,6,9],object:[0,2,4,6,9],one:[5,9],onli:6,onto:2,open:[5,9],open_plot:[2,5,9],operand:9,opt:9,optim:6,option:9,order:9,other:6,otherwis:[5,9],over:[4,5],overflow:9,overrid:4,pac:0,packag:[1,3],page:3,pair:0,param:2,param_nam:[0,4],param_v:[0,4],paramet:[0,2,5,9],pars:9,parse_arg:[5,9],partial:6,path:[5,9],pdf:9,peek:9,per:[5,6],perform:[0,6],plan:[1,3,5],plan_from_st:6,planner:6,plannerclass:[1,5],play_markov_gam:5,plot:[2,5,9],plot_file_nam:9,plot_nam:9,point:9,pointer:[0,9],polici:[0,6],pomdp:5,pomdpclass:5,pop:9,prev_act:0,prev_stat:0,print:5,print_value_func:6,prior:0,prob_threshold:4,probabl:0,problem:6,process:[0,4,6],produc:6,program:6,purpos:2,push:9,put:9,pyplot:9,python:3,q_func:0,q_func_id:0,qlearningag:0,qlearningagentclass:[1,5],question:9,random:[0,9],randomag:0,randomagentclass:[1,5],randomli:0,rang:9,rasa:0,reach:5,real:6,recomput:4,record:[2,5],recurs:0,regular:9,reinforc:[0,3],rel:0,relev:[0,2,5,9],remov:[4,5],remove_mdp:4,replac:4,repres:4,reproduc:[2,5],reproduce_from_exp_fil:5,reproduce_i:5,requir:3,resample_at_termin:5,research:2,reset:[0,2,4],reset_at_termin:5,result:[2,5,9],results_dir:2,results_dir_nam:5,retriev:0,rew_step_count:5,reward:[0,2,4,5,9],reward_func:4,rmax:0,rmaxag:0,rmaxagentclass:[1,5],rollout_depth:6,rtdp:6,rtpdp:6,run:[5,6,9],run_agents_lifelong:5,run_agents_on_mdp:[2,5],run_experi:1,run_sample_tri:6,run_single_agent_on_mdp:5,run_single_belief_agent_on_pomdp:5,run_vi:6,s_a_threshold:0,sam:9,sampl:[4,5,6],sample_r:6,search:[3,6],see:[0,5],self:[0,2,4,6],send:5,set:[0,4,5],set_gamma:4,set_nam:0,set_polici:0,set_q_funct:0,set_slip_prob:4,set_step_cost:4,set_termin:4,set_vmax:0,share:4,should:[0,9],show:9,simple_rl:1,simplerlstack:9,singl:[5,9],size:9,skeleton:0,slip_prob:4,soft_max_polici:0,softmax:0,solver:6,soon:3,sourc:[0,2,4,5,6,9],space:6,spars:6,specif:5,specifi:0,sphinx:2,stack:9,stackoverflow:9,standard:[0,6],start:5,state:[0,2,4,5,6],state_ab:5,stateclass:[0,1,5],statu:5,step:[0,5,9],step_cost:[4,9],store:[2,5,9],str:[0,2,4,5,6,9],strehl:0,string:0,strong:6,structur:9,subclass:4,submodul:1,subpackag:1,summari:[0,2,4,5,6,9],system:0,tabula:0,take:0,taken:[5,6],task:[1,3,5],tau:6,taxi:5,temperatur:0,tennenholtz:0,term:9,termin:5,thi:[0,2,4,5,6],though:5,time:[0,5,6],time_taken:2,timestep:5,titl:9,tol:6,too:9,touch:6,track:5,track_disc_reward:[2,5,9],transit:6,transition_func:4,tree:6,trr:9,tupl:[4,5,6],tutori:3,two:9,txt:2,type:9,under:9,underli:6,uniform:0,unsupport:9,updat:[0,5],upper:[0,6],upper_values_init:6,use:[4,6,9],use_cost:9,user:[2,9],using:[0,2,9],usual:5,util:[1,3,5],val:[0,4],valu:[0,6],value_func:6,value_it:6,valueiter:6,valueiterationclass:[1,5],vector:9,verbos:[5,6],visit:6,vmax:0,when:5,where:[5,6,9],which:9,wiewiora:0,without:4,wlog:0,would:6,write:2,write_datum_to_fil:2,write_exp_info_to_fil:2,written:2,x_increment:9,x_max:9,x_min:9,you:6,yuu:0,zero:5},titles:["simple_rl.agents package","Auto Generated Documentation","simple_rl.experiments package","Welcome to simple_rl's (work in progress) documentation!","simple_rl.mdp package","simple_rl package","simple_rl.planning package","simple_rl.tasks package","Tutorial","simple_rl.utils package"],titleterms:{additional_datastructur:9,agent:0,agentclass:0,auto:1,beliefagentclass:0,beliefsparsesamplingclass:6,boundedrtdpclass:6,chart_util:9,content:[0,2,4,5,6,7,9],delayedqagentclass:0,document:[1,3],doubleqagentclass:0,experi:2,experimentclass:2,experimentparametersclass:2,fixedpolicyagentclass:0,gener:1,indic:3,make_mdp:9,mctsclass:6,mdp:4,mdp_visual:9,mdpclass:4,mdpdistributionclass:4,modul:[0,2,4,5,6,7,9],packag:[0,2,4,5,6,7,9],plan:6,plannerclass:6,progress:3,qlearningagentclass:0,randomagentclass:0,rmaxagentclass:0,run_experi:5,simple_rl:[0,2,3,4,5,6,7,9],stateclass:4,submodul:[0,2,4,5,6,9],subpackag:[0,4,5,7],tabl:3,task:7,tutori:8,util:9,valueiterationclass:6,welcom:3,work:3}})