function [M_arr,Q_arr] = gd(opts)
%GD  Tabular on-policy (SARSA-style) Q update with a running state-visit estimate.
%
%   [M_arr, Q_arr] = gd(opts) runs opts.epochs independent epochs of opts.T
%   steps each. At every step an action is drawn through opts.get_softmax,
%   the Q table is updated with the bootstrapped target
%   r(s,a,M) + gamma * Q(s1,a1), and M is moved towards the indicator of the
%   next state s1. Snapshots of M and Q are stored K times per epoch.
%
%   Required fields of opts (an error 'Missing <name>!' is raised otherwise):
%     S, A         - sizes of the Q table (Q is S-by-A). The sampled action
%                    is used directly as the next state, so presumably
%                    A == S -- TODO confirm with callers.
%     gamma        - discount factor applied to Q(s1,a1).
%     r            - function handle called as r(s, a, M).
%     get_softmax  - function handle called as get_softmax(q_row, temperature);
%                    its result is used as an action index.
%     filter       - S-by-A matrix; entries equal to 1 mark the (state, action)
%                    pairs that may be selected, all others are masked to -inf.
%     K            - number of snapshots stored per epoch (after the initial one).
%     soft         - probability with which the next state is drawn uniformly
%                    from the allowed successors instead of following the action.
%     m_opt, softmax - must be present for interface compatibility; they are
%                    not used by this function.
%
%   Optional fields (default):
%     epochs (10), T (1000), alpha0 (1e-2), beta0 (1e-2), temp (1e-3),
%     GLIE (false; when true the temperature argument is temp * t),
%     Q0 (random values in [0, 1e-2) on allowed entries, -inf elsewhere).
%   The fields del, s0 and M0 are accepted but ignored: the initial M and the
%   initial state are always drawn at random at the start of every epoch.
%
%   Outputs:
%     M_arr - S-by-(K+1)-by-epochs; column 1 is the initial M of the epoch.
%     Q_arr - S-by-A-by-(K+1)-by-epochs; slice 1 is the initial Q of the epoch.
%
%   Note: normpdf requires the Statistics and Machine Learning Toolbox.

    % Validate all required fields up front, in a fixed order, so that a
    % missing option fails before any work is done.
    required = {'S', 'A', 'gamma', 'm_opt', 'r', 'softmax', ...
                'get_softmax', 'filter', 'K', 'soft'};
    for i = 1:numel(required)
        if ~isfield(opts, required{i})
            error('Missing %s!', required{i});
        end
    end

    if ~isfield(opts, 'epochs'), opts.epochs = 10;   end
    if ~isfield(opts, 'T'),      opts.T = 1000;      end
    if ~isfield(opts, 'alpha0'), opts.alpha0 = 1e-2; end
    if ~isfield(opts, 'beta0'),  opts.beta0 = 1e-2;  end
    if ~isfield(opts, 'temp'),   opts.temp = 1e-3;   end
    if ~isfield(opts, 'GLIE'),   opts.GLIE = false;  end

    S = opts.S; A = opts.A; gamma = opts.gamma;
    epochs = opts.epochs;
    T = opts.T; K = opts.K;
    r = opts.r;
    get_softmax = opts.get_softmax;
    allowed = opts.filter;      % renamed locally to avoid shadowing builtin filter()
    alpha = opts.alpha0;        % constant Q learning rate
    beta = opts.beta0;          % constant M learning rate
    temp = opts.temp;
    GLIE = opts.GLIE;
    soft = opts.soft;

    % Steps after which a snapshot is stored. When K divides T these are
    % exactly T/K, 2*T/K, ..., T; otherwise they are rounded to the nearest
    % step so that every one of the K slots is still filled.
    log_steps = round((1:K) * T / K);

    M_arr = zeros(S, K+1, epochs);
    Q_arr = -inf(S, A, K+1, epochs);

    for e = 1:epochs
        fprintf('epoch: %d\n', e)

        % Initial Q: caller-supplied, or small random values on allowed pairs.
        if isfield(opts, 'Q0')
            Q = opts.Q0;
        else
            Q = -inf(S, A);
            mask = (allowed == 1);
            Q(mask) = rand(nnz(mask), 1) * 1e-2;
        end

        % Initial M: two Gaussian bumps on [-1, 1], circularly shifted by a
        % random offset and normalised to sum to one.
        grid = linspace(-1, 1, S);
        M = normpdf(grid, 0.5, 1e-1)' + normpdf(grid, -0.5, 1e-1)';
        M = circshift(M, randi(S));
        M = M ./ sum(M);

        s = randi(S);           % random initial state

        M_arr(:, 1, e) = M;
        Q_arr(:, :, 1, e) = Q;
        next_log = 1;           % index into log_steps of the next snapshot

        for t = 1:T
            if GLIE
                temp_t = temp * t;
            else
                temp_t = temp * 1;
            end

            % Sample an action from the current Q row (Q is the behaviour policy).
            q_row = Q(s, :);
            q_row(allowed(s, :) ~= 1) = -inf;
            a = get_softmax(q_row, temp_t);

            % With probability 'soft' the next state is drawn uniformly from
            % the allowed successors; otherwise the action is the next state.
            if rand < soft
                candidates = find(allowed(s, :) == 1);
                s1 = candidates(randi(length(candidates)));
            else
                s1 = a;
            end

            % Sample the follow-up action used in the on-policy target.
            q_row1 = Q(s1, :);
            q_row1(allowed(s1, :) ~= 1) = -inf;
            a1 = get_softmax(q_row1, temp_t);

            % Update Q with the target r(s,a,M) + gamma * Q(s1,a1).
            Q(s, a) = (1 - alpha) * Q(s, a) + alpha * (r(s, a, M) + gamma * Q(s1, a1));

            % Update M: exponential moving average of the visited-state indicator.
            M = (1 - beta) * M;
            M(s1) = M(s1) + beta * 1;

            s = s1;

            % Store snapshots; the loop also covers repeated log steps
            % (possible when K > T).
            while next_log <= K && t >= log_steps(next_log)
                M_arr(:, next_log + 1, e) = M;
                Q_arr(:, :, next_log + 1, e) = Q;
                next_log = next_log + 1;
            end
        end
    end
end
