% Reset the command window, the workspace variables and any open figures.
% Plain `clear` is enough here: `clear all` additionally throws away
% breakpoints and cached/compiled functions, which only slows the run down.
clc; clear;
close all;

%% Initializing variables: d = dimension of context
% Fixed seed so that every run of the script draws the same random sequence.
rng(120,"twister")
% movielens1M.mat is expected to provide:
%   ratings   = movie ratings from the MovieLens 1M dataset
%   ratings_r = full rating matrix recovered with matrix completion
% Only ratings_r is used below.
load('movielens1M.mat');
ratings_r = single(ratings_r);

% Affine rescaling: a value of 0 maps to -1 and a value of 50 maps to +1.
% NOTE(review): presumably the recovered ratings lie in [0, 50] - confirm.
ratings_r = ratings_r/25 - 1;

% Rows of ratings_r are treated as movies.
num_movies = size(ratings_r,1);
d = 10; % or 16

%% Feature extraction with PCA
% Keep the first d principal directions and project every movie onto them.
% Note that the projection uses ratings_r as-is (pca centers the data
% internally by default, the projection here does not).
coeff = pca(ratings_r);
movie_features = ratings_r * coeff(:,1:d);

%% Fitting for ground truth model: Y = mean rating
% Linear regression of each movie's mean rating on its d features.
Y = mean(ratings_r,2);
mdf = fitlm(movie_features,Y,'linear');
% Row 1 of the coefficient table is the intercept; only the d slope
% estimates are kept as the ground-truth parameter vector.
theta_star = table2array(mdf.Coefficients(2:end,1));
% Model reward of every movie (not used further in this script; kept for
% inspection in the workspace).
w_movie = movie_features*theta_star;

%% Computing maximum mean reward per time step with Robbins-Monro algorithm
n_steps = 50000;
step_coef = 1.0;
step_power= 5.0;
%Gamma_tr: maximum mean reward per time step
Gamma_tr = robbins_monro(n_steps, step_coef, step_power,theta_star,movie_features);



%% Online movie recommendation experiment
itr = 2000;                   % number of independent runs
Gamma_table = zeros(itr,1);   % threshold reached at the end of each run
T=10000;                      % number of time steps recorded per run
Rewchart = zeros(itr,T);      % reward collected at every time step of every run

% A round can start as late as t = T and then advance time by a delay of at
% most 10 plus at most 20 accepted items (see the randi bounds in cbandit),
% so the per-run buffer needs T + 30 entries to never grow inside the loop.
max_overshoot = 30;

for i=1:itr
    t = 0;
    %Gamma = -1.0 + 2.0 * rand();
    Gamma = 0; %fixed Gamma initialization
    step_value = 0; %step size parameter initialization
    Rewtable = zeros(T + max_overshoot,1);
    while t <= T
        % One round: draw a decision set, accept the items whose reward is
        % at least Gamma, and update the threshold and its step parameter.
        [Rwd, Na, Sa, Gamma, step_value] = oaf(Gamma, step_value, movie_features, theta_star);
        % The Sa delay slots stay at reward 0 ...
        t = t + Sa;
        % ... followed by one slot per accepted item.
        if Na >=1
            Rewtable(t + 1 : t + Na)=Rwd;
        end
        t = t + Na;
    end
    Gamma_table(i)= Gamma;
    % Only the first T slots are kept; the overshoot of the last round is dropped.
    Rewchart(i,:) = Rewtable(1:T);
end

% Cumulative regret of every run against the reference value Gamma_tr.
Regchart = cumsum(Gamma_tr- Rewchart,2);

%% plot regret
% Evaluate the regret on 200 evenly spaced (rounded) time points only.
time = round(linspace(1,T,200));
regret = Regchart(:,time);

risk1 = 5;  % lower percentile: top edge of the darkest band
risk2 = 5;  % the middle band ends at the (100 - risk2)-th percentile

% Percentile curves taken across the runs, one value per plotted time point.
q_low  = prctile(regret, risk1);
q_high = prctile(regret, 100-risk2);
q_max  = prctile(regret, 100);

OAF_mov_fig = figure('position', [10 10 320 280]);
% Stacked areas: 0..q_low, q_low..q_high and q_high..q_max, from dark to light grey.
bands = area(time, [q_low; q_high - q_low; q_max - q_high]');
shades = [0.55 0.78 0.96];
for k = 1:3
    set(bands(k),'FaceColor', shades(k)*[1 1 1]);
end
hold on;
% Mean regret across runs on top of the bands.
plot(time, mean(regret), 'k','LineWidth',2);
xlabel('time', 'FontSize', 10);
ylabel('regret', 'FontSize', 10);
% legend({'lower 5%','middle 90%','upper 5%','mean'},'FontSize',10);
% legend('Location','northwest')
ylim([0 220])
saveas(OAF_mov_fig,'COAForc_mov.png')


%% Contextual bandit with MovieLens 1M dataset
function [Xa, Sa, La, Ida] = cbandit(movie_features)
    % CBANDIT  Draw one random decision set from the movie feature matrix.
    %
    % Input:
    %   movie_features - matrix with one row per movie.
    % Outputs:
    %   Xa  - La-by-size(movie_features,2) matrix, the rows selected by Ida.
    %   Sa  - random integer delay, uniform on 5..10.
    %   La  - random decision set size, uniform on 6..20.
    %   Ida - La-by-1 vector of row indices into movie_features, drawn
    %         independently (the same row may appear more than once).

    % Generate random decision set size between 6 and 20
    La = randi([6, 20]);

    % Generate random delay between 5 and 10
    Sa = randi([5, 10]);

    % Randomly select feature vectors (rows) from the movielens dataset.
    % The row count is taken with size(...,1): length() returns the largest
    % dimension and would produce out-of-range row indices whenever the
    % matrix has more columns than rows.
    Ida = randi([1, size(movie_features,1)],La,1);
    Xa = movie_features(Ida,:);
end

%% OAF
function [Rwd, Na, Sa, Gamma_new, step_value_new] = oaf(Gamma, step_value, movie_features, theta_star)
    % OAF  Run one round of the threshold rule and update the threshold.
    %
    % Inputs:
    %   Gamma          - current acceptance threshold.
    %   step_value     - current step-size parameter.
    %   movie_features - feature matrix handed to cbandit.
    %   theta_star     - parameter vector of the linear reward model.
    % Outputs:
    %   Rwd            - rewards of the accepted items (those >= Gamma).
    %   Na             - number of accepted items.
    %   Sa             - delay returned by cbandit for this round.
    %   Gamma_new      - updated threshold, clipped to [-1, 1].
    %   step_value_new - step_value increased by Sa.

    % Draw the decision set and the delay of this round.
    [Xa, Sa, ~, ~] = cbandit(movie_features);

    % Rewards under the given linear model.
    Ya = Xa*theta_star;

    % Accept every item whose reward reaches the current threshold.
    accepted = (Ya >= Gamma);
    Rwd = Ya(accepted);
    Na = nnz(accepted);

    % The step-size parameter accumulates the observed delays.
    step_value_new = step_value + Sa;

    % Threshold times the time spent in this round, minus the reward collected.
    residual = (Sa + Na) * Gamma - sum(Rwd);

    % Move the threshold against the residual with step 1/step_value_new,
    % then keep it inside [-1, 1].
    Gamma_new = Gamma - residual/(step_value_new);
    Gamma_new = max(-1.0, min(1.0, Gamma_new));
end

%% robbins_monro algorithm
function Gamma_tr = robbins_monro(n_steps, step_coef, step_power,theta_star,movie_features)
    % ROBBINS_MONRO  Run n_steps stochastic-approximation updates of a
    % threshold and return its final value.
    %
    % Inputs:
    %   n_steps        - number of iterations.
    %   step_coef      - numerator of the step size.
    %   step_power     - factor multiplying the iteration index in the step
    %                    size denominator.
    %                    NOTE(review): the name suggests an exponent, but the
    %                    code multiplies by it - confirm this is intended.
    %   theta_star     - parameter vector of the linear reward model.
    %   movie_features - feature matrix handed to cbandit.
    % Output:
    %   Gamma_tr       - threshold after the last iteration, within [-1, 1].

    % Start from a uniform random threshold between -1 and 1.
    threshold = -1.0 + 2.0 * rand();

    for k = 1:n_steps
        % Fresh decision set and delay for this iteration.
        [Xa, Sa, ~, ~] = cbandit(movie_features);
        Ya = Xa*theta_star;

        % Items whose reward reaches the current threshold.
        accepted = (Ya >= threshold);
        Rwd = Ya(accepted);
        Na = nnz(accepted);

        % Threshold times the time spent, minus the reward collected.
        residual = (Sa + Na) * threshold - sum(Rwd);

        % Decreasing step size, then a clipped update.
        gain = step_coef / (k*step_power);
        threshold = threshold - gain * residual;
        threshold = max(-1.0, min(1.0, threshold));
    end
    Gamma_tr = threshold;
end