{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple bilevel least-squares benchmark\n",
    "\n",
    "The inner objective `g` is the mean squared residual on `A1.csv` / `b1.csv`; the outer objective `f` is the mean squared residual on `A2.csv` / `b2.csv`. Both are restricted to the l1 ball of radius `lbd`, and the outer problem additionally requires `g(x) <= g_opt`.\n",
    "\n",
    "Each method below runs until its accumulated iteration time exceeds a budget. Whenever the accumulated time passes the next whole second it logs the iteration count, `g - g_opt`, `f - f_opt`, the oracle counter and the elapsed time; at the end the log is written to CSV and handed to `files.download`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "qo5hZ0IbxVAn"
   },
   "outputs": [],
   "source": [
    "import csv\n",
    "import time\n",
    "\n",
    "import cvxpy as cp\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from google.colab import files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration and data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BJC-tQ3rBXV4"
   },
   "outputs": [],
   "source": [
    "np.random.seed(0)\n",
    "\n",
    "d = 730                          # length of the decision vector x\n",
    "n = 1068                         # sample-count constant; each split is addressed through n/3 rows\n",
    "num_samples = n // 3             # rows addressed in each split -- presumably the row count of A1/A2, TODO confirm\n",
    "full_batch = range(num_samples)  # index set used for full (non-stochastic) evaluations\n",
    "lbd = 10                         # radius of the l1-ball constraint\n",
    "\n",
    "A_train = pd.read_csv('A1.csv', header=None).to_numpy()\n",
    "A_val = pd.read_csv('A2.csv', header=None).to_numpy()\n",
    "\n",
    "b_train = pd.read_csv('b1.csv', header=None).to_numpy().reshape(-1)\n",
    "b_val = pd.read_csv('b2.csv', header=None).to_numpy().reshape(-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Objectives, gradients and linear minimisation oracles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HqFcrdFc5r66"
   },
   "outputs": [],
   "source": [
    "def _least_squares_grad(A, b, x, S):\n",
    "  \"\"\"Return 2*A_S^T(A_S x - b_S)/|S|, the gradient of the mean squared residual over rows S.\"\"\"\n",
    "  S = list(S)\n",
    "  AS = A[S,:]\n",
    "  bS = b[S]\n",
    "  return 2*AS.T@(AS@x-bS)/len(S)\n",
    "\n",
    "def _least_squares_value(A, b, x, S):\n",
    "  \"\"\"Return ||A_S x - b_S||^2/|S|, the mean squared residual over rows S.\"\"\"\n",
    "  S = list(S)\n",
    "  AS = A[S,:]\n",
    "  bS = b[S]\n",
    "  return np.linalg.norm(AS@x-bS)**2/len(S)\n",
    "\n",
    "def gradg(x,S):\n",
    "  \"\"\"Gradient of the inner objective at x, using the rows S of (A_train, b_train).\"\"\"\n",
    "  return _least_squares_grad(A_train, b_train, x, S)\n",
    "\n",
    "def gradf(x,S):\n",
    "  \"\"\"Gradient of the outer objective at x, using the rows S of (A_val, b_val).\"\"\"\n",
    "  return _least_squares_grad(A_val, b_val, x, S)\n",
    "\n",
    "def g(x,S):\n",
    "  \"\"\"Inner objective at x, using the rows S of (A_train, b_train).\"\"\"\n",
    "  return _least_squares_value(A_train, b_train, x, S)\n",
    "\n",
    "def f(x,S):\n",
    "  \"\"\"Outer objective at x, using the rows S of (A_val, b_val).\"\"\"\n",
    "  return _least_squares_value(A_val, b_val, x, S)\n",
    "\n",
    "def _sample_indices(size):\n",
    "  \"\"\"Draw `size` row indices uniformly with replacement from 0 .. num_samples-1.\n",
    "\n",
    "  The upper bound of np.random.randint is exclusive, so num_samples (not\n",
    "  num_samples-1) is required for the last row to be reachable.\n",
    "  \"\"\"\n",
    "  return np.random.randint(0, num_samples, size=int(size))\n",
    "\n",
    "def lmo(c):\n",
    "  \"\"\"Minimise c^T v over the l1 ball of radius lbd.\n",
    "\n",
    "  Returns the vector that is zero everywhere except at the coordinate where\n",
    "  |c| is largest, which is set to -sign(c_i)*lbd.\n",
    "  \"\"\"\n",
    "  i = np.argmax(abs(c))\n",
    "  v = np.zeros(d)\n",
    "  v[i] = -np.sign(c[i])*lbd\n",
    "  return v\n",
    "\n",
    "def clmo(c,g,h):\n",
    "  \"\"\"Minimise c^T v subject to ||v||_1 <= lbd and g^T v <= h, solved with cvxpy.\n",
    "\n",
    "  Note that the parameter `g` is a vector here and shadows the objective\n",
    "  function `g` inside this function. Returns `v.value` as set by the solver.\n",
    "  \"\"\"\n",
    "  v = cp.Variable(d)\n",
    "  prob = cp.Problem(cp.Minimize(c.T@v),[cp.norm(v,1) <= lbd, g.T@v <= h])\n",
    "  prob.solve()\n",
    "  return v.value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cbZss0HO-vEh"
   },
   "outputs": [],
   "source": [
    "def simplex_projection(s):\n",
    "  \"\"\"Project s onto {u : u >= 0, sum(u) <= 1}.\n",
    "\n",
    "  Points already in that set are returned unchanged; otherwise the usual\n",
    "  sort-and-threshold projection onto the simplex {u >= 0, sum(u) = 1} is applied.\n",
    "  \"\"\"\n",
    "  if np.sum(s) <=1 and np.all(s >= 0):\n",
    "      return s\n",
    "\n",
    "  u = np.sort(s)[::-1]\n",
    "  cssv = np.cumsum(u)\n",
    "  # Last position (in the descending order) whose entry stays positive after the shift.\n",
    "  rho = np.nonzero(u * np.arange(1, len(u)+1) > (cssv - 1))[0][-1]\n",
    "  theta = (cssv[rho] - 1) / (rho + 1.0)\n",
    "\n",
    "  return np.maximum(s-theta, 0)\n",
    "\n",
    "def proj(c):\n",
    "  \"\"\"Project c onto the l1 ball of radius lbd by projecting |c|/lbd and restoring sign and scale.\"\"\"\n",
    "  sol = simplex_projection(np.abs(c)/lbd)\n",
    "  sol = lbd*np.sign(c)*sol\n",
    "  return sol"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inner-problem warm start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LlPh88u-JBtg"
   },
   "outputs": [],
   "source": [
    "def spiderfw(init, budget):\n",
    "  \"\"\"Frank-Wolfe iterations on the inner objective with a recursively corrected gradient estimate.\n",
    "\n",
    "  Every q-th iteration the estimate is reset to the full-batch gradient; in\n",
    "  between it is updated with the gradient difference on a fresh mini-batch.\n",
    "\n",
    "  Args:\n",
    "    init: starting point (length-d array).\n",
    "    budget: last iteration index; the loop runs for t = 0 .. budget inclusive.\n",
    "\n",
    "  Returns:\n",
    "    (x, oracle): the final iterate and the accumulated oracle counter.\n",
    "  \"\"\"\n",
    "  x = init\n",
    "  t = 0\n",
    "  q = np.ceil(np.sqrt(n/3))\n",
    "  S = q\n",
    "  oracle = 0\n",
    "  while t <= budget:\n",
    "    eta = 0.1/(t+2)\n",
    "    if t%q == 0:\n",
    "      hat_gradg = gradg(x, full_batch)\n",
    "      # NOTE(review): counts n per full gradient although only n/3 rows are evaluated -- confirm intended.\n",
    "      oracle += n\n",
    "    else:\n",
    "      ind = _sample_indices(S)\n",
    "      hat_gradg = hat_gradg + gradg(x,ind) - gradg(x_prev,ind)\n",
    "      oracle += S\n",
    "\n",
    "    v = lmo(hat_gradg)\n",
    "    x_prev = x\n",
    "    x = x+eta*(v-x)\n",
    "    t += 1\n",
    "  return x, oracle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reference optimal values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "g3g1WGwWoFx3"
   },
   "outputs": [],
   "source": [
    "# Inner problem: least squares on the training split over the l1 ball.\n",
    "x_inner = cp.Variable(d)\n",
    "inner_problem = cp.Problem(cp.Minimize(cp.sum_squares(A_train@x_inner-b_train)),[cp.norm(x_inner,1) <= lbd])\n",
    "inner_problem.solve()\n",
    "g_opt = inner_problem.value/num_samples\n",
    "print('Inner optimal value: {}'.format(g_opt))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WDc3tGlR5_fX"
   },
   "outputs": [],
   "source": [
    "# Outer problem: least squares on the validation split over the points whose inner value is at most g_opt.\n",
    "x_outer = cp.Variable(d)\n",
    "outer_problem = cp.Problem(cp.Minimize(cp.sum_squares(A_val@x_outer-b_val)),[cp.norm(x_outer,1) <= lbd, cp.sum_squares(A_train@x_outer-b_train)/num_samples <=g_opt])\n",
    "outer_problem.solve()\n",
    "f_opt = outer_problem.value/num_samples\n",
    "print('Outer optimal value: {}'.format(f_opt))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Logging helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _new_history():\n",
    "  \"\"\"Return an empty log; the key order fixes the column order of the saved CSV.\"\"\"\n",
    "  return {'t': [], 'inner': [], 'outer': [], 'oracles': [], 'time_elapsed': []}\n",
    "\n",
    "def _record(history, t, x, oracle, elapsed_time):\n",
    "  \"\"\"Append one log row with the full-batch gaps g(x)-g_opt and f(x)-f_opt.\"\"\"\n",
    "  history['t'].append(t)\n",
    "  history['inner'].append(g(x, full_batch)-g_opt)\n",
    "  history['outer'].append(f(x, full_batch)-f_opt)\n",
    "  history['oracles'].append(oracle)\n",
    "  history['time_elapsed'].append(elapsed_time)\n",
    "\n",
    "def _save_history(history, filename):\n",
    "  \"\"\"Write the log to `filename` as CSV (no index column) and pass it to files.download.\"\"\"\n",
    "  df = pd.DataFrame(history)\n",
    "  df.to_csv(filename, index=False)\n",
    "  files.download(filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Methods based on the unconstrained LMO (`irscg`, `irfscg`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "X3VNVdJR52Dp"
   },
   "outputs": [],
   "source": [
    "def irscg(init, budget,i):\n",
    "  \"\"\"Conditional-gradient method on sigma*f + g with momentum-corrected single-sample gradient estimates.\n",
    "\n",
    "  Args:\n",
    "    init: starting point (length-d array).\n",
    "    budget: limit on the accumulated iteration time, in seconds.\n",
    "    i: run index, used only in the output file name 'irscg1_{i}.csv'.\n",
    "  \"\"\"\n",
    "  filename = 'irscg1_{}.csv'.format(i)\n",
    "  history = _new_history()\n",
    "  oracle = 0\n",
    "  count = 1   # next whole second at which a log row is written\n",
    "\n",
    "  t = 0\n",
    "  x = init\n",
    "  varsigma = 10\n",
    "  S = 1\n",
    "  z = 0\n",
    "  s = 0\n",
    "  elapsed_time = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "\n",
    "    alpha = 2/(t+2)\n",
    "    sigma = varsigma*(t+1)**(-0.25)\n",
    "\n",
    "    xi    = _sample_indices(S)\n",
    "    theta = _sample_indices(S)\n",
    "\n",
    "    if t == 0:\n",
    "      hat_gradf = gradf(x, theta)\n",
    "      hat_gradg = gradg(x, xi)\n",
    "    else:\n",
    "      hat_gradf = (1-alpha)*hat_gradf + gradf(x, theta) - (1-alpha)*gradf(x_prev, theta)\n",
    "      hat_gradg = (1-alpha)*hat_gradg +  gradg(x, xi)   - (1-alpha)* gradg(x_prev, xi)\n",
    "\n",
    "    v = lmo(sigma*hat_gradf +hat_gradg)\n",
    "    x_prev = x\n",
    "    x = x+alpha*(v-x)\n",
    "\n",
    "    # Weighted running average of the iterates.\n",
    "    # NOTE(review): z is maintained but never logged or returned; kept so the timed work is unchanged.\n",
    "    s_prev = s\n",
    "    s = s+(t+1)*sigma\n",
    "    z = (s_prev*z-(t+1)*t*sigma*x_prev+(t+2)*(t+1)*sigma*x)/s\n",
    "    t += 1\n",
    "\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    oracle += 2\n",
    "    # Logging happens outside the timed region, so it does not consume budget.\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, x, oracle, elapsed_time)\n",
    "      count += 1\n",
    "  print(t)\n",
    "  _save_history(history, filename)\n",
    "\n",
    "\n",
    "def irfscg(init, budget,i):\n",
    "  \"\"\"Conditional-gradient method on sigma*f + g with periodically refreshed gradient estimates.\n",
    "\n",
    "  Every q-th iteration both estimates are reset to full-batch gradients; in\n",
    "  between they are updated with gradient differences on fresh mini-batches.\n",
    "\n",
    "  Args:\n",
    "    init: starting point (length-d array).\n",
    "    budget: limit on the accumulated iteration time, in seconds.\n",
    "    i: run index, used only in the output file name 'irfscg1_{i}.csv'.\n",
    "  \"\"\"\n",
    "  filename = 'irfscg1_{}.csv'.format(i)\n",
    "  history = _new_history()\n",
    "  oracle = 0\n",
    "  count = 1\n",
    "\n",
    "  t = 0\n",
    "  q = np.floor((n/3)**0.5)\n",
    "  S = q\n",
    "  x = init\n",
    "  z = 0\n",
    "  s = 0\n",
    "  varsigma = 10\n",
    "\n",
    "  elapsed_time = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "    if t < q:\n",
    "      alpha = np.log(q)/q\n",
    "    else:\n",
    "      alpha = 2/(t+2)\n",
    "\n",
    "    sigma = varsigma*(max(t,q)+1)**(-0.5)\n",
    "\n",
    "    if t%q == 0:\n",
    "      hat_gradf = gradf(x, full_batch)\n",
    "      hat_gradg = gradg(x, full_batch)\n",
    "    else:\n",
    "      ind1 = _sample_indices(S)\n",
    "      ind2 = _sample_indices(S)\n",
    "      hat_gradf = hat_gradf + gradf(x,ind1) - gradf(x_prev,ind1)\n",
    "      hat_gradg = hat_gradg + gradg(x,ind2) - gradg(x_prev,ind2)\n",
    "\n",
    "    v = lmo(sigma*hat_gradf +hat_gradg)\n",
    "    x_prev = x\n",
    "    x = x+alpha*(v-x)\n",
    "\n",
    "    # NOTE(review): z is maintained but never logged or returned; kept so the timed work is unchanged.\n",
    "    s_prev = s\n",
    "    s = s+(t+1)*sigma\n",
    "    z = (s_prev*z-(t+1)*t*sigma*x_prev+(t+2)*(t+1)*sigma*x)/s\n",
    "    t += 1\n",
    "\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    # t was already incremented, so (t-1) is the iteration that just ran.\n",
    "    # NOTE(review): a full refresh is counted as 2*n although 2*(n/3) rows are evaluated -- confirm intended.\n",
    "    if (t-1)%q == 0:\n",
    "      oracle += 2*n\n",
    "    else:\n",
    "      oracle += 2*S\n",
    "\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, x, oracle, elapsed_time)\n",
    "      count += 1\n",
    "  print(t)\n",
    "  _save_history(history, filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Methods based on the constrained LMO (`sbcgi`, `sbcgf`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NTlXkhu8Ln_e"
   },
   "outputs": [],
   "source": [
    "def sbcgi(init,budget,i):\n",
    "  \"\"\"Conditional-gradient method on f using `clmo` with a linearised inner-level cut; single-sample estimates.\n",
    "\n",
    "  The starting point is the output of `spiderfw(init, 10**5)`; the time and\n",
    "  oracle count of that warm start are charged to this method.\n",
    "\n",
    "  Args:\n",
    "    init: starting point handed to the warm start (length-d array).\n",
    "    budget: limit on the accumulated time, in seconds.\n",
    "    i: run index, used only in the output file name 'sbcgi1_{i}.csv'.\n",
    "  \"\"\"\n",
    "  filename = 'sbcgi1_{}.csv'.format(i)\n",
    "  history = _new_history()\n",
    "  count = 1\n",
    "\n",
    "  start = time.time()\n",
    "  x, oracle = spiderfw(init, 10**5)\n",
    "  g0 = g(x, full_batch)\n",
    "  end = time.time()\n",
    "  elapsed_time = end-start\n",
    "  t = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "\n",
    "    alpha = 0.01/(t+1)\n",
    "\n",
    "    xi    = _sample_indices(1)\n",
    "    theta = _sample_indices(1)\n",
    "\n",
    "    if t == 0:\n",
    "      hat_gradf = gradf(x, theta)\n",
    "      hat_gradg = gradg(x, xi)\n",
    "      hat_g     = g(x,xi)\n",
    "    else:\n",
    "      hat_gradf = (1-alpha)*hat_gradf + gradf(x, theta) - (1-alpha)*gradf(x_prev, theta)\n",
    "      hat_gradg = (1-alpha)*hat_gradg +  gradg(x, xi)   - (1-alpha)* gradg(x_prev, xi)\n",
    "      hat_g     = (1-alpha)* hat_g    +    g(x, xi)     - (1-alpha)*   g(x_prev, xi)\n",
    "\n",
    "    # Cut: hat_g + hat_gradg^T (v - x) <= g0 + slack, with slack 1e-4/sqrt(t+1).\n",
    "    v = clmo(hat_gradf, hat_gradg, hat_gradg.T@x+g0-hat_g+10**(-4)/np.sqrt(t+1))\n",
    "    x_prev = x\n",
    "    x = x+alpha*(v-x)\n",
    "    t += 1\n",
    "\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    oracle += 2\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, x, oracle, elapsed_time)\n",
    "      count += 1\n",
    "\n",
    "  _save_history(history, filename)\n",
    "\n",
    "def sbcgf(init, budget, i):\n",
    "  \"\"\"Conditional-gradient method on f using `clmo` with a linearised inner-level cut; periodically refreshed estimates.\n",
    "\n",
    "  The starting point is the output of `spiderfw(init, 10**5)`; the time and\n",
    "  oracle count of that warm start are charged to this method.\n",
    "\n",
    "  Args:\n",
    "    init: starting point handed to the warm start (length-d array).\n",
    "    budget: limit on the accumulated time, in seconds.\n",
    "    i: run index, used only in the output file name 'sbcgf1_{i}.csv'.\n",
    "  \"\"\"\n",
    "  filename = 'sbcgf1_{}.csv'.format(i)\n",
    "  history = _new_history()\n",
    "  count = 1\n",
    "\n",
    "  S = np.floor((n/3)**0.5)\n",
    "  q = np.floor((n/3)**0.5)\n",
    "\n",
    "  start = time.time()\n",
    "  x, oracle = spiderfw(init, 10**5)\n",
    "  g0 = g(x, full_batch)\n",
    "  end = time.time()\n",
    "  elapsed_time = end-start\n",
    "  t = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "\n",
    "    alpha = 10**(-5)\n",
    "\n",
    "    if t%q == 0:\n",
    "      hat_gradf = gradf(x, full_batch)\n",
    "      hat_gradg = gradg(x, full_batch)\n",
    "      hat_g     =   g(x, full_batch)\n",
    "    else:\n",
    "      ind1 = _sample_indices(S)\n",
    "      ind2 = _sample_indices(S)\n",
    "      hat_gradf = hat_gradf + gradf(x,ind1) - gradf(x_prev,ind1)\n",
    "      hat_gradg = hat_gradg + gradg(x,ind2) - gradg(x_prev,ind2)\n",
    "      hat_g     =  hat_g    +   g(x, ind2)   -   g(x_prev, ind2)\n",
    "\n",
    "    # Cut: hat_g + hat_gradg^T (v - x) <= g0 + slack, with slack 1e-4/sqrt(t+1).\n",
    "    v = clmo(hat_gradf, hat_gradg, hat_gradg.T@x+g0-hat_g+10**(-4)/np.sqrt(t+1))\n",
    "    x_prev = x\n",
    "    x = x+alpha*(v-x)\n",
    "    t += 1\n",
    "\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    # t was already incremented, so (t-1) is the iteration that just ran.\n",
    "    if (t-1)%q == 0:\n",
    "      oracle += 2*n\n",
    "    else:\n",
    "      oracle += 2*S\n",
    "\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, x, oracle, elapsed_time)\n",
    "      count += 1\n",
    "\n",
    "  _save_history(history, filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Projection-based methods (`aripseg`, `dbgdsto`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Oci7cYwl5CkL"
   },
   "outputs": [],
   "source": [
    "def aripseg(init, budget, short,i):\n",
    "  \"\"\"Projected extragradient-style method on f + rho*g with a growing penalty rho.\n",
    "\n",
    "  The logged point is `bar_y`, a weighted average of the extrapolation points y.\n",
    "\n",
    "  Args:\n",
    "    init: starting point (length-d array).\n",
    "    budget: limit on the accumulated iteration time, in seconds.\n",
    "    short: if truthy use gamma0 = 1e-7 and the 'short' file name, otherwise gamma0 = 1e-2 and 'long'.\n",
    "    i: run index, used only in the output file name.\n",
    "  \"\"\"\n",
    "  if short:\n",
    "    gamma0 = 10**(-7)\n",
    "    filename = 'aripseg_short1_{}.csv'.format(i)\n",
    "  else:\n",
    "    gamma0 = 10**(-2)\n",
    "    filename = 'aripseg_long1_{}.csv'.format(i)\n",
    "\n",
    "  history = _new_history()\n",
    "  oracle = 0\n",
    "  count = 1\n",
    "\n",
    "  t = 0\n",
    "  x = init\n",
    "\n",
    "  rho0 = 10**3\n",
    "  Gamma = 0\n",
    "  r = 0.5\n",
    "  elapsed_time = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "\n",
    "    gamma = gamma0/(t+1)**0.75\n",
    "    rho = rho0*(t+1)**0.25\n",
    "\n",
    "    # Extrapolation step from x.\n",
    "    xi    = _sample_indices(1)\n",
    "    theta = _sample_indices(1)\n",
    "\n",
    "    hat_gradf = gradf(x,xi)\n",
    "    hat_gradg = gradg(x,theta)\n",
    "\n",
    "    y = proj(x-gamma*(hat_gradf+rho*hat_gradg))\n",
    "\n",
    "    # Update step: again from x, but with fresh samples evaluated at y.\n",
    "    xi    = _sample_indices(1)\n",
    "    theta = _sample_indices(1)\n",
    "\n",
    "    hat_gradf = gradf(y,xi)\n",
    "    hat_gradg = gradg(y,theta)\n",
    "\n",
    "    x = proj(x-gamma*(hat_gradf+rho*hat_gradg))\n",
    "\n",
    "    Gamma_prev = Gamma\n",
    "    Gamma = Gamma+(gamma*rho)**r\n",
    "\n",
    "    if t == 0:\n",
    "      bar_y = y\n",
    "    else:\n",
    "      bar_y = (Gamma_prev*bar_y+((gamma*rho)**r)*y)/Gamma\n",
    "    t += 1\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    oracle += 4\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, bar_y, oracle, elapsed_time)\n",
    "      count += 1\n",
    "\n",
    "  _save_history(history, filename)\n",
    "\n",
    "def dbgdsto(init, budget,short ,i):\n",
    "  \"\"\"Projected stochastic method stepping along grad f + nu*grad g with an adaptive multiplier nu.\n",
    "\n",
    "  Args:\n",
    "    init: starting point (length-d array).\n",
    "    budget: limit on the accumulated iteration time, in seconds.\n",
    "    short: if truthy use step size 1e-6 and the 'short' file name, otherwise 1e-2 and 'long'.\n",
    "    i: run index, used only in the output file name.\n",
    "  \"\"\"\n",
    "  if short:\n",
    "    gamma0 = 10**(-6)\n",
    "    filename = 'dbgdsto_short1_{}.csv'.format(i)\n",
    "  else:\n",
    "    gamma0 = 10**(-2)\n",
    "    filename = 'dbgdsto_long1_{}.csv'.format(i)\n",
    "\n",
    "  history = _new_history()\n",
    "  oracle = 0\n",
    "  count = 1\n",
    "\n",
    "  t = 0\n",
    "  x = init\n",
    "  alpha = 1\n",
    "  beta = 1\n",
    "  # Use the step size selected by `short`; it was previously fixed at 1e-6,\n",
    "  # which made the 'long' run identical to the 'short' one.\n",
    "  gamma = gamma0\n",
    "  elapsed_time = 0\n",
    "  while elapsed_time <= budget:\n",
    "\n",
    "    start = time.time()\n",
    "\n",
    "    xi    = _sample_indices(1)\n",
    "    theta = _sample_indices(1)\n",
    "\n",
    "    hat_gradf = gradf(x,xi)\n",
    "    hat_gradg = gradg(x,theta)\n",
    "    hat_g = g(x,theta)\n",
    "\n",
    "    norm_sq = np.linalg.norm(hat_gradg)**2\n",
    "    # Barrier target min(alpha*g, beta*||grad g||^2); norm_sq is already the squared norm.\n",
    "    hat_phi = min(alpha*hat_g, beta*norm_sq)\n",
    "    if norm_sq > 0:\n",
    "      nu = max((hat_phi-hat_gradf.T@hat_gradg)/norm_sq, 0)\n",
    "    else:\n",
    "      # Zero inner gradient: the multiplier has no effect, and dividing would produce NaN.\n",
    "      nu = 0\n",
    "    x = proj(x -gamma*(hat_gradf+nu*hat_gradg))\n",
    "\n",
    "    t += 1\n",
    "    end = time.time()\n",
    "\n",
    "    elapsed_time += end-start\n",
    "    oracle += 2\n",
    "    if elapsed_time >= count:\n",
    "      _record(history, t, x, oracle, elapsed_time)\n",
    "      count += 1\n",
    "\n",
    "  _save_history(history, filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run all methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WKDxYx0wTgqg"
   },
   "outputs": [],
   "source": [
    "budget = 240   # time limit handed to every method, in seconds\n",
    "for i in range(10):\n",
    "  init = np.random.rand(d)\n",
    "  # A draw outside the l1 ball is rescaled to unit l1 norm, which is feasible because lbd >= 1.\n",
    "  if np.linalg.norm(init,1) > lbd:\n",
    "    init = init/np.linalg.norm(init,1)\n",
    "\n",
    "  irscg(init, budget, i)\n",
    "  irfscg(init, budget, i)\n",
    "  sbcgi(init, budget, i)\n",
    "  sbcgf(init, budget, i)\n",
    "  aripseg(init, budget, True, i)\n",
    "  aripseg(init, budget, False, i)\n",
    "  dbgdsto(init, budget, True, i)\n",
    "  dbgdsto(init, budget, False, i)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": [],
   "authorship_tag": "ABX9TyNORxN8LPjlY+GLq1T2s4rp"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}