#!/bin/bash
mapper=`printenv mapred_task_id | cut -d "_" -f 5`
rm -f temp.cache
date +"%F %T Start training mapper=$mapper" > /dev/stderr
vwcmd="./vw -b 24 --total $mapred_map_tasks --node $mapper --cache_file temp.cache --span_server $mapreduce_job_submithost --loss_function=logistic"
mapred_job_id=`echo $mapred_job_id | tr -d 'job_'`
gdcmd="$vwcmd --unique_id $mapred_job_id --passes 1 --adaptive --exact_adaptive_norm -d /dev/stdin -f tempmodel"
mapred_job_id=`expr $mapred_job_id \* 2` #create new nonce
bfgscmd="$vwcmd --unique_id $mapred_job_id --bfgs --mem 5 --passes 20 -f model -i tempmodel"
if [ "$mapper" == '000000' ]; then
    $gdcmd > mapperout 2>&1
    if [ $? -ne 0 ]; then
      date +"%F %T Failed mapper=$mapper cmd=$gdcmd" > /dev/stderr
      exit 1
    fi
    $bfgscmd >> mapperout 2>&1
    outfile=$mapred_output_dir/model
    mapperfile=$mapred_output_dir/mapperout
    found=`hadoop fs -lsr | grep $mapred_output_dir | grep mapperout`
    if [ "$found" != "" ]; then
      hadoop fs -rm -r $mapperfile
    fi
    found=`hadoop fs -lsr | grep $mapred_output_dir | grep model`
    if [ "$found" != "" ]; then
      hadoop fs -rm -r $outfile
    fi
    date +"%F %T outfile=$outfile" > /dev/stderr
    hadoop fs -put model $outfile
    hadoop fs -put mapperout $mapperfile
else
    $gdcmd
    if [ $? -ne 0 ]; then
      date +"%F %T Failed mapper=$mapper cmd=$gdcmd" > /dev/stderr
      exit 1
    fi
    $bfgscmd
fi
date +"%F %T Done mapper=$mapper" > /dev/stderr
