#!/bin/bash

# Suffix of the per-model, per-seed "top five" result files that
# write_top_five_results creates and write_scores later reads and deletes.
readonly TOP_FIVE_RES_SUFFIX='top_five_results.txt'

# Every relative path used below (runs/, the intermediate result files and the
# ../*.tex outputs) is resolved from inside LibMultiLabel. Stop here if the
# directory is missing; otherwise the functions would create and remove files
# in whatever directory the script was started from.
cd LibMultiLabel || exit 1

#######################################
# Append the five best runs of one model/seed to the model's LaTeX table.
#
# Every runs/*<model_name>*/logs.json entry whose config.seed equals <seed> is
# flattened by jq into one tab-separated row:
#   seed,
#   validation Macro*-F1 / Micro-F1 / P@5 (taken from the validation entry
#   with the highest P@5),
#   test Macro*-F1 / Micro-F1 / P@5 (first test entry),
#   num_filter_per_size, first filter size, dropout, learning_rate
# The five rows with the highest validation P@5 are stored in
# <model_name>_<seed>_${TOP_FIVE_RES_SUFFIX} (left on disk on purpose; it is
# consumed by write_scores) and appended, formatted as LaTeX rows followed by
# a \bottomrule, to ../<model_name>_all.tex.
#
# Globals:   TOP_FIVE_RES_SUFFIX (read)
# Arguments: $1 - model name as it appears in the run directory names
#            $2 - random seed to select
#######################################
write_top_five_results() {
    local model_name=$1
    local seed=$2
    local combined_res_file='combined_res.txt'
    local seed_res_file="${model_name}_${seed}_${TOP_FIVE_RES_SUFFIX}"
    local json_result
    local values

    # Glob directly instead of parsing `ls`, so odd directory names survive.
    # Redirecting the whole loop truncates the scratch file first: rows left
    # behind by an interrupted earlier run can no longer leak into the ranking,
    # and the file exists even when no run matches.
    for json_result in runs/*"${model_name}"*/logs.json; do
        # An unmatched glob stays literal; skip it.
        [[ -e "$json_result" ]] || continue
        jq -r --arg seed "$seed" '
                select(.config.seed|tostring==$seed)
                | "\(.config|."seed")\t\(.val|max_by(."P@5")|[."Macro*-F1", ."Micro-F1", ."P@5"]|@tsv)\t\(.test[0]|[."Macro*-F1", ."Micro-F1", ."P@5"]|@tsv)\t\(.config|."num_filter_per_size")\t\(.config.filter_sizes[0])\t\(.config|[."dropout", ."learning_rate"]|@tsv)"
                ' "$json_result"
    done > "$combined_res_file"

    # Column 4 is the validation P@5. Splitting on tabs explicitly keeps the
    # column numbering stable even if some field is empty.
    sort -t $'\t' -k4,4 -nr "$combined_res_file" | head -n 5 > "$seed_res_file"

    # The seed label is printed only on the third (middle) row of the group;
    # the other rows leave the first column blank. Each row ends with the
    # LaTeX line break "\\" and the group is closed with "\bottomrule".
    values=$(awk -F '\t' '{
        if (NR == 3) {
            printf "%5d & %9.4f & %9.4f & %9.4f & %9.4f & %9.4f & %9.4f & %6d & %6d & %9.4f & %9.4f \\\\ \n", $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11
        } else {
            printf "      & %9.4f & %9.4f & %9.4f & %9.4f & %9.4f & %9.4f & %6d & %6d & %9.4f & %9.4f \\\\ \n", $2, $3, $4, $5, $6, $7, $8, $9, $10, $11
        }
    }
    END {
        printf "\\bottomrule"
    }' "$seed_res_file")

    printf '%s\n' "$values" >> "../${model_name}_all.tex"
    rm -f -- "$combined_res_file"
}

#######################################
# Summarise one model/seed "top five" file into a row of all_scores.txt.
#
# Reads <model_name>_<seed>_${TOP_FIVE_RES_SUFFIX} (tab-separated, as written
# by write_top_five_results) and computes the mean and the population standard
# deviation of columns 5-7 (the test Macro-F1, Micro-F1 and P@5). The appended
# row is tab-separated:
#   <label> <TAB> seed <TAB> mean5 <TAB> std5 <TAB> mean6 <TAB> std6 <TAB> mean7 <TAB> std7
# where <label> is the upper-cased model name followed by a space for seed
# 1331 and empty for every other seed. The input file is removed afterwards.
#
# The input is expected to hold at least one row; with an empty file the
# division by the row count is undefined (as it was before).
#
# Globals:   TOP_FIVE_RES_SUFFIX (read)
# Arguments: $1 - model name
#            $2 - random seed
#######################################
write_scores() {
    local model_name=$1
    local seed=$2
    local seed_res_file="${model_name}_${seed}_${TOP_FIVE_RES_SUFFIX}"
    local model_name_uppercase
    local values

    values=$(awk -F '\t' '{
        # Remember the seed column explicitly rather than relying on the last
        # record still being available inside END.
        seed_col = $1
        for (i = 5; i <= 7; i++) {
            square_sums[i] += $i * $i
            sums[i] += $i
        }
    } END {
        for (i = 5; i <= 7; i++) {
            means[i] = sums[i] / NR
            variance = (square_sums[i] - sums[i] * sums[i] / NR) / NR
            # Rounding can push a (near-)zero variance slightly below zero,
            # which would make sqrt() print nan into the table.
            if (variance < 0) variance = 0
            std[i] = sqrt(variance)
        }
        printf "\t%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n", seed_col, means[5], std[5], means[6], std[6], means[7], std[7]
    }' "$seed_res_file")

    # Only the row for seed 1331 carries the model label in its first column.
    if [[ "$seed" == 1331 ]]; then
        model_name_uppercase=$(printf '%s\n' "$model_name" | tr a-z A-Z)
        values="${model_name_uppercase} ${values}"
    fi

    printf '%s\n' "$values" >> all_scores.txt
    rm -- "$seed_res_file"
}

#######################################
# Turn all_scores.txt into the two LaTeX fragments and delete it.
#
# all_scores.txt is read as tab-separated rows of
#   label, seed, mean/std, mean/std, mean/std   (8 columns)
# and is processed in groups of three rows.
#   ../cnn_caml_avg.tex          - full table; one "mean $\pm$ std" row per
#                                  input row, \hline before each group
#   ../table_two_avg_scores.lex  - one row per group (rows 1-3 and 4-6) with
#                                  columns 3, 5 and 7 averaged over the group;
#                                  the label is taken from rows 2 and 5
#######################################
write_avg_tables() {
    local table_rows
    local avg_rows

    # write Table 4
    printf '%s\n' "Generating Table 4 to cnn_caml_avg.tex ..."
    table_rows=$(awk -F '\t' '
        # A rule opens every block of three rows.
        NR % 3 == 1 { printf "\\hline\n" }
        {
            printf "%s & %d & %.3f $\\pm$ %.3f & %.3f $\\pm$ %.3f & %.3f $\\pm$ %.3f \\\\ \n", $1, $2, $3, $4, $5, $6, $7, $8
        }
    ' all_scores.txt)

    cat > ../cnn_caml_avg.tex <<EOL
\\begin{table*}[tb]
  \\centering
  \\caption{MIMIC-III-50 results after parameter selection. We
    consider three random seeds, where 1,337 was used in \\citet{JM18a}. Under each seed, we select the five models achieving the best validation precision@5, use them to predict the test set, and report mean/variance.}
  \\begin{tabular}{lr|rrrrrrr}
& Seed    & Macro-F1 & Micro-F1 & P@5 \\\\
${table_rows}
\\end{tabular}
\\label{tab:newresults}
\\end{table*}
EOL

    # write average scores (Macro-F1, Micro-F1, and P@5) to the bottom of Table 2
    printf '%s\n' "Generating CAML/CNN avg scores in Table 2 to table_two_avg_scores.lex ..."
    avg_rows=$(awk -F '\t' '
        # Running per-column totals for the current group of three rows.
        { for (col = 1; col <= NF; col++)  totals[col] += $col }

        # The middle row of each group is the one carrying the model label.
        NR == 2 || NR == 5 { label = $1 }

        # End of the first group: emit its averages and start over.
        NR == 3 {
            printf "%s & %.3f & %.3f & %.3f & Y &   ", label, totals[3]/3, totals[5]/3, totals[7]/3
            printf "\\multirow{2}{*}{parameter selection applied} \\\\ \n"
            for (col = 1; col <= NF; col++)  totals[col] = 0
        }

        # End of the second group.
        NR == 6 {
            printf "%s & %.3f & %.3f & %.3f & Y & \\\\ \n", label, totals[3]/3, totals[5]/3, totals[7]/3
        }
    ' all_scores.txt)

    cat > ../table_two_avg_scores.lex << EOL
${avg_rows}
EOL
    rm all_scores.txt
}

#######################################
# Build ../cnn_all.tex and ../caml_all.tex.
#
# For each model the file is (re)created with the LaTeX table preamble, then
# write_top_five_results appends the rows for seeds 1337, 1331 and 42 (in that
# order), and finally the closing tabular/table lines are appended.
#######################################
generate_supplement_table() {
    local model
    local label
    local seed
    local tex_file

    for model in cnn caml; do
        tex_file="../${model}_all.tex"
        printf '%s\n' "Generating the $model latex table to ${model}_all.tex ..."

        # Upper-cased model name used in the caption.
        label=$(tr a-z A-Z <<< "$model")

        # Preamble: truncates any previous version of the file.
        cat > "$tex_file" <<EOL
\\begin{table*}[ht]
\\centering
\\caption{${label} parameter-selection results on MIMIC-III-50, ordered according to validation P@5}
    \\label{table:${model}_grid}

\\begin{tabular}{c|ccc|ccc|ccccc}
\\toprule
\\multicolumn{1}{c|}{ } &
\\multicolumn{3}{c|}{Validation performance} &
\\multicolumn{3}{c|}{Test performance} &
\\multicolumn{4}{c}{Parameters selected} \\\\
Seed & Macro-F1 &  Micro-F1 &  P@5 &  Macro-F1 &  Micro-F1 &    P@5 &  \$d_c\$ &  \$k\$ &    \$q\$ &  \$\eta\$ \\\\
\\midrule
EOL

        # Body: each call appends one group of rows to the same file.
        for seed in 1337 1331 42; do
            write_top_five_results "$model" "$seed"
        done

        # Closing lines.
        cat >> "$tex_file" <<EOL
\\end{tabular}
\\end{table*}
EOL
    done
}

#######################################
# Produce the averaged-score tables.
#
# Calls write_scores once per (model, seed) pair - models cnn then caml, seeds
# 1337, 1331, 42 within each model - and then write_avg_tables once.
#######################################
generate_avg_table() {
    local -a models=(cnn caml)
    local -a seeds=(1337 1331 42)
    local model
    local seed

    # calculate table values (mean & std)
    for model in "${models[@]}"; do
        for seed in "${seeds[@]}"; do
            write_scores "$model" "$seed"
        done
    done

    write_avg_tables
}

# Order matters: generate_supplement_table (through write_top_five_results)
# creates the per-model, per-seed result files that generate_avg_table
# (through write_scores) reads and then deletes.
generate_supplement_table
generate_avg_table
