#include <stdlib.h>
#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<math.h>
#define fileName 100
#define bufLen 80920

typedef struct node_t{
	short index;
	float weight;
}node;

int read_problem(FILE *fp, double **prob){
	int index, i = 0;
	double t;
	
	while(1){
begin:
		if (fscanf(fp,"%lf",&prob[i][0]) == EOF)
			break;
		
		while(1){
			int c;
			do{
				c = fgetc(fp);
				if (c == '\n'){
					++i;
					goto begin;
				}
			}while(isspace(c));
			ungetc(c,fp);
			fscanf(fp,"%d:%lf",&index,&t);
			prob[i][index] = t;
		}
	}

	return i;
}

void prompt(){
    printf("usage: linear-feasel -f #features -ft #features_you_want model_file data_file logfile_name\n");
    printf("-f #features: number of features(dimensions) in the raw data\n");
    printf("-ft #features_you_want: number of features(dimensions) you want\n");
    printf("model_file: the *.model file generated by svm-train\n");
    printf("data_file: the raw data file from which features are to be selected\n");
    printf("logfile_name: a file to which the indices of the selected features will be output\n");
}
int main(int argc, char **argv){
	int l, dim, datanum,odim;
        FILE *fp, *fp_model, *fp_tgt, *fp1;
	char model[fileName], tgt[fileName], buf[bufLen];
	
	if(argc != 8){
                prompt(); 
		return 0;
	}
		
	dim = atoi(argv[2]) + 1;
	odim = atoi(argv[4]);
	strcpy(model, argv[5]);
	strcpy(tgt, argv[6]);
	
	fp_model = fopen(model, "r+");
        
	while(fgets(buf, bufLen, fp_model) != NULL){
		if(strncmp("total_sv", buf, 8) == 0)
			break;
	}
        char *s = strchr(buf, ' '); 
	datanum = atoi(s);
        double **prob = (double **)calloc(datanum, sizeof(double *));
	for(int i = 0; i < datanum; ++i)
		prob[i] = (double *)calloc(dim, sizeof(double));
	node *list = (node *)calloc(dim, sizeof(node));
	
	while(fgets(buf, bufLen, fp_model) != NULL){
		if(strncmp("SV", buf, 2) == 0)
			break;
	}
        
	
	l = read_problem(fp_model,prob);	
	
	for(int i = 1; i < dim; ++i){
		float acc = 0.0;
		for(int j = 0; j < l; ++j){
			acc += prob[j][0] * prob[j][i];
		}
		list[i].weight = fabs(acc);
		list[i].index = i;
	}

	for(int i = 0; i < dim; ++i){
		node tmp;
		for(int j = i + 1; j < dim; ++j){
			if(list[i].weight < list[j].weight){
				tmp = list[i];
				list[i] = list[j];
				list[j] = tmp;
			}
		}
	}
	
	fp = fopen(argv[7], "w+");
	for(int i = 0; i < dim; ++i){
		fprintf(fp, "%d\n", list[i].index);
	}
	fclose(fp_model);
	fclose(fp);
	
	fp1 = fopen("linear.log", "w+");
	for(int i = 0; i < dim; ++i){
		fprintf(fp, "%d %f\n", list[i].index, list[i].weight);
	}
	fclose(fp1);
	
	for(int i = 0; i < datanum; ++i){
		free(prob[i]); 
	}
	
	
	
	fp_tgt = fopen(tgt, "r+"); 
	double *onepoint = (double *)calloc((size_t)dim, sizeof(double));
	char c;
	int index;
	double t;
	
	while(1){
begin2:
		if (fscanf(fp_tgt,"%lf",&onepoint[0]) == EOF)
			break;
		
		while(1){
			int c;
			do{
				c = fgetc(fp_tgt);
				if (c == '\n'){
					printf("%f", onepoint[0]);
					for(int i = 0; i < odim; ++i){
						printf(" %d:%f", i+1, onepoint[list[i].index]);
					}
					printf("\n");
					goto begin2;
				}
			}while(isspace(c));
			ungetc(c,fp_tgt);
			fscanf(fp_tgt,"%d:%lf",&index,&t);
			onepoint[index] = t;
		}
	}
	fclose(fp_tgt);
	
	return 0;

}
