#!/bin/sh

# Fetch, for each label family (topics, industries, regions), the category
# list and the gzip-compressed qrels file.  -N turns on wget's timestamping.
jmlr_base=http://jmlr.csail.mit.edu/papers/volume5/lewis04a

for rel_path in \
	a01-list-of-topics/rcv1.topics.txt \
	a08-topic-qrels/rcv1-v2.topics.qrels.gz \
	a04-list-of-industries/rcv1.industries.txt \
	a09-industry-qrels/rcv1-v2.industries.qrels.gz \
	a06-list-of-regions/rcv1.regions.txt \
	a10-region-qrels/rcv1-v2.regions.qrels.gz
do
	wget -N "$jmlr_base/$rel_path"
done


# Download the lyrl2004 vector files (one training file and four test parts)
# and unpack each one to NAME.dat in the current directory.
vectors_url=http://jmlr.csail.mit.edu/papers/volume5/lewis04a/a13-vector-files

# fetch_vectors NAME
#   Downloads NAME.dat.gz from $vectors_url and unpacks it to NAME.dat.
#   - The archive is unpacked only when the download succeeded, so a partial
#     download can never replace a good NAME.dat.
#   - gunzip -f overwrites a NAME.dat left by a previous run; plain gunzip
#     would prompt (or refuse when non-interactive) and keep the old data.
#   - If the download fails but NAME.dat already exists, it is reused with a
#     warning; if neither is available the script exits with status 1.
fetch_vectors() {
	if wget -N "$vectors_url/$1.dat.gz"
	then
		gunzip -f "$1.dat.gz" || exit 1
	elif [ -f "$1.dat" ]
	then
		echo "warning: could not download $1.dat.gz; reusing existing $1.dat" >&2
	else
		echo "error: could not download $1.dat.gz and no $1.dat is present" >&2
		exit 1
	fi
}

fetch_vectors lyrl2004_vectors_train

for i in 0 1 2 3
do
	fetch_vectors "lyrl2004_vectors_test_pt$i"
done

# Convert the vector files to .svm files, once per label family.
#
# Abort on the first failing command from here on.  id_label is a single
# scratch file shared by every iteration (presumably written by
# gen_id_label.py -- it is not created anywhere else in this script), so
# carrying on after a failure would let dat2svm.py pair the vectors with the
# labels of the previous target without any visible error.
set -e

for target in topics industries regions
do
	qrels="rcv1-v2.${target}.qrels"

	# Unpack a newly downloaded archive, replacing the qrels file left by an
	# earlier run (plain gunzip refuses to overwrite).  When no archive is
	# present, an already-unpacked qrels file is used as is; if that is
	# missing too, gen_id_label.py is expected to fail and stop the script.
	if [ -f "${qrels}.gz" ]
	then
		gunzip -f "${qrels}.gz"
	fi

	./gen_id_label.py "rcv1.${target}.txt" "${qrels}"

	# Disabled cleanup of the downloaded label files.
	#rm rcv1.$target.txt rcv1-v2.$target.qrels

	# Training set: convert and report the line count.  Unlike the test
	# parts below it is left uncompressed (compress/publish steps disabled).
	./dat2svm.py lyrl2004_vectors_train.dat id_label > "rcv1_${target}_train.svm"
	wc -l "rcv1_${target}_train.svm"
	#bzip2 rcv1_${target}_train.svm
	#/bin/mv rcv1_${target}_train.svm.bz2 ../../../../htdocs/libsvmtools/datasets/multilabel

	# Test set: four parts, each converted, counted and compressed.
	for i in 0 1 2 3
	do
		./dat2svm.py "lyrl2004_vectors_test_pt$i.dat" id_label > "rcv1_${target}_test_$i.svm"
		wc -l "rcv1_${target}_test_$i.svm"
		# -f: replace the .bz2 produced by an earlier run; without it bzip2
		# refuses to overwrite and the stale archive would be kept.
		bzip2 -f "rcv1_${target}_test_$i.svm"
		#/bin/mv rcv1_${target}_test_$i.svm.bz2 ../../../../htdocs/libsvmtools/datasets/multilabel
		#/bin/rm lyrl2004_vectors_test_pt$i.dat
	done
done

# Remove the scratch label file; -f keeps this from failing if it is absent.
/bin/rm -f id_label
#/bin/rm lyrl2004_vectors_train.dat


