diff -r -U2 libsvm-2.3.orig/Makefile libsvm-2.3/Makefile
--- libsvm-2.3.orig/Makefile	Tue Mar 13 11:53:14 2001
+++ libsvm-2.3/Makefile	Tue Mar 27 10:27:24 2001
@@ -1,12 +1,16 @@
 CC = gcc
 CXXC = g++
-CFLAGS = -Wall -O3 -g
+CFLAGS = -Wall -O3 -g -DUSE_THREADS
+#CFLAGS = -Wall -g -DUSE_THREADS
 
-all: svm-train svm-predict svm-scale
+all: svm-train svm-predict svm-scale test
 
+test: test.cpp svm.o
+	$(CXXC) $(CFLAGS) test.cpp svm.o -o test -lm -lpthread
 svm-predict: svm-predict.c svm.o
-	$(CC) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
-svm-train: svm-train.c svm.o
-	$(CC) $(CFLAGS) svm-train.c svm.o -o svm-train -lm
+	$(CC) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm -lpthread
+svm-train: svm-train.cpp svm.o
+	$(CXXC) $(CFLAGS) svm-train.cpp svm.o -o svm-train -lm -lpthread
+#	$(CXXC) $(CFLAGS) svm-train.cpp svm.o -o svm-train -lm -lpthread
 svm.o: svm.cpp svm.h
 	$(CXXC) $(CFLAGS) -c svm.cpp
diff -r -U2 libsvm-2.3.orig/svm.cpp libsvm-2.3/svm.cpp
--- libsvm-2.3.orig/svm.cpp	Tue Mar 13 22:42:36 2001
+++ libsvm-2.3/svm.cpp	Tue Mar 27 12:03:15 2001
@@ -1,2 +1,32 @@
+#if defined WIN32
+#include <windows.h>
+#include <process.h>
+#define thread_start_f void
+typedef HANDLE sem_t;
+typedef HANDLE pthread_t;
+#define sem_init(psem, pshared, value) \
+	*(psem) = CreateSemaphore(NULL, value, threads, NULL)
+#define sem_post(psem) \
+	ReleaseSemaphore(*(psem), 1, NULL)
+#define sem_wait(psem) \
+	WaitForSingleObject(*(psem), INFINITE)
+#define sem_destroy(psem) \
+	CloseHandle(*(psem))
+#define pthread_create(pThread, pAttr, fStart, pArg) \
+	(*(pThread)) = (HANDLE)_beginthread(fStart, 0, pArg)
+#define pthread_exit(code)
+#else /* WIN32 */
+#if defined USE_THREADS
+#define _REENTRANT
+#if defined SOLARIS_REV || defined X86_REV
+#include <thread.h>
+#include <synch.h>
+#else /* SOLARIS_REV */
+#include <pthread.h>
+#include <semaphore.h>
+#define thread_start_f void*
+#endif /* SOLARIS_REV */
+#endif /* USE_THREADS */
+#endif /* WIN32 */
 #include <math.h>
 #include <stdio.h>
@@ -164,4 +194,24 @@
 }
 
+#if defined(USE_THREADS)
+class Kernel;
+class Kernel_thread_data {
+public:
+	Kernel_thread_data(int threads, Kernel* k);
+	~Kernel_thread_data();
+
+	int	threads;
+	Kernel*	kernel;
+	int	do_kernel_i;
+	int*	do_kernel_start;
+	int*	do_kernel_end;
+	double*	do_kernel_data;
+	pthread_t* thread;
+	sem_t	work;			/* wait for work */
+	sem_t	done;			/* wait for completion */
+	int	thread_suicide;
+};
+#endif /* USE_THREADS */
+
 //
 // Kernel evaluation
@@ -179,4 +229,6 @@
 				 const svm_parameter& param);
 	virtual double *get_Q(int column, int len) const = 0;
+	virtual void do_kernel(int i, int start, int end, double* data) const;
+	void do_kernel_wrap(int i, int start, int end, double* data) const;
 	virtual void swap_index(int i, int j) const	// no so const...
 	{
@@ -184,4 +236,9 @@
 		if(x_square) swap(x_square[i],x_square[j]);
 	}
+
+#if defined(USE_THREADS)
+	Kernel_thread_data*	d;
+#endif
+
 protected:
 
@@ -217,4 +274,69 @@
 };
 
+#if defined(USE_THREADS)
+thread_start_f kernel_thread_start(void* arg)
+{
+	int tid;
+	Kernel_thread_data* d = (Kernel_thread_data*)arg;
+	Kernel* kernel = (Kernel*)d->kernel;
+
+	tid = d->do_kernel_i;
+	sem_post(&d->done);
+
+	while (!d->thread_suicide) {
+		sem_wait(&d->work);
+		if (!d->thread_suicide) {
+			kernel->do_kernel(d->do_kernel_i, 
+				  d->do_kernel_start[tid],
+				  d->do_kernel_end[tid],
+				  d->do_kernel_data
+				  );
+		}
+		sem_post(&d->done);
+	}
+	pthread_exit(0);
+}
+
+Kernel_thread_data::Kernel_thread_data(int threads, Kernel* k)
+{
+	int i;
+
+	this->threads = threads >= 1 ? threads : 1;
+	thread_suicide = 0;
+	kernel = k;
+	do_kernel_start = new int[threads];
+	do_kernel_end = new int[threads];
+	do_kernel_data = NULL;
+	thread = new pthread_t[threads];
+	sem_init(&work, 0, 0);
+	sem_init(&done, 0, 0);
+
+	for (i = 0; i < threads - 1; ++i) {
+		do_kernel_i = i;
+		pthread_create(&thread[i], NULL, kernel_thread_start, (void*)this);
+		sem_wait(&done);
+	}
+}
+
+Kernel_thread_data::~Kernel_thread_data()
+{
+	thread_suicide = 1;
+	int i;
+	for (i = 1; i < threads; ++i) {
+		sem_post(&work);
+	}
+	for (i = 1; i < threads; ++i) {
+		sem_wait(&done);
+	}
+	if (threads > 1) {
+		delete [] thread;
+		delete [] do_kernel_start;
+		delete [] do_kernel_end;
+	}
+	sem_destroy(&work);
+	sem_destroy(&done);
+}
+#endif /* USE_THREADS */
+
 Kernel::Kernel(int l, const svm_node * const * x_, const svm_parameter& param)
 :kernel_type(param.kernel_type), degree(param.degree),
@@ -250,4 +372,11 @@
 	else
 		x_square = 0;
+
+#if defined(USE_THREADS)
+	d = NULL;
+	if (param.threads > 1) {
+		d = new Kernel_thread_data(param.threads, this);
+	}
+#endif /* USE_THREADS */
 }
 
@@ -256,4 +385,7 @@
 	delete[] x;
 	delete[] x_square;
+#if defined(USE_THREADS)
+	delete d;
+#endif /* USE_THREADS */
 }
 
@@ -339,4 +471,37 @@
 }
 
+void Kernel::do_kernel(int i, int start, int end, double* data) const
+{
+	int	j;
+	for(j = start; j < end; j++)
+		data[j] = (this->*kernel_function)(i,j);
+}
+
+void Kernel::do_kernel_wrap(int i, int start, int end, double* data) const
+{
+#if defined USE_THREADS
+	if (d && d->threads > 1) {
+		int chunk = (end - start) / d->threads;
+		int t;
+	
+		for (t = 0; t < d->threads - 1; t++, start += chunk) {
+			d->do_kernel_i = i;
+			d->do_kernel_data = data;
+			d->do_kernel_start[t] = start;
+			d->do_kernel_end[t] = start + chunk;
+		}
+		for (t = 0; t < d->threads - 1; t++) {
+			sem_post(&d->work);
+		}
+	}
+#endif /* USE_THREADS */
+	do_kernel(i, start, end, data);
+#if defined USE_THREADS
+	for (t = 0; d && t < d->threads - 1; t++) {
+		sem_wait(&d->done);
+	}
+#endif /* USE_THREADS */
+}
+
 // Generalized SMO+SVMlight algorithm
 // Solves:
@@ -359,6 +524,6 @@
 class Solver {
 public:
-	Solver() {};
-	virtual ~Solver() {};
+	Solver();
+	virtual ~Solver();
 
 	struct SolutionInfo {
@@ -409,6 +574,126 @@
 	virtual double calculate_rho();
 	virtual void do_shrinking();
+	int	threads;
+#if defined(USE_THREADS)
+public:
+	int*	select_working_set_start;
+	int*	select_working_set_end;
+	int*	select_working_set_Gmax1_idx;
+	int*	select_working_set_Gmax2_idx;
+	int*	select_working_set_Gmax3_idx;
+	int*	select_working_set_Gmax4_idx;
+	double*	select_working_set_Gmax1;
+	double*	select_working_set_Gmax2;
+	double*	select_working_set_Gmax3;
+	double*	select_working_set_Gmax4;
+	pthread_t* thread;
+	sem_t	work;			/* wait for work */
+	sem_t	done;			/* wait for completion */
+	int	thread_suicide;
+#endif /* USE_THREADS */
+public:
+	void select_working_set_inner(int start, 
+				      int end, 
+				      int& Gmax1_idx, 
+				      int& Gmax2_idx, 
+				      int& Gmax3_idx, 
+				      int&Gmax4_idx, 
+				      double& Gmax1, 
+				      double& Gmax2, 
+				      double& Gmax3, 
+				      double& Gmax4);
 };
 
+#if defined(USE_THREADS)
+thread_start_f solver_thread_start(void* arg)
+{
+	int tid;
+	Solver* solver = (Solver*)arg;
+
+	tid = solver->select_working_set_start[0];
+	sem_post(&solver->done);
+
+	while (!solver->thread_suicide) {
+		sem_wait(&solver->work);
+		if (!solver->thread_suicide) {
+			solver->select_working_set_inner(
+				solver->select_working_set_start[tid], 
+				solver->select_working_set_end[tid],
+				solver->select_working_set_Gmax1_idx[tid],
+				solver->select_working_set_Gmax2_idx[tid],
+				solver->select_working_set_Gmax3_idx[tid],
+				solver->select_working_set_Gmax4_idx[tid],
+				solver->select_working_set_Gmax1[tid],
+				solver->select_working_set_Gmax2[tid],
+				solver->select_working_set_Gmax3[tid],
+				solver->select_working_set_Gmax4[tid]
+				);
+		}
+		sem_post(&solver->done);
+	}
+	pthread_exit(0);
+}
+#endif /* USE_THREADS */
+
+Solver::Solver()
+{
+	threads = 1;
+#if defined(USE_THREADS)
+	threads = 2;
+	if (threads > 1) {
+		int i;
+		select_working_set_start = new int[threads];
+		select_working_set_end = new int[threads];
+		select_working_set_Gmax1_idx = new int[threads];
+		select_working_set_Gmax2_idx = new int[threads];
+		select_working_set_Gmax3_idx = new int[threads];
+		select_working_set_Gmax4_idx = new int[threads];
+		select_working_set_Gmax1 = new double[threads];
+		select_working_set_Gmax2 = new double[threads];
+		select_working_set_Gmax3 = new double[threads];
+		select_working_set_Gmax4 = new double[threads];
+		thread = new pthread_t[threads];
+		sem_init(&work, 0, 0);
+		sem_init(&done, 0, 0);
+		thread_suicide = 0;
+
+		for (i = 0; i < threads - 1; ++i) {
+			select_working_set_start[0] = i;
+			pthread_create(&thread[i], NULL, solver_thread_start, (void*)this);
+			sem_wait(&done);
+		}
+	}
+#endif /* USE_THREADS */
+}
+
+Solver::~Solver()
+{
+#if defined(USE_THREADS)
+	thread_suicide = 1;
+	int i;
+	for (i = 1; i < threads; ++i) {
+		sem_post(&work);
+	}
+	for (i = 1; i < threads; ++i) {
+		sem_wait(&done);
+	}
+	if (threads > 1) {
+		delete []	thread;
+		delete []	select_working_set_start;
+		delete []	select_working_set_end;
+		delete []	select_working_set_Gmax1_idx;
+		delete []	select_working_set_Gmax2_idx;
+		delete []	select_working_set_Gmax3_idx;
+		delete []	select_working_set_Gmax4_idx;
+		delete []	select_working_set_Gmax1;
+		delete []	select_working_set_Gmax2;
+		delete []	select_working_set_Gmax3;
+		delete []	select_working_set_Gmax4;
+	}
+	sem_destroy(&work);
+	sem_destroy(&done);
+#endif /* USE_THREADS */
+}
+
 void Solver::swap_index(int i, int j)
 {
@@ -703,20 +988,33 @@
 }
 
-// return 1 if already optimal, return 0 otherwise
-int Solver::select_working_set(int &out_i, int &out_j)
+void Solver::select_working_set_inner(int start, 
+				      int end, 
+				      int& Gmax1_idx, 
+				      int& Gmax2_idx, 
+				      int& Gmax3_idx, 
+				      int&Gmax4_idx, 
+				      double& Gmax1, 
+				      double& Gmax2, 
+				      double& Gmax3, 
+				      double& Gmax4)
 {
-	// return i,j which maximize -grad(f)^T d , under constraint
-	// if alpha_i == C, d != +1
-	// if alpha_i == 0, d != -1
 
-	double Gmax1 = -INF;		// max { -grad(f)_i * d | y_i*d = +1 }
-	int Gmax1_idx = -1;
+	int i;
 
-	double Gmax2 = -INF;		// max { -grad(f)_i * d | y_i*d = -1 }
-	int Gmax2_idx = -1;
+	Gmax1 = -INF;	// max { -grad(f)_i * d | y_i = +1, d = +1 }
+	Gmax1_idx = -1;
 
-	for(int i=0;i<active_size;i++)
+	Gmax2 = -INF;	// max { -grad(f)_i * d | y_i = +1, d = -1 }
+	Gmax2_idx = -1;
+
+	Gmax3 = -INF;	// max { -grad(f)_i * d | y_i = -1, d = +1 }
+	Gmax3_idx = -1;
+
+	Gmax4 = -INF;	// max { -grad(f)_i * d | y_i = -1, d = -1 }
+	Gmax4_idx = -1;
+
+	for(i = start; i < end; i++)
 	{
-		if(y[i]==+1)	// y = +1
+		if(y[i]==+1)	// y == +1
 		{
 			if(!is_upper_bound(i))	// d = +1
@@ -737,30 +1035,94 @@
 			}
 		}
-		else		// y = -1
+		else		// y == -1
 		{
 			if(!is_upper_bound(i))	// d = +1
 			{
-				if(-G[i] > Gmax2)
+				if(-G[i] > Gmax3)
 				{
-					Gmax2 = -G[i];
-					Gmax2_idx = i;
+					Gmax3 = -G[i];
+					Gmax3_idx = i;
 				}
 			}
 			if(!is_lower_bound(i))	// d = -1
 			{
-				if(G[i] > Gmax1)
+				if(G[i] > Gmax4)
 				{
-					Gmax1 = G[i];
-					Gmax1_idx = i;
+					Gmax4 = G[i];
+					Gmax4_idx = i;
 				}
 			}
 		}
 	}
+}
 
-	if(Gmax1+Gmax2 < eps)
- 		return 1;
+// return 1 if already optimal, return 0 otherwise
+int Solver::select_working_set(int &out_i, int &out_j)
+{
+	// return i,j which maximize -grad(f)^T d , under constraint
+	// if alpha_i == C, d != +1
+	// if alpha_i == 0, d != -1
+
+	int start = 0;
+	double Gmax1, Gmax2, Gmax3, Gmax4;
+	int Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx;
+#if defined USE_THREADS
+	int t, chunk = active_size / threads;
+
+	for (t = 0; t < threads - 1; t++, start += chunk) {
+		select_working_set_start[t] = start;
+		select_working_set_end[t] = start + chunk;
+	}
+	for (t = 0; t < threads - 1; t++) {
+		sem_post(&work);
+	}
+#endif /* USE_THREADS */
+	//int orig_i, orig_j;
+	//	select_working_set_orig(orig_i, orig_j);
+	select_working_set_inner(start, active_size, 
+				 Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx, 
+				 Gmax1, Gmax2, Gmax3, Gmax4);
+	if (Gmax4 > Gmax1) {
+		Gmax1 = Gmax4;
+		Gmax1_idx = Gmax4_idx;
+	}
+	if (Gmax3 > Gmax2) {
+		Gmax2 = Gmax3;
+		Gmax2_idx = Gmax3_idx;
+	}
+#if defined USE_THREADS
+	for (t = 0; t < threads - 1; t++) {
+		sem_wait(&done);
+	}
+	for (t = 0; t < threads - 1; t++) {
+		double g1 = select_working_set_Gmax1[t];
+		double g2 = select_working_set_Gmax2[t];
+		double g3 = select_working_set_Gmax3[t];
+		double g4 = select_working_set_Gmax4[t];
+		if (g1 > Gmax1) {
+			Gmax1 = g1;
+			Gmax1_idx = select_working_set_Gmax1_idx[t];
+		}
+		if (g2 > Gmax2) {
+			Gmax2 = g2;
+			Gmax2_idx = select_working_set_Gmax2_idx[t];
+		}
+		if (g3 > Gmax2) {
+			Gmax2 = g3;
+			Gmax2_idx = select_working_set_Gmax3_idx[t];
+		}
+		if (g4 > Gmax1) {
+			Gmax1 = g4;
+			Gmax1_idx = select_working_set_Gmax4_idx[t];
+		}
+	}
+#endif /* USE_THREADS */
 
 	out_i = Gmax1_idx;
 	out_j = Gmax2_idx;
+
+	if(Gmax1+Gmax2 < eps)
+ 		return 1;
+
 	return 0;
 }
@@ -900,57 +1262,48 @@
 	// if alpha_i == 0, d != -1
 
-	double Gmax1 = -INF;	// max { -grad(f)_i * d | y_i = +1, d = +1 }
-	int Gmax1_idx = -1;
-
-	double Gmax2 = -INF;	// max { -grad(f)_i * d | y_i = +1, d = -1 }
-	int Gmax2_idx = -1;
-
-	double Gmax3 = -INF;	// max { -grad(f)_i * d | y_i = -1, d = +1 }
-	int Gmax3_idx = -1;
-
-	double Gmax4 = -INF;	// max { -grad(f)_i * d | y_i = -1, d = -1 }
-	int Gmax4_idx = -1;
-
-	for(int i=0;i<active_size;i++)
-	{
-		if(y[i]==+1)	// y == +1
-		{
-			if(!is_upper_bound(i))	// d = +1
-			{
-				if(-G[i] > Gmax1)
-				{
-					Gmax1 = -G[i];
-					Gmax1_idx = i;
-				}
-			}
-			if(!is_lower_bound(i))	// d = -1
-			{
-				if(G[i] > Gmax2)
-				{
-					Gmax2 = G[i];
-					Gmax2_idx = i;
-				}
-			}
-		}
-		else		// y == -1
-		{
-			if(!is_upper_bound(i))	// d = +1
-			{
-				if(-G[i] > Gmax3)
-				{
-					Gmax3 = -G[i];
-					Gmax3_idx = i;
-				}
-			}
-			if(!is_lower_bound(i))	// d = -1
-			{
-				if(G[i] > Gmax4)
-				{
-					Gmax4 = G[i];
-					Gmax4_idx = i;
-				}
-			}
+	int start = 0;
+	double Gmax1, Gmax2, Gmax3, Gmax4;
+	int Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx;
+#if defined USE_THREADS
+	int t, chunk = active_size / threads;
+
+	for (t = 0; t < threads - 1; t++, start += chunk) {
+		select_working_set_start[t] = start;
+		select_working_set_end[t] = start + chunk;
+	}
+	for (t = 0; t < threads - 1; t++) {
+		sem_post(&work);
+	}
+#endif /* USE_THREADS */
+	select_working_set_inner(start, active_size, 
+				 Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx, 
+				 Gmax1, Gmax2, Gmax3, Gmax4);
+#if defined USE_THREADS
+	for (t = 0; t < threads - 1; t++) {
+		sem_wait(&done);
+	}
+	for (t = 0; t < threads - 1; t++) {
+		double g1 = select_working_set_Gmax1[t];
+		double g2 = select_working_set_Gmax2[t];
+		double g3 = select_working_set_Gmax3[t];
+		double g4 = select_working_set_Gmax4[t];
+		if (g1 > Gmax1) {
+			Gmax1 = g1;
+			Gmax1_idx = select_working_set_Gmax1_idx[t];
+		}
+		if (g2 > Gmax2) {
+			Gmax2 = g2;
+			Gmax2_idx = select_working_set_Gmax2_idx[t];
+		}
+		if (g3 > Gmax3) {
+			Gmax3 = g3;
+			Gmax3_idx = select_working_set_Gmax3_idx[t];
+		}
+		if (g4 > Gmax4) {
+			Gmax4 = g4;
+			Gmax3_idx = select_working_set_Gmax4_idx[t];
 		}
 	}
+#endif /* USE_THREADS */
 
 	if(max(Gmax1+Gmax2,Gmax3+Gmax4) < eps)
@@ -1124,4 +1477,11 @@
 	}
 	
+	virtual void do_kernel(int i, int start, int end, double* data) const
+	{
+		int	j;
+		for(j = start; j < end; j++)
+			data[j] = y[i]*y[j]*(this->*kernel_function)(i,j);
+	}
+
 	double *get_Q(int i, int len) const
 	{
@@ -1129,8 +1489,5 @@
 		int start;
 		if((start = cache->get_data(i,&data,len)) < len)
-		{
-			for(int j=start;j<len;j++)
-				data[j] = y[i]*y[j]*(this->*kernel_function)(i,j);
-		}
+			do_kernel_wrap(i, start, len, data);
 		return data;
 	}
@@ -1167,8 +1524,5 @@
 		int start;
 		if((start = cache->get_data(i,&data,len)) < len)
-		{
-			for(int j=start;j<len;j++)
-				data[j] = (this->*kernel_function)(i,j);
-		}
+			do_kernel_wrap(i, start, len, data);
 		return data;
 	}
@@ -1215,8 +1569,5 @@
 		int real_i = index[i];
 		if(cache->get_data(real_i,&data,l) < l)
-		{
-			for(int j=0;j<l;j++)
-				data[j] = (this->*kernel_function)(real_i,j);
-		}
+			do_kernel_wrap(real_i, 0, l, data);
 
 		// reorder and copy
diff -r -U2 libsvm-2.3.orig/svm.h libsvm-2.3/svm.h
--- libsvm-2.3.orig/svm.h	Tue Mar 13 22:42:02 2001
+++ libsvm-2.3/svm.h	Sun Mar 25 13:56:55 2001
@@ -40,4 +40,5 @@
 	double p;	// for EPSILON_SVR
 	int shrinking;	// use the shrinking heuristics
+	int threads;	// number of threads to use
 };