diff -r -U2 libsvm-2.3.orig/Makefile libsvm-2.3/Makefile --- libsvm-2.3.orig/Makefile Tue Mar 13 11:53:14 2001 +++ libsvm-2.3/Makefile Tue Mar 27 10:27:24 2001 @@ -1,12 +1,16 @@ CC = gcc CXXC = g++ -CFLAGS = -Wall -O3 -g +CFLAGS = -Wall -O3 -g -DUSE_THREADS +#CFLAGS = -Wall -g -DUSE_THREADS -all: svm-train svm-predict svm-scale +all: svm-train svm-predict svm-scale test +test: test.cpp svm.o + $(CXXC) $(CFLAGS) test.cpp svm.o -o test -lm -lpthread svm-predict: svm-predict.c svm.o - $(CC) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm -svm-train: svm-train.c svm.o - $(CC) $(CFLAGS) svm-train.c svm.o -o svm-train -lm + $(CC) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm -lpthread +svm-train: svm-train.cpp svm.o + $(CXXC) $(CFLAGS) svm-train.cpp svm.o -o svm-train -lm -lpthread +# $(CXXC) $(CFLAGS) svm-train.cpp svm.o -o svm-train -lm -lpthread svm.o: svm.cpp svm.h $(CXXC) $(CFLAGS) -c svm.cpp diff -r -U2 libsvm-2.3.orig/svm.cpp libsvm-2.3/svm.cpp --- libsvm-2.3.orig/svm.cpp Tue Mar 13 22:42:36 2001 +++ libsvm-2.3/svm.cpp Tue Mar 27 12:03:15 2001 @@ -1,2 +1,32 @@ +#if defined WIN32 +#include +#include +#define thread_start_f void +typedef HANDLE sem_t; +typedef HANDLE pthread_t; +#define sem_init(psem, pshared, value) \ + *(psem) = CreateSemaphore(NULL, value, threads, NULL) +#define sem_post(psem) \ + ReleaseSemaphore(*(psem), 1, NULL) +#define sem_wait(psem) \ + WaitForSingleObject(*(psem), INFINITE) +#define sem_destroy(psem) \ + CloseHandle(*(psem)) +#define pthread_create(pThread, pAttr, fStart, pArg) \ + (*(pThread)) = (HANDLE)_beginthread(fStart, 0, pArg) +#define pthread_exit(code) +#else /* WIN32 */ +#if defined USE_THREADS +#define _REENTRANT +#if defined SOLARIS_REV || defined X86_REV +#include +#include +#else /* SOLARIS_REV */ +#include +#include +#define thread_start_f void* +#endif /* SOLARIS_REV */ +#endif /* USE_THREADS */ +#endif /* WIN32 */ #include #include @@ -164,4 +194,24 @@ } +#if defined(USE_THREADS) +class Kernel; +class 
Kernel_thread_data { +public: + Kernel_thread_data(int threads, Kernel* k); + ~Kernel_thread_data(); + + int threads; + Kernel* kernel; + int do_kernel_i; + int* do_kernel_start; + int* do_kernel_end; + double* do_kernel_data; + pthread_t* thread; + sem_t work; /* wait for work */ + sem_t done; /* wait for completion */ + int thread_suicide; +}; +#endif /* USE_THREADS */ + // // Kernel evaluation @@ -179,4 +229,6 @@ const svm_parameter& param); virtual double *get_Q(int column, int len) const = 0; + virtual void do_kernel(int i, int start, int end, double* data) const; + void do_kernel_wrap(int i, int start, int end, double* data) const; virtual void swap_index(int i, int j) const // no so const... { @@ -184,4 +236,9 @@ if(x_square) swap(x_square[i],x_square[j]); } + +#if defined(USE_THREADS) + Kernel_thread_data* d; +#endif + protected: @@ -217,4 +274,69 @@ }; +#if defined(USE_THREADS) +thread_start_f kernel_thread_start(void* arg) +{ + int tid; + Kernel_thread_data* d = (Kernel_thread_data*)arg; + Kernel* kernel = (Kernel*)d->kernel; + + tid = d->do_kernel_i; + sem_post(&d->done); + + while (!d->thread_suicide) { + sem_wait(&d->work); + if (!d->thread_suicide) { + kernel->do_kernel(d->do_kernel_i, + d->do_kernel_start[tid], + d->do_kernel_end[tid], + d->do_kernel_data + ); + } + sem_post(&d->done); + } + pthread_exit(0); +} + +Kernel_thread_data::Kernel_thread_data(int threads, Kernel* k) +{ + int i; + + this->threads = threads >= 1 ? 
threads : 1; + thread_suicide = 0; + kernel = k; + do_kernel_start = new int[threads]; + do_kernel_end = new int[threads]; + do_kernel_data = NULL; + thread = new pthread_t[threads]; + sem_init(&work, 0, 0); + sem_init(&done, 0, 0); + + for (i = 0; i < threads - 1; ++i) { + do_kernel_i = i; + pthread_create(&thread[i], NULL, kernel_thread_start, (void*)this); + sem_wait(&done); + } +} + +Kernel_thread_data::~Kernel_thread_data() +{ + thread_suicide = 1; + int i; + for (i = 1; i < threads; ++i) { + sem_post(&work); + } + for (i = 1; i < threads; ++i) { + sem_wait(&done); + } + if (threads > 1) { + delete [] thread; + delete [] do_kernel_start; + delete [] do_kernel_end; + } + sem_destroy(&work); + sem_destroy(&done); +} +#endif /* USE_THREADS */ + Kernel::Kernel(int l, const svm_node * const * x_, const svm_parameter& param) :kernel_type(param.kernel_type), degree(param.degree), @@ -250,4 +372,11 @@ else x_square = 0; + +#if defined(USE_THREADS) + d = NULL; + if (param.threads > 1) { + d = new Kernel_thread_data(param.threads, this); + } +#endif /* USE_THREADS */ } @@ -256,4 +385,7 @@ delete[] x; delete[] x_square; +#if defined(USE_THREADS) + delete d; +#endif /* USE_THREADS */ } @@ -339,4 +471,37 @@ } +void Kernel::do_kernel(int i, int start, int end, double* data) const +{ + int j; + for(j = start; j < end; j++) + data[j] = (this->*kernel_function)(i,j); +} + +void Kernel::do_kernel_wrap(int i, int start, int end, double* data) const +{ +#if defined USE_THREADS + int t; /* function scope: the completion-wait loop after do_kernel() also uses it */ + if (d && d->threads > 1) { + int chunk = (end - start) / d->threads; + + for (t = 0; t < d->threads - 1; t++, start += chunk) { + d->do_kernel_i = i; + d->do_kernel_data = data; + d->do_kernel_start[t] = start; + d->do_kernel_end[t] = start + chunk; + } + for (t = 0; t < d->threads - 1; t++) { + sem_post(&d->work); + } + } +#endif /* USE_THREADS */ + do_kernel(i, start, end, data); +#if defined USE_THREADS + for (t = 0; d && t < d->threads - 1; t++) { + sem_wait(&d->done); + } +#endif /* 
USE_THREADS */ +} + // Generalized SMO+SVMlight algorithm // Solves: @@ -359,6 +524,6 @@ class Solver { public: - Solver() {}; - virtual ~Solver() {}; + Solver(); + virtual ~Solver(); struct SolutionInfo { @@ -409,6 +574,126 @@ virtual double calculate_rho(); virtual void do_shrinking(); + int threads; +#if defined(USE_THREADS) +public: + int* select_working_set_start; + int* select_working_set_end; + int* select_working_set_Gmax1_idx; + int* select_working_set_Gmax2_idx; + int* select_working_set_Gmax3_idx; + int* select_working_set_Gmax4_idx; + double* select_working_set_Gmax1; + double* select_working_set_Gmax2; + double* select_working_set_Gmax3; + double* select_working_set_Gmax4; + pthread_t* thread; + sem_t work; /* wait for work */ + sem_t done; /* wait for completion */ + int thread_suicide; +#endif /* USE_THREADS */ +public: + void select_working_set_inner(int start, + int end, + int& Gmax1_idx, + int& Gmax2_idx, + int& Gmax3_idx, + int&Gmax4_idx, + double& Gmax1, + double& Gmax2, + double& Gmax3, + double& Gmax4); }; +#if defined(USE_THREADS) +thread_start_f solver_thread_start(void* arg) +{ + int tid; + Solver* solver = (Solver*)arg; + + tid = solver->select_working_set_start[0]; + sem_post(&solver->done); + + while (!solver->thread_suicide) { + sem_wait(&solver->work); + if (!solver->thread_suicide) { + solver->select_working_set_inner( + solver->select_working_set_start[tid], + solver->select_working_set_end[tid], + solver->select_working_set_Gmax1_idx[tid], + solver->select_working_set_Gmax2_idx[tid], + solver->select_working_set_Gmax3_idx[tid], + solver->select_working_set_Gmax4_idx[tid], + solver->select_working_set_Gmax1[tid], + solver->select_working_set_Gmax2[tid], + solver->select_working_set_Gmax3[tid], + solver->select_working_set_Gmax4[tid] + ); + } + sem_post(&solver->done); + } + pthread_exit(0); +} +#endif /* USE_THREADS */ + +Solver::Solver() +{ + threads = 1; +#if defined(USE_THREADS) + threads = 2; + if (threads > 1) { + int i; + 
select_working_set_start = new int[threads]; + select_working_set_end = new int[threads]; + select_working_set_Gmax1_idx = new int[threads]; + select_working_set_Gmax2_idx = new int[threads]; + select_working_set_Gmax3_idx = new int[threads]; + select_working_set_Gmax4_idx = new int[threads]; + select_working_set_Gmax1 = new double[threads]; + select_working_set_Gmax2 = new double[threads]; + select_working_set_Gmax3 = new double[threads]; + select_working_set_Gmax4 = new double[threads]; + thread = new pthread_t[threads]; + sem_init(&work, 0, 0); + sem_init(&done, 0, 0); + thread_suicide = 0; + + for (i = 0; i < threads - 1; ++i) { + select_working_set_start[0] = i; + pthread_create(&thread[i], NULL, solver_thread_start, (void*)this); + sem_wait(&done); + } + } +#endif /* USE_THREADS */ +} + +Solver::~Solver() +{ +#if defined(USE_THREADS) + thread_suicide = 1; + int i; + for (i = 1; i < threads; ++i) { + sem_post(&work); + } + for (i = 1; i < threads; ++i) { + sem_wait(&done); + } + if (threads > 1) { + delete [] thread; + delete [] select_working_set_start; + delete [] select_working_set_end; + delete [] select_working_set_Gmax1_idx; + delete [] select_working_set_Gmax2_idx; + delete [] select_working_set_Gmax3_idx; + delete [] select_working_set_Gmax4_idx; + delete [] select_working_set_Gmax1; + delete [] select_working_set_Gmax2; + delete [] select_working_set_Gmax3; + delete [] select_working_set_Gmax4; + } + sem_destroy(&work); + sem_destroy(&done); +#endif /* USE_THREADS */ +} + void Solver::swap_index(int i, int j) { @@ -703,20 +988,33 @@ } -// return 1 if already optimal, return 0 otherwise -int Solver::select_working_set(int &out_i, int &out_j) +void Solver::select_working_set_inner(int start, + int end, + int& Gmax1_idx, + int& Gmax2_idx, + int& Gmax3_idx, + int&Gmax4_idx, + double& Gmax1, + double& Gmax2, + double& Gmax3, + double& Gmax4) { - // return i,j which maximize -grad(f)^T d , under constraint - // if alpha_i == C, d != +1 - // if alpha_i == 0, 
d != -1 - double Gmax1 = -INF; // max { -grad(f)_i * d | y_i*d = +1 } - int Gmax1_idx = -1; + int i; - double Gmax2 = -INF; // max { -grad(f)_i * d | y_i*d = -1 } - int Gmax2_idx = -1; + Gmax1 = -INF; // max { -grad(f)_i * d | y_i = +1, d = +1 } + Gmax1_idx = -1; - for(int i=0;i Gmax2) + if(-G[i] > Gmax3) { - Gmax2 = -G[i]; - Gmax2_idx = i; + Gmax3 = -G[i]; + Gmax3_idx = i; } } if(!is_lower_bound(i)) // d = -1 { - if(G[i] > Gmax1) + if(G[i] > Gmax4) { - Gmax1 = G[i]; - Gmax1_idx = i; + Gmax4 = G[i]; + Gmax4_idx = i; } } } } +} - if(Gmax1+Gmax2 < eps) - return 1; +// return 1 if already optimal, return 0 otherwise +int Solver::select_working_set(int &out_i, int &out_j) +{ + // return i,j which maximize -grad(f)^T d , under constraint + // if alpha_i == C, d != +1 + // if alpha_i == 0, d != -1 + + int start = 0; + double Gmax1, Gmax2, Gmax3, Gmax4; + int Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx; +#if defined USE_THREADS + int t, chunk = active_size / threads; + + for (t = 0; t < threads - 1; t++, start += chunk) { + select_working_set_start[t] = start; + select_working_set_end[t] = start + chunk; + } + for (t = 0; t < threads - 1; t++) { + sem_post(&work); + } +#endif /* USE_THREADS */ + //int orig_i, orig_j; + // select_working_set_orig(orig_i, orig_j); + select_working_set_inner(start, active_size, + Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx, + Gmax1, Gmax2, Gmax3, Gmax4); + if (Gmax4 > Gmax1) { + Gmax1 = Gmax4; + Gmax1_idx = Gmax4_idx; + } + if (Gmax3 > Gmax2) { + Gmax2 = Gmax3; + Gmax2_idx = Gmax3_idx; + } +#if defined USE_THREADS + for (t = 0; t < threads - 1; t++) { + sem_wait(&done); + } + for (t = 0; t < threads - 1; t++) { + double g1 = select_working_set_Gmax1[t]; + double g2 = select_working_set_Gmax2[t]; + double g3 = select_working_set_Gmax3[t]; + double g4 = select_working_set_Gmax4[t]; + if (g1 > Gmax1) { + Gmax1 = g1; + Gmax1_idx = select_working_set_Gmax1_idx[t]; + } + if (g2 > Gmax2) { + Gmax2 = g2; + Gmax2_idx = select_working_set_Gmax2_idx[t]; 
+ } + if (g3 > Gmax2) { + Gmax2 = g3; + Gmax2_idx = select_working_set_Gmax3_idx[t]; + } + if (g4 > Gmax1) { + Gmax1 = g4; + Gmax1_idx = select_working_set_Gmax4_idx[t]; + } + } +#endif /* USE_THREADS */ out_i = Gmax1_idx; out_j = Gmax2_idx; + + if(Gmax1+Gmax2 < eps) + return 1; + return 0; } @@ -900,57 +1262,48 @@ // if alpha_i == 0, d != -1 - double Gmax1 = -INF; // max { -grad(f)_i * d | y_i = +1, d = +1 } - int Gmax1_idx = -1; - - double Gmax2 = -INF; // max { -grad(f)_i * d | y_i = +1, d = -1 } - int Gmax2_idx = -1; - - double Gmax3 = -INF; // max { -grad(f)_i * d | y_i = -1, d = +1 } - int Gmax3_idx = -1; - - double Gmax4 = -INF; // max { -grad(f)_i * d | y_i = -1, d = -1 } - int Gmax4_idx = -1; - - for(int i=0;i Gmax1) - { - Gmax1 = -G[i]; - Gmax1_idx = i; - } - } - if(!is_lower_bound(i)) // d = -1 - { - if(G[i] > Gmax2) - { - Gmax2 = G[i]; - Gmax2_idx = i; - } - } - } - else // y == -1 - { - if(!is_upper_bound(i)) // d = +1 - { - if(-G[i] > Gmax3) - { - Gmax3 = -G[i]; - Gmax3_idx = i; - } - } - if(!is_lower_bound(i)) // d = -1 - { - if(G[i] > Gmax4) - { - Gmax4 = G[i]; - Gmax4_idx = i; - } - } + int start = 0; + double Gmax1, Gmax2, Gmax3, Gmax4; + int Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx; +#if defined USE_THREADS + int t, chunk = active_size / threads; + + for (t = 0; t < threads - 1; t++, start += chunk) { + select_working_set_start[t] = start; + select_working_set_end[t] = start + chunk; + } + for (t = 0; t < threads - 1; t++) { + sem_post(&work); + } +#endif /* USE_THREADS */ + select_working_set_inner(start, active_size, + Gmax1_idx, Gmax2_idx, Gmax3_idx, Gmax4_idx, + Gmax1, Gmax2, Gmax3, Gmax4); +#if defined USE_THREADS + for (t = 0; t < threads - 1; t++) { + sem_wait(&done); + } + for (t = 0; t < threads - 1; t++) { + double g1 = select_working_set_Gmax1[t]; + double g2 = select_working_set_Gmax2[t]; + double g3 = select_working_set_Gmax3[t]; + double g4 = select_working_set_Gmax4[t]; + if (g1 > Gmax1) { + Gmax1 = g1; + Gmax1_idx = 
select_working_set_Gmax1_idx[t]; + } + if (g2 > Gmax2) { + Gmax2 = g2; + Gmax2_idx = select_working_set_Gmax2_idx[t]; + } + if (g3 > Gmax3) { + Gmax3 = g3; + Gmax3_idx = select_working_set_Gmax3_idx[t]; + } + if (g4 > Gmax4) { + Gmax4 = g4; + Gmax4_idx = select_working_set_Gmax4_idx[t]; } } +#endif /* USE_THREADS */ if(max(Gmax1+Gmax2,Gmax3+Gmax4) < eps) @@ -1124,4 +1477,11 @@ } + virtual void do_kernel(int i, int start, int end, double* data) const + { + int j; + for(j = start; j < end; j++) + data[j] = y[i]*y[j]*(this->*kernel_function)(i,j); + } + double *get_Q(int i, int len) const { @@ -1129,8 +1489,5 @@ int start; if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j); - } + do_kernel_wrap(i, start, len, data); return data; } @@ -1167,8 +1524,5 @@ int start; if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j); - } + do_kernel_wrap(i, start, len, data); return data; } @@ -1215,8 +1569,5 @@ int real_i = index[i]; if(cache->get_data(real_i,&data,l) < l) - { - for(int j=0;j*kernel_function)(real_i,j); - } + do_kernel_wrap(real_i, 0, l, data); // reorder and copy diff -r -U2 libsvm-2.3.orig/svm.h libsvm-2.3/svm.h --- libsvm-2.3.orig/svm.h Tue Mar 13 22:42:02 2001 +++ libsvm-2.3/svm.h Sun Mar 25 13:56:55 2001 @@ -40,4 +40,5 @@ double p; // for EPSILON_SVR int shrinking; // use the shrinking heuristics + int threads; // number of threads to use };