00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #if defined(_MSC_VER)
00025 #pragma once
00026 #endif
00027
00028 #ifndef PBRT_CORE_PARALLEL_H
00029 #define PBRT_CORE_PARALLEL_H
00030
00031
00032 #include "pbrt.h"
00033 #if defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
00034 #include <libkern/OSAtomic.h>
00035 #endif // __APPLE__ and not x86
00036 #if defined(PBRT_IS_WINDOWS)
00037 #include <windows.h>
00038 #else
00039 #include <pthread.h>
00040 #include <semaphore.h>
00041 #endif
00042 #include "core/probes.h"
00043
00044
#if defined(PBRT_IS_WINDOWS)
// MSVC 2003 (_MSC_VER 1300) and later provide _ReadWriteBarrier() as a
// compiler intrinsic that prevents the compiler from reordering memory
// accesses across it; on older compilers it degrades to a no-op.
#if _MSC_VER >= 1300
extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier)
#else
#define _ReadWriteBarrier()
#endif

// LONG/LONGLONG are the operand types expected by the Win32 Interlocked*
// family used by the atomic functions below.
typedef volatile LONG AtomicInt32;

#ifdef PBRT_HAS_64_BIT_ATOMICS
typedef volatile LONGLONG AtomicInt64;
#endif // 64-bit
#else
// Non-Windows: plain fixed-width integers.  Atomicity comes from the inline
// assembly / OS primitives in the functions below; "volatile" only
// discourages compiler caching and does NOT provide thread safety itself.
typedef volatile int32_t AtomicInt32;
#ifdef PBRT_HAS_64_BIT_ATOMICS
typedef volatile int64_t AtomicInt64;
#endif
#endif // !PBRT_IS_WINDOWS
// Atomically adds delta to *v and returns the resulting (new) value of *v.
inline int32_t AtomicAdd(AtomicInt32 *v, int32_t delta) {
    PBRT_ATOMIC_MEMORY_OP();   // statistics probe (see core/probes.h)
#if defined(PBRT_IS_WINDOWS)
    // Win64: InterlockedAdd() returns the post-add value directly.
#if (PBRT_POINTER_SIZE == 8)
    return InterlockedAdd(v, delta);
#else
    // Win32/MSVC inline assembly: "lock xadd" leaves the *old* value of *v
    // in eax, so delta is added afterward to yield the new value.
    int32_t result;
    _ReadWriteBarrier();
    __asm {
        __asm mov edx, v
        __asm mov eax, delta
        __asm lock xadd [edx], eax
        __asm mov result, eax
    }
    _ReadWriteBarrier();
    return result + delta;
#endif
#elif defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
    // Non-x86 Apple (e.g. PowerPC): OS-provided atomic add with full barrier;
    // returns the new value.
    return OSAtomicAdd32Barrier(delta, v);
#else
    // GCC/Clang on x86: "lock xadd" returns the original value in origValue;
    // the "memory" clobber makes this a compiler barrier as well.
    int32_t origValue;
    __asm__ __volatile__("lock\n"
                         "xaddl %0,%1"
                         : "=r"(origValue), "=m"(*v) : "0"(delta)
                         : "memory");
    return origValue + delta;
#endif
}
00094
00095
00096 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue,
00097 int32_t oldValue);
00098 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue, int32_t oldValue) {
00099 PBRT_ATOMIC_MEMORY_OP();
00100 #if defined(PBRT_IS_WINDOWS)
00101 return InterlockedCompareExchange(v, newValue, oldValue);
00102 #elif defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
00103 return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v);
00104 #else
00105 int32_t result;
00106 __asm__ __volatile__("lock\ncmpxchgl %2,%1"
00107 : "=a"(result), "=m"(*v)
00108 : "q"(newValue), "0"(oldValue)
00109 : "memory");
00110 return result;
00111 #endif
00112 }
00113
00114
00115 template <typename T>
00116 inline T *AtomicCompareAndSwapPointer(T **v, T *newValue, T *oldValue) {
00117 PBRT_ATOMIC_MEMORY_OP();
00118 #if defined(PBRT_IS_WINDOWS)
00119 return InterlockedCompareExchange(v, newValue, oldValue);
00120 #elif defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
00121 #ifdef PBRT_HAS_64_BIT_ATOMICS
00122 return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v);
00123 #else
00124 return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v);
00125 #endif
00126 #else
00127 T *result;
00128 __asm__ __volatile__("lock\ncmpxchg"
00129 #ifdef PBRT_HAS_64_BIT_ATOMICS
00130 "q"
00131 #else
00132 "l"
00133 #endif
00134 " %2,%1"
00135 : "=a"(result), "=m"(*v)
00136 : "q"(newValue), "0"(oldValue)
00137 : "memory");
00138 return result;
00139 #endif
00140 }
00141
00142
00143 #ifdef PBRT_HAS_64_BIT_ATOMICS
// 64-bit overload: atomically adds delta to *v and returns the new value.
inline int64_t AtomicAdd(AtomicInt64 *v, int64_t delta) {
    PBRT_ATOMIC_MEMORY_OP();   // statistics probe (see core/probes.h)
#ifdef PBRT_IS_WINDOWS
    // Returns the post-add value directly.
    return InterlockedAdd64(v, delta);
#elif defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
    return OSAtomicAdd64Barrier(delta, v);
#else
    // GCC/Clang x86-64: "lock xaddq" returns the original value in result;
    // add delta to yield the new value.
    int64_t result;
    __asm__ __volatile__("lock\nxaddq %0,%1"
                         : "=r"(result), "=m"(*v)
                         : "0"(delta)
                         : "memory");
    return result + delta;
#endif
}
00159
00160
00161
// 64-bit overload of compare-and-swap: if *v == oldValue, stores newValue
// into *v.  Returns the previous contents of *v; success iff that equals
// oldValue.
inline int64_t AtomicCompareAndSwap(AtomicInt64 *v, int64_t newValue, int64_t oldValue) {
    PBRT_ATOMIC_MEMORY_OP();   // statistics probe (see core/probes.h)
#if defined(PBRT_IS_WINDOWS)
    return InterlockedCompareExchange64(v, newValue, oldValue);
#elif defined(__APPLE__) && !(defined(__i386__) || defined(__amd64__))
    // NOTE(review): OSAtomicCompareAndSwap64Barrier returns a bool, not the
    // previous value, unlike the other two branches -- verify callers only
    // compare the result against oldValue on this platform.
    return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v);
#else
    // GCC/Clang x86-64: cmpxchgq leaves the original value of *v in rax.
    int64_t result;
    __asm__ __volatile__("lock\ncmpxchgq %2,%1"
                         : "=a"(result), "=m"(*v)
                         : "q"(newValue), "0"(oldValue)
                         : "memory");
    return result;
#endif
}
00177
00178
00179 #endif // PBRT_HAS_64_BIT_ATOMICS
// Atomically adds delta to the float *val and returns the new value.
// x86 has no native atomic float add, so this spins on a 32-bit integer
// compare-and-swap of the float's bit pattern until no other thread has
// modified *val between the read and the CAS.
inline float AtomicAdd(volatile float *val, float delta) {
    PBRT_ATOMIC_MEMORY_OP();   // statistics probe (see core/probes.h)
    // Reinterpret the float's bits as int32_t for the CAS.  NOTE(review):
    // union type punning is formally undefined behavior in C++, though the
    // compilers pbrt targets define it; memcpy would be the sanctioned form.
    union bits { float f; int32_t i; };
    bits oldVal, newVal;
    do {
        // On x86, issue a "pause" hint to improve spin-loop performance
        // (reduces pipeline flushes and contention with a hyper-thread).
#if (defined(__i386__) || defined(__amd64__))
        __asm__ __volatile__ ("pause\n");
#endif
        oldVal.f = *val;
        newVal.f = oldVal.f + delta;
        // Retry if the CAS reports a previous value other than the one we
        // read, i.e. another thread updated *val concurrently.
    } while (AtomicCompareAndSwap(((AtomicInt32 *)val),
                                  newVal.i, oldVal.i) != oldVal.i);
    return newVal.f;
}
00196
00197
// Forward declaration so MutexLock can be befriended below.
struct MutexLock;
// Mutual-exclusion lock (implementation in the corresponding .cpp).
// Instances are heap-allocated via Create()/Destroy(); the constructor,
// destructor, and copy operations are private so a Mutex can be neither
// stack-allocated nor copied.  Locking is done only through the RAII
// MutexLock helper, which is a friend.
class Mutex {
public:
    // Mutex Public Methods
    static Mutex *Create();
    static void Destroy(Mutex *m);
private:
    // Mutex Private Methods
    Mutex();
    ~Mutex();
    friend struct MutexLock;
    Mutex(Mutex &);                   // intentionally unimplemented: non-copyable
    Mutex &operator=(const Mutex &);  // intentionally unimplemented

    // Mutex Private Data -- underlying platform primitive
#if defined(PBRT_IS_WINDOWS)
    CRITICAL_SECTION criticalSection;
#else
    pthread_mutex_t mutex;
#endif
};
00218
00219
// RAII lock guard: presumably acquires m in the constructor and releases it
// in the destructor (bodies live in the .cpp).  Non-copyable.
struct MutexLock {
    MutexLock(Mutex &m);
    ~MutexLock();
private:
    Mutex &mutex;
    MutexLock(const MutexLock &);             // intentionally unimplemented
    MutexLock &operator=(const MutexLock &);  // intentionally unimplemented
};
00228
00229
// Reader-writer lock (implementation in the corresponding .cpp).  Like
// Mutex, instances come only from Create()/Destroy() and are non-copyable;
// acquisition is done through the RAII RWMutexLock friend.
class RWMutex {
public:
    // RWMutex Public Methods
    static RWMutex *Create();
    static void Destroy(RWMutex *m);
private:
    // RWMutex Private Methods
    RWMutex();
    ~RWMutex();
    friend struct RWMutexLock;
    RWMutex(RWMutex &);                 // intentionally unimplemented: non-copyable
    RWMutex &operator=(const RWMutex &);  // intentionally unimplemented

    // RWMutex Private Data
#if defined(PBRT_IS_WINDOWS)
    // Windows has no pre-Vista rwlock primitive, so one is built from
    // events and a critical section (see .cpp for the protocol).
    void AcquireRead();
    void ReleaseRead();
    void AcquireWrite();
    void ReleaseWrite();

    LONG numWritersWaiting;
    LONG numReadersWaiting;

    // NOTE(review): presumably encodes the active-writer flag and active
    // reader count in one DWORD -- confirm against the .cpp implementation.
    DWORD activeWriterReaders;

    HANDLE hReadyToRead;
    HANDLE hReadyToWrite;
    CRITICAL_SECTION cs;
#else
    pthread_rwlock_t mutex;
#endif
};
00263
00264
// Access mode requested when constructing an RWMutexLock.  NOTE(review):
// READ/WRITE are unscoped names injected into the global namespace; they
// cannot be renamed here without breaking callers.
enum RWMutexLockType { READ, WRITE };
// RAII guard for RWMutex: acquires m in mode t on construction, releases on
// destruction.  The held mode can be switched mid-scope via
// UpgradeToWrite()/DowngradeToRead().  Non-copyable.
struct RWMutexLock {
    RWMutexLock(RWMutex &m, RWMutexLockType t);
    ~RWMutexLock();
    void UpgradeToWrite();
    void DowngradeToRead();
private:
    RWMutexLockType type;   // mode currently held
    RWMutex &mutex;
    RWMutexLock(const RWMutexLock &);             // intentionally unimplemented
    RWMutexLock &operator=(const RWMutexLock &);  // intentionally unimplemented
};
00277
00278
// Counting semaphore (implementation in the corresponding .cpp).
class Semaphore {
public:
    // Semaphore Public Methods
    Semaphore();
    ~Semaphore();
    void Post(int count = 1);  // increment the semaphore count times
    void Wait();               // block until the count is positive, then decrement
    bool TryWait();            // non-blocking variant; presumably returns
                               // true iff the decrement succeeded -- see .cpp
private:
    // Semaphore Private Data
#if defined(PBRT_IS_WINDOWS)
    HANDLE handle;
#else
    sem_t *sem;
    // NOTE(review): sem is a pointer, suggesting sem_open() named semaphores
    // with this class-wide counter generating unique names -- confirm in .cpp.
    static int count;
#endif
};
00296
00297
// Condition variable bundled with its associated mutex: callers bracket
// Wait()/Signal() with Lock()/Unlock() on this same object (bodies in .cpp).
class ConditionVariable {
public:
    // ConditionVariable Public Methods
    ConditionVariable();
    ~ConditionVariable();
    void Lock();
    void Unlock();
    void Wait();
    void Signal();
private:
    // ConditionVariable Private Data
#if !defined(PBRT_IS_WINDOWS)
    pthread_mutex_t mutex;
    pthread_cond_t cond;
#else
    // Pre-Vista Windows has no native condition variable, so one is
    // emulated with events (see .cpp for the wait/signal protocol).
    uint32_t waitersCount;       // number of threads blocked in Wait()

    CRITICAL_SECTION waitersCountMutex, conditionMutex;

    // One event per wake-up flavor: SIGNAL wakes one waiter, BROADCAST all.
    enum { SIGNAL = 0, BROADCAST=1, NUM_EVENTS=2 };
    HANDLE events[NUM_EVENTS];
#endif
};
00322
00323
// Task system: initialize/tear down the worker-thread pool.
void TasksInit();
void TasksCleanup();
// Abstract unit of work for the task system; subclasses implement Run().
class Task {
public:
    virtual ~Task();
    virtual void Run() = 0;
};

// Hand a batch of tasks to the worker pool.  NOTE(review): ownership of the
// Task pointers (who deletes them) is not visible here -- see the .cpp.
void EnqueueTasks(const vector<Task *> &tasks);
// Block until every enqueued task has completed.
void WaitForAllTasks();
// Number of CPU cores available, used to size the worker pool.
int NumSystemCores();
00336
00337 #endif // PBRT_CORE_PARALLEL_H