First

2024-07-02 18:10:39 +02:00
commit 48ab06b1d9
733 changed files with 321088 additions and 0 deletions
@@ -0,0 +1,226 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+#define EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <sys/atomics.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// android_fake_atomic_*
+		///
+		/// Helper functions used by the 64 bit ("simulated") AtomicInt specializations below.
+		/// Only the declarations are given here; the definitions are not part of this header.
+		/// The specializations treat a return value of 0 from android_fake_atomic_cmpxchg_64
+		/// as "the exchange took place".
+		/// NOTE(review): android_fake_atomic_swap_64 presumably returns the value previously
+		/// stored at addr -- confirm against the implementation.
+		///
+		int64_t android_fake_atomic_swap_64(int64_t value, volatile int64_t* addr);
+		int android_fake_atomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue, volatile int64_t* addr);
+		int64_t android_fake_atomic_read_64(volatile int64_t* addr);
+
+		/// class AtomicInt
+		/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+		/// and declaration specifications per platform.
+		///
+		/// SetValue, SetValueConditional, Increment, Decrement and Add are only declared in the
+		/// class body; this header defines them as explicit specializations for int32_t,
+		/// uint32_t, int64_t and uint64_t.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			/// AtomicInt
+			/// Constructs with an initial value, stored through SetValue.
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			/// AtomicInt
+			/// Copy constructor. Reads the source through GetValue.
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue()) {}
+
+			/// operator=
+			/// Copy assignment. Reads the source through GetValue and writes through SetValue
+			/// instead of storing to mValue directly, so the write takes the same path as every
+			/// other store to a live AtomicInt. This matters for the simulated 64 bit case, where
+			/// a plain store would bypass the android_fake_atomic_* helpers.
+			AtomicInt& operator=(const ThisType& x)
+				{ SetValue(x.GetValue()); return *this; }
+
+			/// GetValue
+			/// Returns the current value with a plain read of mValue.
+			/// The 64 bit types replace this with an explicit specialization.
+			ValueType GetValue() const
+				{ return mValue; }
+
+			/// GetValueRaw
+			/// Returns mValue with a plain read, for every type.
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			// Defined per type as explicit specializations after the class.
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			// The pre-increment/decrement forms return the result of Increment/Decrement;
+			// the post forms undo the step on that result to yield the prior value.
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			volatile ValueType mValue;
+		};
+
+
+		/// SetValue (int32_t)
+		/// Swaps n into mValue with __atomic_swap and returns that call's result.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+		{
+			const ValueType swapResult = __atomic_swap(n, &mValue);
+			return swapResult;
+		}
+
+		/// SetValue (uint32_t)
+		/// Same as the int32_t version; mValue is addressed as a volatile int for the call.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+		{
+			volatile int* const pTarget = (volatile int*)&mValue;
+			const ValueType swapResult = __atomic_swap(n, pTarget);
+			return swapResult;
+		}
+
+		/// SetValueConditional (int32_t)
+		/// Asks __atomic_cmpxchg to replace condition with n in mValue.
+		/// Returns true when that call reports 0.
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			const int status = __atomic_cmpxchg(condition, n, &mValue);
+			return (status == 0);
+		}
+
+		/// SetValueConditional (uint32_t)
+		/// Same as the int32_t version; mValue is addressed as a volatile int for the call.
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			volatile int* const pTarget = (volatile int*)&mValue;
+			const int status = __atomic_cmpxchg(condition, n, pTarget);
+			return (status == 0);
+		}
+
+		/// Increment (int32_t)
+		/// Calls __atomic_inc on mValue and returns that call's result plus one.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+		{
+			const int reported = __atomic_inc(&mValue);
+			return reported + 1;
+		}
+
+		/// Increment (uint32_t)
+		/// Same as the int32_t version; mValue is addressed as a volatile int for the call.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+		{
+			volatile int* const pTarget = (volatile int*)&mValue;
+			const int reported = __atomic_inc(pTarget);
+			return reported + 1;
+		}
+
+		/// Decrement (int32_t)
+		/// Calls __atomic_dec on mValue and returns that call's result minus one.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+		{
+			const int reported = __atomic_dec(&mValue);
+			return reported - 1;
+		}
+
+		/// Decrement (uint32_t)
+		/// Same as the int32_t version; mValue is addressed as a volatile int for the call.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+		{
+			volatile int* const pTarget = (volatile int*)&mValue;
+			const int reported = __atomic_dec(pTarget);
+			return reported - 1;
+		}
+
+		/// Add (int32_t)
+		/// Adds n to mValue with the GCC __sync_add_and_fetch builtin and returns its result.
+		/// See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Atomic-Builtins.html
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+		{
+			const ValueType sum = __sync_add_and_fetch(&mValue, n);
+			return sum;
+		}
+
+		/// Add (uint32_t)
+		/// Adds n to mValue with the GCC __sync_add_and_fetch builtin and returns its result.
+		/// See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Atomic-Builtins.html
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+		{
+			const ValueType sum = __sync_add_and_fetch(&mValue, n);
+			return sum;
+		}
+
+
+		///////////////////////////////////////////////////////////
+		/// 64 bit, simulated
+		///
+		/// GetValue (int64_t, simulated)
+		/// Reads mValue through android_fake_atomic_read_64 rather than with a plain load.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+		{
+			volatile int64_t* const pSource = (volatile int64_t*)&mValue;
+			return android_fake_atomic_read_64(pSource);
+		}
+
+		/// GetValue (uint64_t, simulated)
+		/// Same as the int64_t version; mValue is addressed as a volatile int64_t for the call.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+		{
+			volatile int64_t* const pSource = (volatile int64_t*)&mValue;
+			return android_fake_atomic_read_64(pSource);
+		}
+
+		/// SetValue (int64_t, simulated)
+		/// Stores n through android_fake_atomic_swap_64 and returns the value it replaced.
+		/// The result is taken from the swap call itself rather than from a separate,
+		/// unsynchronized read of mValue made beforehand, which could miss a write made by
+		/// another thread between that read and the swap.
+		/// NOTE(review): relies on android_fake_atomic_swap_64 returning the previous value -- confirm.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+		{
+			return android_fake_atomic_swap_64((int64_t)n, (volatile int64_t*)&mValue);
+		}
+
+		/// SetValue (uint64_t, simulated)
+		/// Same as the int64_t version; the value is passed and returned through int64_t.
+		/// NOTE(review): relies on android_fake_atomic_swap_64 returning the previous value -- confirm.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+		{
+			return (ValueType)android_fake_atomic_swap_64((int64_t)n, (volatile int64_t*)&mValue);
+		}
+
+		/// SetValueConditional (int64_t, simulated)
+		/// Asks android_fake_atomic_cmpxchg_64 to replace condition with n in mValue.
+		/// Returns true when that call reports 0.
+		template <> inline
+		bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			volatile int64_t* const pTarget = (volatile int64_t*)&mValue;
+			const int status = android_fake_atomic_cmpxchg_64(condition, n, pTarget);
+			return status == 0;
+		}
+
+		/// SetValueConditional (uint64_t, simulated)
+		/// Same as the int64_t version; mValue is addressed as a volatile int64_t for the call.
+		template <> inline
+		bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			volatile int64_t* const pTarget = (volatile int64_t*)&mValue;
+			const int status = android_fake_atomic_cmpxchg_64(condition, n, pTarget);
+			return status == 0;
+		}
+
+		/// Add (int64_t, simulated)
+		/// Adds n to mValue with a compare-exchange retry loop and returns the new value.
+		/// The value returned is the sum this call installed, not a later re-read of mValue
+		/// (which could already include changes made by other threads). The snapshot is taken
+		/// with android_fake_atomic_read_64 so it goes through the same helpers as the exchange.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+		{
+			volatile int64_t* const pValue = (volatile int64_t*)&mValue;
+			int64_t oldValue;
+			int64_t newValue;
+
+			do {
+				oldValue = android_fake_atomic_read_64(pValue);
+				// Sum in unsigned arithmetic so that overflow wraps instead of being undefined.
+				newValue = (int64_t)((uint64_t)oldValue + (uint64_t)n);
+			}
+			while (android_fake_atomic_cmpxchg_64(oldValue, newValue, pValue) != 0);
+
+			return newValue;
+		}
+
+		/// Add (uint64_t, simulated)
+		/// Same as the int64_t version; the helpers are called through int64_t and the
+		/// installed sum is returned as uint64_t.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+		{
+			volatile int64_t* const pValue = (volatile int64_t*)&mValue;
+			uint64_t oldValue;
+			uint64_t newValue;
+
+			do {
+				oldValue = (uint64_t)android_fake_atomic_read_64(pValue);
+				newValue = oldValue + n;
+			}
+			while (android_fake_atomic_cmpxchg_64((int64_t)oldValue, (int64_t)newValue, pValue) != 0);
+
+			return newValue;
+		}
+
+		/// Increment (int64_t, simulated)
+		/// Forwards to Add with a step of one.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+		{
+			const ValueType step = 1;
+			return Add(step);
+		}
+
+		/// Increment (uint64_t, simulated)
+		/// Forwards to Add with a step of one.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+		{
+			const ValueType step = 1;
+			return Add(step);
+		}
+
+		/// Decrement (int64_t, simulated)
+		/// Forwards to Add with a step of minus one.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+		{
+			const ValueType step = -1;
+			return Add(step);
+		}
+
+		/// Decrement (uint64_t, simulated)
+		/// Forwards to Add with -1 converted to the unsigned ValueType.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+		{
+			const ValueType step = -1;
+			return Add(step);
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_ATOMIC_ANDROID_C11_H
+#define EATHREAD_ATOMIC_ANDROID_C11_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <stdatomic.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// C11 <stdatomic.h> based variant. The value is held as an _Atomic(T) and the loads
+		/// defined in the class body use memory_order_relaxed.
+		/// SetValue, SetValueConditional, Increment, Decrement and Add are declared here and
+		/// defined as explicit specializations after the class.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+			typedef _Atomic(T) AtomicValueType;
+
+			/// Default constructor. mValue is deliberately left in an unspecified state so
+			/// that an AtomicInt behaves like a built-in integer.
+			AtomicInt()
+			{
+			}
+
+			/// Constructs from a plain value by way of SetValue.
+			AtomicInt(ValueType n)
+			{
+				SetValue(n);
+			}
+
+			/// Copy constructor: reads the source with GetValue, stores with SetValue.
+			AtomicInt(const ThisType& x)
+			{
+				const ValueType sourceValue = x.GetValue();
+				SetValue(sourceValue);
+			}
+
+			/// Copy assignment: reads the source with GetValue, stores with SetValue.
+			AtomicInt& operator=(const ThisType& x)
+			{
+				const ValueType sourceValue = x.GetValue();
+				SetValue(sourceValue);
+				return *this;
+			}
+
+			/// Relaxed atomic load of mValue.
+			ValueType GetValue() const
+			{
+				AtomicValueType* const pSource = const_cast<AtomicValueType*>(&mValue);
+				return atomic_load_explicit(pSource, memory_order_relaxed);
+			}
+
+			/// Relaxed atomic load of mValue (same operation as the generic GetValue).
+			ValueType GetValueRaw() const
+			{
+				AtomicValueType* const pSource = const_cast<AtomicValueType*>(&mValue);
+				return atomic_load_explicit(pSource, memory_order_relaxed);
+			}
+
+			// Defined per type as explicit specializations after the class.
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			operator const ValueType() const
+			{
+				return GetValue();
+			}
+
+			ValueType operator =(ValueType n)
+			{
+				SetValue(n);
+				return n;
+			}
+
+			ValueType operator+=(ValueType n)
+			{
+				return Add(n);
+			}
+
+			ValueType operator-=(ValueType n)
+			{
+				return Add(-n);
+			}
+
+			ValueType operator++()
+			{
+				return Increment();
+			}
+
+			// Post-increment: step back from the incremented result to the prior value.
+			ValueType operator++(int)
+			{
+				return Increment() - 1;
+			}
+
+			ValueType operator--()
+			{
+				return Decrement();
+			}
+
+			// Post-decrement: step forward from the decremented result to the prior value.
+			ValueType operator--(int)
+			{
+				return Decrement() + 1;
+			}
+
+		protected:
+			AtomicValueType mValue;
+		};
+
+
+		///////////////////////////////////////////////////////////
+		/// 32 bit
+		///
+		/// SetValue (int32_t)
+		/// Relaxed atomic exchange: stores n and returns the value it replaced.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+		{
+			const ValueType previous = atomic_exchange_explicit(&mValue, n, memory_order_relaxed);
+			return previous;
+		}
+
+		/// SetValue (uint32_t)
+		/// Relaxed atomic exchange: stores n and returns the value it replaced.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+		{
+			const ValueType previous = atomic_exchange_explicit(&mValue, n, memory_order_relaxed);
+			return previous;
+		}
+
+		/// SetValueConditional (int32_t)
+		/// Relaxed strong compare-exchange: stores n if mValue equals condition.
+		/// Returns the result of the compare-exchange call.
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			const bool exchanged = atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed);
+			return exchanged;
+		}
+
+		/// SetValueConditional (uint32_t)
+		/// Relaxed strong compare-exchange: stores n if mValue equals condition.
+		/// Returns the result of the compare-exchange call.
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			const bool exchanged = atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed);
+			return exchanged;
+		}
+
+		/// Increment (int32_t)
+		/// Relaxed fetch-add of one; returns the fetched value plus one.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, 1, memory_order_relaxed);
+			return fetched + 1;
+		}
+
+		/// Increment (uint32_t)
+		/// Relaxed fetch-add of one; returns the fetched value plus one.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, 1u, memory_order_relaxed);
+			return fetched + 1u;
+		}
+
+		/// Decrement (int32_t)
+		/// Relaxed fetch-sub of one; returns the fetched value minus one.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+		{
+			const ValueType fetched = atomic_fetch_sub_explicit(&mValue, 1, memory_order_relaxed);
+			return fetched - 1;
+		}
+
+		/// Decrement (uint32_t)
+		/// Relaxed fetch-sub of one; returns the fetched value minus one.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+		{
+			const ValueType fetched = atomic_fetch_sub_explicit(&mValue, 1u, memory_order_relaxed);
+			return fetched - 1u;
+		}
+
+		/// Add (int32_t)
+		/// Relaxed fetch-add of n; returns the fetched value plus n.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed);
+			return fetched + n;
+		}
+
+		/// Add (uint32_t)
+		/// Relaxed fetch-add of n; returns the fetched value plus n.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed);
+			return fetched + n;
+		}
+
+
+		///////////////////////////////////////////////////////////
+		/// 64 bit
+		///
+		/// GetValue (int64_t)
+		/// Relaxed atomic load of mValue.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+		{
+			AtomicValueType* const pSource = const_cast<AtomicValueType*>(&mValue);
+			return atomic_load_explicit(pSource, memory_order_relaxed);
+		}
+
+		/// GetValue (uint64_t)
+		/// Relaxed atomic load of mValue.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+		{
+			AtomicValueType* const pSource = const_cast<AtomicValueType*>(&mValue);
+			return atomic_load_explicit(pSource, memory_order_relaxed);
+		}
+
+		/// SetValue (int64_t)
+		/// Relaxed atomic exchange: stores n and returns the value it replaced.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+		{
+			const ValueType previous = atomic_exchange_explicit(&mValue, n, memory_order_relaxed);
+			return previous;
+		}
+
+		/// SetValue (uint64_t)
+		/// Relaxed atomic exchange: stores n and returns the value it replaced.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+		{
+			const ValueType previous = atomic_exchange_explicit(&mValue, n, memory_order_relaxed);
+			return previous;
+		}
+
+		/// SetValueConditional (int64_t)
+		/// Relaxed strong compare-exchange: stores n if mValue equals condition.
+		/// Returns the result of the compare-exchange call.
+		template <> inline
+		bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			const bool exchanged = atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed);
+			return exchanged;
+		}
+
+		/// SetValueConditional (uint64_t)
+		/// Relaxed strong compare-exchange: stores n if mValue equals condition.
+		/// Returns the result of the compare-exchange call.
+		template <> inline
+		bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			const bool exchanged = atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed);
+			return exchanged;
+		}
+
+		/// Add (int64_t)
+		/// Relaxed fetch-add of n; returns the fetched value plus n.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed);
+			return fetched + n;
+		}
+
+		/// Add (uint64_t)
+		/// Relaxed fetch-add of n; returns the fetched value plus n.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+		{
+			const ValueType fetched = atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed);
+			return fetched + n;
+		}
+
+		/// Increment (int64_t)
+		/// Forwards to Add with a step of one.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+		{
+			const ValueType step = 1;
+			return Add(step);
+		}
+
+		/// Increment (uint64_t)
+		/// Forwards to Add with a step of one.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+		{
+			const ValueType step = 1;
+			return Add(step);
+		}
+
+		/// Decrement (int64_t)
+		/// Forwards to Add with a step of minus one.
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+		{
+			const ValueType step = -1;
+			return Add(step);
+		}
+
+		/// Decrement (uint64_t)
+		/// Forwards to Add with -1 converted to the unsigned ValueType.
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+		{
+			const ValueType step = -1;
+			return Add(step);
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Use of the C11 atomics API on Android is problematic because the platform 
+// implements the atomics API via macro wrappers around its platform-specific
+// functions.  Unfortunately, macros are not confined to a namespace: they affect
+// every header included afterwards and get applied to code in undesirable
+// ways.  One instance of this is the C11 atomics colliding with the atomic
+// functions of C++11 std::shared_ptr.
+// 
+// We attempt to limit the external impact of <stdatomic.h> by undefining the
+// relevant macros.
+// 
+// Note:  If you #include <stdatomic.h> above an eathread header, that header will undefine these macros.
+//
+// http://en.cppreference.com/w/cpp/memory/shared_ptr
+//
+// std::atomic_compare_exchange_strong(std::shared_ptr)
+// std::atomic_compare_exchange_strong_explicit(std::shared_ptr)
+// std::atomic_compare_exchange_weak(std::shared_ptr)
+// std::atomic_compare_exchange_weak_explicit(std::shared_ptr)
+// std::atomic_exchange(std::shared_ptr)
+// std::atomic_exchange_explicit(std::shared_ptr)
+// std::atomic_is_lock_free(std::shared_ptr)
+// std::atomic_load(std::shared_ptr)
+// std::atomic_load_explicit(std::shared_ptr)
+// std::atomic_store(std::shared_ptr)
+// std::atomic_store_explicit(std::shared_ptr)
+//
+
+#undef atomic_compare_exchange_strong
+#undef atomic_compare_exchange_strong_explicit
+#undef atomic_compare_exchange_weak
+#undef atomic_compare_exchange_weak_explicit
+#undef atomic_exchange
+#undef atomic_exchange_explicit
+#undef atomic_is_lock_free
+#undef atomic_load
+#undef atomic_load_explicit
+#undef atomic_store
+#undef atomic_store_explicit
+
+#endif // EATHREAD_ATOMIC_ANDROID_C11_H
+
+
+
@@ -0,0 +1,259 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_APPLE_EATHREAD_ATOMIC_APPLE_H
+#define EATHREAD_APPLE_EATHREAD_ATOMIC_APPLE_H
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <libkern/OSAtomic.h>
+#include "eathread/internal/atomic.h"
+#include "eathread/internal/eathread_atomic_standalone.h"
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// Generic AtomicInt for this platform. The value is held in a volatile member.
+		/// SetValue, SetValueConditional, Increment, Decrement and Add are declared here and
+		/// defined per type as explicit specializations after the class; the 64 bit types
+		/// have their own class specializations.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// Default constructor. mValue is deliberately left in an unspecified state so
+			/// that an AtomicInt behaves like a built-in integer.
+			AtomicInt()
+			{
+			}
+
+			/// Constructs from a plain value by way of SetValue.
+			AtomicInt(ValueType n)
+			{
+				SetValue(n);
+			}
+
+			/// Copy constructor: initializes mValue from the source's GetValue.
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue())
+			{
+			}
+
+			/// Copy assignment: plain store of the source's GetValue into mValue.
+			AtomicInt& operator=(const ThisType& x)
+			{
+				mValue = x.GetValue();
+				return *this;
+			}
+
+			/// Plain read of mValue; specialized for the 32 bit types after the class.
+			ValueType GetValue() const
+			{
+				return mValue;
+			}
+
+			/// Plain read of mValue.
+			ValueType GetValueRaw() const
+			{
+				return mValue;
+			}
+
+			// Defined per type as explicit specializations after the class.
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			operator const ValueType() const
+			{
+				return GetValue();
+			}
+
+			ValueType operator =(ValueType n)
+			{
+				SetValue(n);
+				return n;
+			}
+
+			ValueType operator+=(ValueType n)
+			{
+				return Add(n);
+			}
+
+			ValueType operator-=(ValueType n)
+			{
+				return Add(-n);
+			}
+
+			ValueType operator++()
+			{
+				return Increment();
+			}
+
+			// Post-increment: step back from the incremented result to the prior value.
+			ValueType operator++(int)
+			{
+				return Increment() - 1;
+			}
+
+			ValueType operator--()
+			{
+				return Decrement();
+			}
+
+			// Post-decrement: step forward from the decremented result to the prior value.
+			ValueType operator--(int)
+			{
+				return Decrement() + 1;
+			}
+
+		protected:
+			volatile ValueType mValue;
+		};
+		
+		/// class AtomicInt<uint64_t>
+		/// 64 bit unsigned specialization. Every operation except GetValueRaw and the copy
+		/// operations goes through the Atomic*64 functions, addressing mValue as a
+		/// volatile int64_t. The class is declared with 8 byte alignment.
+		template <>
+		class AtomicInt<uint64_t>
+		{
+		public:
+			typedef AtomicInt<uint64_t> ThisType;
+			typedef uint64_t          ValueType;
+
+			/// Default constructor. mValue is deliberately left in an unspecified state so
+			/// that an AtomicInt behaves like a built-in integer.
+			AtomicInt()
+			{
+			}
+
+			/// Constructs from a plain value by way of SetValue.
+			AtomicInt(ValueType n)
+			{
+				SetValue(n);
+			}
+
+			/// Copy constructor: initializes mValue from the source's GetValue.
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue())
+			{
+			}
+
+			/// Copy assignment: plain store of the source's GetValue into mValue.
+			AtomicInt& operator=(const ThisType& x)
+			{
+				mValue = x.GetValue();
+				return *this;
+			}
+
+			/// Reads the value through AtomicGetValue64.
+			ValueType GetValue() const
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return (uint64_t)AtomicGetValue64(pSigned);
+			}
+
+			/// Plain read of mValue.
+			ValueType GetValueRaw() const
+			{
+				return mValue;
+			}
+
+			/// Stores n through AtomicSetValue64 and returns that call's result.
+			ValueType SetValue(ValueType n)
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return (uint64_t)AtomicSetValue64(pSigned, n);
+			}
+
+			/// Forwards to AtomicSetValueConditional64 and returns its result.
+			bool      SetValueConditional(ValueType n, ValueType condition)
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return AtomicSetValueConditional64(pSigned, n, condition);
+			}
+
+			/// AtomicAdd64 with a step of 1.
+			ValueType Increment()
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return (uint64_t)AtomicAdd64(pSigned, 1);
+			}
+
+			/// AtomicAdd64 with a step of -1.
+			ValueType Decrement()
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return (uint64_t)AtomicAdd64(pSigned, -1);
+			}
+
+			/// AtomicAdd64 with a step of n.
+			ValueType Add(ValueType n)
+			{
+				volatile int64_t* const pSigned = (volatile int64_t *)&mValue;
+				return (uint64_t)AtomicAdd64(pSigned, n);
+			}
+
+			// operators
+			operator const ValueType() const
+			{
+				return GetValue();
+			}
+
+			ValueType operator =(ValueType n)
+			{
+				SetValue(n);
+				return n;
+			}
+
+			ValueType operator+=(ValueType n)
+			{
+				return Add(n);
+			}
+
+			ValueType operator-=(ValueType n)
+			{
+				return Add(-n);
+			}
+
+			ValueType operator++()
+			{
+				return Increment();
+			}
+
+			// Post-increment: step back from the incremented result to the prior value.
+			ValueType operator++(int)
+			{
+				return Increment() - 1;
+			}
+
+			ValueType operator--()
+			{
+				return Decrement();
+			}
+
+			// Post-decrement: step forward from the decremented result to the prior value.
+			ValueType operator--(int)
+			{
+				return Decrement() + 1;
+			}
+
+		protected:
+			volatile ValueType mValue;
+		}__attribute__((aligned(8)));
+		
+		/// class AtomicInt<int64_t>
+		/// 64 bit signed specialization. Every operation except GetValueRaw and the copy
+		/// operations goes through the Atomic*64 functions. The class is declared with
+		/// 8 byte alignment.
+		template <>
+		class AtomicInt<int64_t>
+		{
+		public:
+			typedef AtomicInt<int64_t> ThisType;
+			typedef int64_t          ValueType;
+
+			/// Default constructor. mValue is deliberately left in an unspecified state so
+			/// that an AtomicInt behaves like a built-in integer.
+			AtomicInt()
+			{
+			}
+
+			/// Constructs from a plain value by way of SetValue.
+			AtomicInt(ValueType n)
+			{
+				SetValue(n);
+			}
+
+			/// Copy constructor: initializes mValue from the source's GetValue.
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue())
+			{
+			}
+
+			/// Copy assignment: plain store of the source's GetValue into mValue.
+			AtomicInt& operator=(const ThisType& x)
+			{
+				mValue = x.GetValue();
+				return *this;
+			}
+
+			/// Reads the value through AtomicGetValue64.
+			ValueType GetValue() const
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicGetValue64(pValue);
+			}
+
+			/// Plain read of mValue.
+			ValueType GetValueRaw() const
+			{
+				return mValue;
+			}
+
+			/// Stores n through AtomicSetValue64 and returns that call's result.
+			ValueType SetValue(ValueType n)
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicSetValue64(pValue, n);
+			}
+
+			/// Forwards to AtomicSetValueConditional64 and returns its result.
+			bool      SetValueConditional(ValueType n, ValueType condition)
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicSetValueConditional64(pValue, n, condition);
+			}
+
+			/// AtomicAdd64 with a step of 1.
+			ValueType Increment()
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicAdd64(pValue, 1);
+			}
+
+			/// AtomicAdd64 with a step of -1.
+			ValueType Decrement()
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicAdd64(pValue, -1);
+			}
+
+			/// AtomicAdd64 with a step of n.
+			ValueType Add(ValueType n)
+			{
+				volatile int64_t* const pValue = (volatile int64_t *)&mValue;
+				return AtomicAdd64(pValue, n);
+			}
+
+			// operators
+			operator const ValueType() const
+			{
+				return GetValue();
+			}
+
+			ValueType operator =(ValueType n)
+			{
+				SetValue(n);
+				return n;
+			}
+
+			ValueType operator+=(ValueType n)
+			{
+				return Add(n);
+			}
+
+			ValueType operator-=(ValueType n)
+			{
+				return Add(-n);
+			}
+
+			ValueType operator++()
+			{
+				return Increment();
+			}
+
+			// Post-increment: step back from the incremented result to the prior value.
+			ValueType operator++(int)
+			{
+				return Increment() - 1;
+			}
+
+			ValueType operator--()
+			{
+				return Decrement();
+			}
+
+			// Post-decrement: step forward from the decremented result to the prior value.
+			ValueType operator--(int)
+			{
+				return Decrement() + 1;
+			}
+
+		protected:
+			volatile ValueType mValue;
+		}__attribute__((aligned(8)));
+		
+
+		/// GetValue (int32_t)
+		/// Reads the value by atomically adding zero with OSAtomicAdd32 and returning the result.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+		{
+			ValueType* const pMutable = const_cast<ValueType*>(&mValue);
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(pMutable);
+			return OSAtomicAdd32(0, pTarget);
+		}
+
+		/// GetValue (uint32_t)
+		/// Reads the value by atomically adding zero with OSAtomicAdd32 and returning the result.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+		{
+			ValueType* const pMutable = const_cast<ValueType*>(&mValue);
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(pMutable);
+			return OSAtomicAdd32(0, pTarget);
+		}
+
+		/// SetValue (int32_t)
+		/// Stores n with a compare-and-swap retry loop: snapshot mValue, try to replace that
+		/// snapshot with n, and repeat until OSAtomicCompareAndSwap32 succeeds.
+		/// Returns the snapshot that was replaced.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+
+			for (;;)
+			{
+				const int32_t observed = mValue;
+
+				if (OSAtomicCompareAndSwap32(observed, n, pTarget))
+					return observed;
+			}
+		}
+
+		/// SetValue (uint32_t)
+		/// Same retry loop as the int32_t version, with the snapshot held as uint32_t.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+
+			for (;;)
+			{
+				const uint32_t observed = mValue;
+
+				if (OSAtomicCompareAndSwap32(observed, n, pTarget))
+					return observed;
+			}
+		}
+
+		/// SetValueConditional (int32_t)
+		/// Asks OSAtomicCompareAndSwap32 to replace condition with n in mValue and
+		/// returns that call's result.
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicCompareAndSwap32(condition, n, pTarget);
+		}
+
+		/// SetValueConditional (uint32_t)
+		/// Asks OSAtomicCompareAndSwap32 to replace condition with n in mValue and
+		/// returns that call's result.
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicCompareAndSwap32(condition, n, pTarget);
+		}
+
+		/// Increment (int32_t)
+		/// Returns the result of OSAtomicIncrement32 on mValue.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicIncrement32(pTarget);
+		}
+
+		/// Increment (uint32_t)
+		/// Returns the result of OSAtomicIncrement32 on mValue.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicIncrement32(pTarget);
+		}
+
+		/// Decrement (int32_t)
+		/// Returns the result of OSAtomicDecrement32 on mValue.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicDecrement32(pTarget);
+		}
+
+		/// Decrement (uint32_t)
+		/// Returns the result of OSAtomicDecrement32 on mValue.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicDecrement32(pTarget);
+		}
+
+		/// Add (int32_t)
+		/// Returns the result of OSAtomicAdd32 applied to mValue with n.
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicAdd32(n, pTarget);
+		}
+
+		/// Add (uint32_t)
+		/// Returns the result of OSAtomicAdd32 applied to mValue with n.
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+		{
+			volatile int32_t* const pTarget = reinterpret_cast<volatile int32_t*>(&mValue);
+			return OSAtomicAdd32(n, pTarget);
+		}
+	}
+}
+
+#endif
@@ -0,0 +1,69 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_CALLSTACK_APPLE_H
+#define EATHREAD_CALLSTACK_APPLE_H
+
+
+#include <eathread/eathread_callstack.h>
+
+namespace EA
+{
+namespace Thread
+{
+
+	/// ModuleInfoApple
+	///
+	/// Describes a single module / VM map entry. All fields are fixed-size, so the struct
+	/// holds its strings inline.
+	///
+	/// This struct is based on the EACallstack ModuleInfo struct, but that can't be used here because
+	/// this package is a lower level package than EACallstack.
+	///
+	struct ModuleInfoApple
+	{
+		char8_t  mPath[256];        /// File name or file path
+		char8_t  mName[256];        /// Module name. Usually the same as the file name without the extension.
+		uint64_t mBaseAddress;      /// Base address in memory.
+		uint64_t mSize;             /// Module size in memory.
+		char     mType[32];         /// The type field (e.g. __TEXT) from the vmmap output.
+		char     mPermissions[16];  /// The permissions "r--/rwx" kind of string from the vmmap output.
+	};
+
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+	/// GetModuleInfoApple
+	///
+	/// This function exists for the purpose of being a central module/VM map info collecting function,
+	/// used by a couple functions within this package.
+	/// Writes as many entries as possible to the user-supplied array, up to the capacity of the array.
+	/// Returns the required number of entries, which may be more than the user-supplied capacity in the
+	/// case that the user didn't supply enough.
+	///
+	/// Only declared when EATHREAD_APPLE_GETMODULEINFO_ENABLED evaluates to non-zero.
+	///
+	/// pModuleInfoAppleArray          Destination array for the entries.
+	/// moduleInfoAppleArrayCapacity   Number of entries pModuleInfoAppleArray can hold.
+	/// pTypeFilter                    Defaults to NULL.
+	///                                NOTE(review): presumably restricts results to entries whose
+	///                                mType matches (e.g. "__TEXT") -- confirm against the implementation.
+	/// bEnableCache                   Defaults to true.
+	///                                NOTE(review): presumably allows previously collected results
+	///                                to be reused -- confirm against the implementation.
+	///
+	size_t GetModuleInfoApple(ModuleInfoApple* pModuleInfoAppleArray, size_t moduleInfoAppleArrayCapacity, 
+								const char* pTypeFilter = NULL, bool bEnableCache = true);
+#endif
+
+
+} // namespace Thread
+
+} // namespace EA
+
+#endif // Header include guard
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,61 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+#define EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+
+
+#include <EABase/eabase.h>
+#include <libkern/OSAtomic.h>
+
+
+#define EA_THREAD_SYNC_IMPLEMENTED
+
+
+// EAProcessorPause
+// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for
+// high performance spinning, as spinning without it incurs a significant performance penalty.
+// On processors other than x86 / x86-64 the macro expands to nothing.
+
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+	#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+#else
+	#define EAProcessorPause()
+#endif
+
+
+
+// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+// All three names map to the same function, OSMemoryBarrier from <libkern/OSAtomic.h>;
+// no distinction is made here between read-only and write-only barriers.
+// These are object-like macros, so the call site supplies the parentheses: EAReadBarrier();
+
+#define EAReadBarrier      OSMemoryBarrier
+#define EAWriteBarrier     OSMemoryBarrier
+#define EAReadWriteBarrier OSMemoryBarrier
+
+
+
+// EACompilerMemoryBarrier
+// An empty inline-asm statement with a "memory" clobber. It contains no instructions;
+// the clobber tells the compiler that memory may have changed across this point.
+
+#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+
+
+
+
+#endif // EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,54 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Created by Rob Parolin 
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+#define EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+
+#include <EABase/eabase.h>
+
+
+// Clang: the barrier macros all map to the __sync_synchronize builtin.
+// EAProcessorPause expands to nothing on this platform.
+#if defined(EA_COMPILER_CLANG)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#define EAProcessorPause()
+
+	#define EAReadBarrier      __sync_synchronize
+	#define EAWriteBarrier     __sync_synchronize
+	#define EAReadWriteBarrier __sync_synchronize
+
+	// Empty inline-asm statement with a "memory" clobber; contains no instructions.
+	#define EACompilerMemoryBarrier() __asm__ __volatile__ ("" : : : "memory")
+
+
+// GCC: same as above from GCC 4.1 on. For older versions the barrier macros fall back to
+// EACompilerMemoryBarrier, i.e. to the empty inline-asm statement only.
+#elif defined(EA_COMPILER_GNUC)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#define EAProcessorPause()
+
+	#if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		// EACompilerMemoryBarrier is defined further down; that is fine for macros because
+		// the name is only expanded at the point of use.
+		#define EAReadBarrier      EACompilerMemoryBarrier
+		#define EAWriteBarrier     EACompilerMemoryBarrier
+		#define EAReadWriteBarrier EACompilerMemoryBarrier
+	#endif
+
+	// Empty inline-asm statement with a "memory" clobber; contains no instructions.
+	#define EACompilerMemoryBarrier() __asm__ __volatile__ ("" : : : "memory")
+
+#endif
+
+#endif // EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+
@@ -0,0 +1,26 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Created by Rob Parolin 
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_ARMGCC_EATHREAD_SYNC_ARMGCC_H
+#define EATHREAD_ARMGCC_EATHREAD_SYNC_ARMGCC_H
+
+// Header file should not be included directly.  Provided here for backwards compatibility.
+// Please use eathread_sync.h
+
+#if defined(EA_PROCESSOR_ARM) 
+	#include <eathread/arm/eathread_sync_arm.h>
+#endif
+
+#endif
@@ -0,0 +1,208 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_ATOMIC_CPP11_H
+#define EATHREAD_ATOMIC_CPP11_H
+
+EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4574 4731)
+#include <atomic>
+EA_RESTORE_VC_WARNING()
+
+namespace EA
+{
+	namespace Thread
+	{
+		#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+		/// Non-member atomic functions
+		/// These act the same as the class functions below.
+		/// The T return values are the new value, except for the AtomicSwap function which returns the swapped out value.
+		///
+		/// todo: Implement me when we have a platform to test this on.  C++11 atomics are disabled on all platforms. 
+		///
+
+		/// AtomicInt
+		///
+		/// Integer wrapper implemented on top of std::atomic<T>.
+		///
+		/// Every mutating member performs exactly one std::atomic operation and
+		/// returns a plain ValueType instead of a reference. A reference would be
+		/// misleading: by the time the caller reads through it, another thread may
+		/// already have changed the stored value. The returned ValueType is always
+		/// the value produced (or replaced) by this call's own atomic operation.
+		template <class T>
+		class EATHREADLIB_API AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt() {}
+
+			/// AtomicInt
+			/// Constructs with an initial value.
+			AtomicInt(ValueType n) : mValue(n) {}
+
+			/// AtomicInt
+			/// Copy ctor. Uses GetValue to read the value, and thus is synchronized.
+			AtomicInt(const ThisType& x) : mValue(x.GetValue()) {}
+
+			/// AtomicInt
+			/// Assignment operator. Uses GetValue to read the value, and thus is synchronized.
+			/// Only the read of x and the write of this object are individually atomic;
+			/// the copy as a whole is not a single atomic operation.
+			AtomicInt& operator=(const ThisType& x)
+			{ mValue = x.GetValue(); return *this; }
+
+			/// GetValue
+			/// Safely gets the current value. A platform-specific version of
+			/// this might need to do something more than just read the value.
+			ValueType GetValue() const volatile { return mValue; }
+
+			/// GetValueRaw
+			/// "Unsafely" gets the current value. This is useful for algorithms
+			/// that want to poll the value in a high performance way before
+			/// reading or setting the value in a more costly thread-safe way.
+			/// You should not use this function when attempting to do thread-safe
+			/// atomic operations.
+			/// In this implementation it performs the same std::atomic read as GetValue.
+			ValueType GetValueRaw() const { return mValue; }
+
+			/// SetValue
+			/// Safely sets a new value. Returns the old value. Note that due to
+			/// expected multithreaded accesses, a call to GetValue after SetValue
+			/// might return a different value than what was set with SetValue.
+			/// This of course depends on your situation.
+			ValueType SetValue(ValueType n) { return mValue.exchange(n); }
+
+			/// SetValueConditional
+			/// Safely sets the value to a new value if the original value is equal to
+			/// a condition value. Returns true if the condition was met and the
+			/// assignment occurred. The comparison and value setting are done as
+			/// an atomic operation and thus another thread cannot intervene between
+			/// the two as would be the case with simple C code.
+			bool SetValueConditional(ValueType n, ValueType condition)
+			{
+				// compare_exchange_strong writes the observed value back into its first
+				// argument on failure; 'condition' is a by-value copy, so the caller's
+				// variable is never modified.
+				return mValue.compare_exchange_strong(condition, n);
+			}
+
+			/// Increment
+			/// Safely increments the value. Returns the new value.
+			/// This function acts the same as the C++ pre-increment operator.
+			ValueType Increment() { return ++mValue; }
+
+
+			/// Decrement
+			/// Safely decrements the value. Returns the new value.
+			/// This function acts the same as the C++ pre-decrement operator.
+			ValueType Decrement() { return --mValue; }
+
+
+			/// Add
+			/// Safely adds a value, which can be negative. Returns the new value.
+			/// You can implement subtraction with this function by using a negative argument.
+			ValueType Add(ValueType n) { return (mValue += n); }
+
+
+			/// operators
+			/// These allow an AtomicInt object to safely act like a built-in type.
+			///
+			/// Note: The operators for AtomicInt behave differently than standard
+			///         C++ operators in that they always return a ValueType instead
+			///         of a reference.
+			///
+			/// cast operator
+			/// Returns the AtomicInt value as an integral type. This allows the
+			/// AtomicInt to behave like a standard built-in integer type.
+			operator const ValueType() const { return mValue; }
+
+			/// operator =
+			/// Assigns a new value and returns the value that was assigned.
+			///
+			ValueType operator=(ValueType n) { SetValue(n); return n; }
+
+			/// operator+=
+			/// Adds a value to the AtomicInt and returns the value after the operation.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return
+			/// a reference, but rather the value of the AtomicInt after the
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the stored value still being equal to
+			/// the previous value + n, as another thread might have modified the AtomicInt
+			/// immediately after the addition. So rather than returning a reference to
+			/// the AtomicInt, the function returns the value produced by its own addition.
+			///
+			/// The result of the atomic += is returned directly. Re-reading mValue in a
+			/// second statement would be a separate load that could observe another
+			/// thread's update instead of the value this call produced.
+			ValueType operator+=(ValueType n)  { return (mValue += n); }
+
+			/// operator-=
+			/// Subtracts a value from the AtomicInt and returns the value after the operation.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return
+			/// a reference, but rather the value of the AtomicInt after the
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the stored value still being equal to
+			/// the previous value - n, as another thread might have modified the AtomicInt
+			/// immediately after the subtraction. So rather than returning a reference to
+			/// the AtomicInt, the function returns the value produced by its own subtraction.
+			///
+			/// As with operator+=, the result of the atomic operation is returned directly
+			/// rather than re-loading mValue afterwards.
+			ValueType operator-=(ValueType n) { return (mValue -= n); }
+
+			/// pre-increment operator++
+			/// Increments the AtomicInt and returns the value after the increment.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return
+			/// a reference, but rather the value of the AtomicInt after the
+			/// operation is complete, because another thread might modify the AtomicInt
+			/// immediately after the increment.
+			ValueType operator++() { return ++mValue; }
+
+			/// post-increment operator++
+			/// Increments the AtomicInt and returns the value the AtomicInt held before
+			/// the increment operation.
+			///
+			/// The returned value is a copy taken as part of the same atomic operation,
+			/// not a reference, because another thread might modify the AtomicInt
+			/// immediately after the increment.
+			ValueType operator++(int) { return mValue++; }
+
+			/// pre-decrement operator--
+			/// Decrements the AtomicInt and returns the value after the decrement.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return
+			/// a reference, but rather the value of the AtomicInt after the
+			/// operation is complete, because another thread might modify the AtomicInt
+			/// immediately after the decrement.
+			ValueType operator--() { return --mValue; }
+
+			/// post-decrement operator--
+			/// Decrements the AtomicInt and returns the value the AtomicInt held before
+			/// the decrement operation.
+			///
+			/// The returned value is a copy taken as part of the same atomic operation,
+			/// not a reference, because another thread might modify the AtomicInt
+			/// immediately after the decrement.
+			ValueType operator--(int) { return mValue--;}
+
+		private:
+			std::atomic<T> mValue;
+		};
+
+	}
+}
+
+
+#endif // EATHREAD_ATOMIC_CPP11_H
@@ -0,0 +1,826 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// eathread.h
+//
+// Created by Paul Pedriana, Maxis
+//
+// Provides some base global definitions for the EA::Thread library.
+//
+// Design
+// Many of the design criteria for EA::Thread are based on the design of the 
+// Posix threading standard. The Posix threading standard is designed to 
+// work portably on a wide range of operating systems and hardware, including
+// embedded systems and realtime environments. As such, Posix threads generally
+// represent a competent model to follow where possible. Windows and various
+// other platforms have independent multi-threading systems which are taken
+// into account here as well. If something exists in Windows but doesn't 
+// exist here (e.g. Thread suspend/resume), there is a decent chance that it 
+// is by design and for some good reason.
+// 
+// C++
+// There are a number of C++ libraries devoted to multithreading. Usually the 
+// goal of these libraries is to provide a platform-independent interface which
+// simplifies the most common usage patterns and helps prevent common errors.
+// Some of these libraries are basic wrappers around existing C APIs while 
+// others provide a new and different paradigm. We take the former approach
+// here, as it provides more or less the same functionality but provides 
+// it in a straightforward way that is easily approached by those familiar 
+// with platform-specific APIs. This approach has been referred to as the 
+// "Wrapper Facade Pattern".
+//
+// Condition Variables
+// Posix condition variables are implemented via the Condition class. Condition 
+// is essentially the Java and C# name for Posix' condition variables. For some
+// people, a condition variable may seem similar to a Win32 Signal. In actuality
+// they are similar but there is one critical difference: a Signal does not 
+// atomically unlock a mutex as part of the signaling process. This results in
+// problematic race conditions that make reliable producer/consumer systems
+// impossible to implement.
+//
+// Signals
+// As of this writing, there isn't a Win32-like Signal class. The reason for this
+// is that Semaphore does most or all the duty that Signal does and is a little
+// more portable, given that Signals exist only on Win32 and not elsewhere.
+//
+// Timeouts
+// Timeouts are specified as absolute times and not relative times. This may
+// not be how Win32 threading works but it is what's proper and is how Posix
+// threading works. From the OpenGroup online pthread documentation on this:
+//     An absolute time measure was chosen for specifying the 
+//     timeout parameter for two reasons. First, a relative time 
+//     measure can be easily implemented on top of a function 
+//     that specifies absolute time, but there is a race 
+//     condition associated with specifying an absolute timeout 
+//     on top of a function that specifies relative timeouts. 
+//     For example, assume that clock_gettime() returns the 
+//     current time and cond_relative_timed_wait() uses relative 
+//     timeouts:
+//            clock_gettime(CLOCK_REALTIME, &now);
+//            reltime = sleep_til_this_absolute_time - now;
+//            cond_relative_timed_wait(c, m, &reltime);
+//     If the thread is preempted between the first statement and 
+//     the last statement, the thread blocks for too long. Blocking, 
+//     however, is irrelevant if an absolute timeout is used. 
+//     An absolute timeout also need not be recomputed if it is used 
+//     multiple times in a loop, such as that enclosing a condition wait.
+//     For cases when the system clock is advanced discontinuously by 
+//     an operator, it is expected that implementations process any 
+//     timed wait expiring at an intervening time as if that time had 
+//     actually occurred.
+// 
+// General Threads
+// For detailed information about threads, it is recommended that you read
+// various competent sources of information about multithreading and 
+// multiprocessing.
+//    Programming with POSIX(R) Threads, by David R. Butenhof
+//    http://www.opengroup.org/onlinepubs/007904975/basedefs/pthread.h.html
+//    usenet: comp.programming.threads
+//    http://www.openmp.org/index.cgi?faq
+//    http://www.lambdacs.com/cpt/MFAQ.html
+//    http://www.lambdacs.com/cpt/FAQ.html
+//    http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/processes_and_threads.asp
+//
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_H
+#define EATHREAD_EATHREAD_H
+
+#include <eathread/internal/config.h>
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4574 4946 4350)
+	#include <chrono>
+	#include <thread>
+	EA_RESTORE_VC_WARNING()
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+	#if defined(_YVALS)         // Dinkumware doesn't usually provide gettimeofday or <sys/types.h>
+		#include <time.h>       // clock_gettime
+	#elif defined(EA_PLATFORM_UNIX)
+		#include <sys/time.h>   // gettimeofday
+	#endif
+#endif
+#if defined(EA_PLATFORM_APPLE)
+	#include <mach/mach_types.h>
+#endif
+#if defined(EA_PLATFORM_SONY) 
+	#include "sdk_version.h"
+	#include <kernel.h>
+#endif
+#include <limits.h>
+#include <float.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_THREAD_PREEMPTIVE / EA_THREAD_COOPERATIVE
+//
+// Defined or not defined.
+//
+// EA_THREAD_COOPERATIVE means that threads are not time-sliced by the 
+// operating system. If there exist multiple threads of the same priority 
+// then they will need to wait, sleep, or yield in order for the others 
+// to get time. See enum Scheduling and EATHREAD_SCHED for more info.
+//
+// EA_THREAD_PREEMPTIVE means that threads are time-sliced by the operating 
+// system at runtime. If there exist multiple threads of the same priority 
+// then the operating system will split execution time between them.
+// See enum Scheduling and EATHREAD_SCHED for more info.
+//
+// Exactly one of the two macros is defined, chosen solely by EA_THREADS_AVAILABLE.
+#if !EA_THREADS_AVAILABLE 
+	// EA_THREADS_AVAILABLE is 0: the build is tagged as cooperative.
+	#define EA_THREAD_COOPERATIVE
+#else
+	// EA_THREADS_AVAILABLE is non-zero: the build is tagged as preemptive.
+	#define EA_THREAD_PREEMPTIVE
+#endif
+
+
+/// namespace EA
+///
+/// This is the standard Electronic Arts C++ namespace.
+///
+namespace EA
+{
+	/// namespace Allocator
+	///
+	/// Forward declaration only, so that this header can name
+	/// EA::Allocator::ICoreAllocator without including its definition.
+	namespace Allocator
+	{
+		class ICoreAllocator;
+	}
+
+	/// namespace Thread
+	///
+	/// This is the standard Electronic Arts Thread C++ namespace.
+	///
+	namespace Thread
+	{
+		/// Scheduling 
+		/// Defines scheduling types supported by the given platform.
+		/// These are defined in detail by the Posix standard, with the 
+		/// exception of Coop, which is added here. FIFO scheduling
+		/// is the most classic for game development, as it allows for 
+		/// thread priorities and well-behaved synchronization primitives,
+		/// but it doesn't do time-slicing. The problem with time slicing
+		/// is that threads are pre-empted in the middle of work and this
+		/// hurts execution performance and cache performance. 
+		///
+		// Each enumerator is a distinct power of two.
+		// NOTE(review): presumably so values can be OR-ed into a mask of supported
+		// scheduling types -- confirm against callers.
+		enum Scheduling
+		{
+			kSchedulingFIFO     =  1,    /// There is no automatic time-slicing; thread priorities control execution and context switches.
+			kSchedulingRR       =  2,    /// Same as FIFO but with periodic time-slicing.
+			kSchedulingSporadic =  4,    /// Complex scheduling control. See the Posix standard.
+			kSchedulingTS       =  8,    /// a.k.a. SCHED_OTHER. Usually same as FIFO or RR except that thread priorities and execution can be temporarily modified.
+			kSchedulingCoop     = 16     /// The user must control thread scheduling beyond the use of synchronization primitives.
+		};
+		 
+		// EATHREAD_SCHED
+		//
+		// Expands to the Scheduling enumerator chosen for the platform being compiled:
+		// kSchedulingRR on Microsoft platforms, kSchedulingFIFO on Unix and on every
+		// other platform. EA_PLATFORM_UNIX is tested first.
+		#if defined(EA_PLATFORM_UNIX)
+			#define EATHREAD_SCHED    kSchedulingFIFO
+
+		#elif defined(EA_PLATFORM_MICROSOFT)
+			#define EATHREAD_SCHED    kSchedulingRR
+
+		#else
+			#define EATHREAD_SCHED    kSchedulingFIFO
+
+		#endif
+
+
+		// EATHREAD_MULTIPROCESSING_OS
+		//
+		// Defined as 0 or 1. 
+		// Indicates whether the OS supports multiple concurrent processes, which may be in 
+		// addition to supporting multiple threads within a process.
+		// Some platforms support multiple concurrently loaded processes but don't support
+		// running these processes concurrently. We don't currently count this as a
+		// multiprocessing OS.
+		// The #ifndef guard lets a value supplied before this point take precedence.
+		#ifndef EATHREAD_MULTIPROCESSING_OS
+			// Only Windows and Unix platforms default to 1; everything else defaults to 0.
+			#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)
+				#define EATHREAD_MULTIPROCESSING_OS 1
+			#else
+				#define EATHREAD_MULTIPROCESSING_OS 0
+			#endif
+		#endif
+		
+		// EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+		// 
+		// Defined as 0 or 1. 
+		// Indicates whether the OS supports setting the thread name from a different
+		// thread (set to 1) or if the name can be set only from the current thread (set to 0).
+		// The default is 0 on Linux and Apple platforms and 1 everywhere else; the
+		// #ifndef guard lets a value supplied before this point take precedence.
+		#ifndef EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+			#if defined(EA_PLATFORM_LINUX) || defined(EA_PLATFORM_APPLE)
+				#define EATHREAD_OTHER_THREAD_NAMING_SUPPORTED 0
+			#else
+				#define EATHREAD_OTHER_THREAD_NAMING_SUPPORTED 1
+			#endif
+		#endif
+
+		// Uint / Int
+		// Defines a machine-word sized integer, useful for operations that are as efficient
+		// as possible on the given machine. Note that the C99 intfastNN_t types aren't sufficient,
+		// as they are defined by compilers in an undesirable way for the processors we work with.
+		// 32-bit types are used when EA_PLATFORM_WORD_SIZE is undefined or equal to 4;
+		// any other value selects the 64-bit types.
+		#if !defined(EA_PLATFORM_WORD_SIZE) || (EA_PLATFORM_WORD_SIZE == 4)
+			typedef uint32_t Uint;
+			typedef int32_t  Int;
+		#else
+			typedef uint64_t Uint;
+			typedef int64_t  Int;
+		#endif
+
+
+		/// ThreadId
+		/// Uniquely identifies a thread throughout the system and is used by the EAThread API
+		/// to identify threads in a way equal to system provided thread ids. A ThreadId is the 
+		/// same as a system thread id and can be used in direct system threading API calls.
+		// The branches are tested in order, so "no threads" wins over everything,
+		// C++11 concurrency wins over the platform checks, and the Sony check is
+		// made before the generic Unix/Posix one.
+		#if !EA_THREADS_AVAILABLE
+			typedef int ThreadId;                // Threads unavailable: plain integer.
+		#elif EA_USE_CPP11_CONCURRENCY
+			typedef std::thread::id ThreadId;    // C++11 concurrency build.
+		#elif defined(EA_PLATFORM_SONY)
+			typedef uint64_t ThreadId;
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+			typedef pthread_t ThreadId;
+		#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+			typedef void* ThreadId; // This is really HANDLE, but HANDLE is the same as void* and we can avoid an expensive #include here.
+		#else
+			typedef int ThreadId;                // Unrecognized platform: plain integer.
+		#endif
+
+
+		// ThreadId constants
+		// NOTE(review): kThreadIdCurrent and kThreadIdAny are defined only in the #else
+		// branch; code that uses them will not compile when EA_USE_CPP11_CONCURRENCY is set.
+		#if EA_USE_CPP11_CONCURRENCY
+			// A default-constructed ThreadId serves as the invalid id in this branch.
+			const ThreadId kThreadIdInvalid = ThreadId(); /// Special ThreadId indicating an invalid thread identifier.
+		#else
+			const ThreadId kThreadIdInvalid  = ThreadId(0);            /// Special ThreadId indicating an invalid thread identifier.
+			const ThreadId kThreadIdCurrent  = ThreadId(INT_MAX);      /// Special ThreadId indicating the current thread.
+			const ThreadId kThreadIdAny      = ThreadId(INT_MAX - 1);  /// Special ThreadId indicating no thread in particular.
+		#endif
+
+		/// SysThreadId
+		/// It turns out that Microsoft operating systems (Windows, XBox, XBox 360)
+		/// have two different ways to identify a thread: HANDLE and DWORD. Some API
+		/// functions take thread HANDLES, while others take what Microsoft calls
+		/// thread ids (DWORDs). EAThread ThreadId is a HANDLE, as that is used for 
+		/// more of the core threading APIs. However, some OS-level APIs accept instead   
+		/// the DWORD thread id. 
+		// SysThreadId and kSysThreadIdInvalid are defined together in every branch.
+		// The branches are tested in order: native Microsoft threading, Sony, Apple,
+		// C++11 concurrency, then a fallback that reuses ThreadId.
+		#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE && !EA_USE_CPP11_CONCURRENCY
+			typedef uint32_t SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#elif defined(EA_PLATFORM_SONY)
+			typedef ScePthread SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = { 0 }; /// Special SysThreadId indicating an invalid thread identifier.
+		#elif defined(EA_PLATFORM_APPLE)
+			typedef thread_act_t SysThreadId; // thread_act_t is useful for calling mach APIs such as thread_policy_set() with. 
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#elif EA_USE_CPP11_CONCURRENCY
+			typedef std::thread::native_handle_type SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = { 0 }; /// Special SysThreadId indicating an invalid thread identifier.
+			
+			// For MSVC, native_handle_type is not a primitive type so we define operator== and operator!= for convenience.
+			// We use an auto converting proxy type for comparisons to avoid errors when native_handle_type is a built in type.
+			// Equals is only declared here; its definition is not part of this header section.
+			bool Equals(const SysThreadId& a, const SysThreadId& b);
+			// Wrapper with a non-explicit constructor: the right-hand operand of the
+			// operators below is converted to it implicitly from a SysThreadId.
+			struct SysThreadIdProxy 
+			{ 
+				SysThreadIdProxy(const SysThreadId& id_) : id(id_) {}
+				SysThreadId id;
+			};
+			// Both operators forward to Equals().
+			inline bool operator==(const SysThreadId& lhs, const SysThreadIdProxy& rhs) { return Equals(lhs, rhs.id); }
+			inline bool operator!=(const SysThreadId& lhs, const SysThreadIdProxy& rhs) { return !Equals(lhs, rhs.id); }
+		#else
+			// Fallback: the system thread id is the same type as ThreadId.
+			typedef ThreadId SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#endif
+
+		/// ThreadUniqueId
+		/// Uniquely identifies a thread throughout the system, but in a way that is not 
+		/// necessarily compatible with system thread id identification. Sometimes it is 
+		/// costly to work with system thread ids whereas all you want is some integer 
+		/// that is unique between threads and you don't need to use it for system calls.
+		/// See the EAThreadGetUniqueId macro/function for usage.
+		typedef Uint ThreadUniqueId;
+
+		// ThreadUniqueId constants
+		const ThreadUniqueId kThreadUniqueIdInvalid = 0; /// Special ThreadUniqueId indicating an invalid thread identifier.
+
+
+		// Time constants
+		// Milliseconds are the units of time in EAThread. While every generation of computers
+		// results in faster computers and thus milliseconds become an increasingly large number
+		// compared to the computer speed, computer multithreading is still largely done at the 
+		// millisecond level, due to it still being a small value relative to human perception.
+		// We may reconsider this some time in the future and provide an option to have ThreadTime
+		// be specified in finer units, such as microseconds.
+		#if EA_USE_CPP11_CONCURRENCY
+			// C++11 concurrency build: ThreadTime is the integer representation type of
+			// std::chrono::milliseconds. kTimeoutYield and kTimeoutImmediate are both zero.
+			typedef std::chrono::milliseconds::rep ThreadTime;                               /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = std::chrono::milliseconds::zero().count();/// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone = std::chrono::milliseconds::max().count();      /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield = std::chrono::milliseconds::zero().count();    /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			// ThreadTime is already a millisecond count here, so both conversions are plain casts.
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#elif defined(EA_PLATFORM_SONY) && EA_THREADS_AVAILABLE
+			// Sony build: ThreadTime is a double holding milliseconds, so fractional
+			// milliseconds can be represented.
+			typedef double ThreadTime;  // SceKernelUseconds maps to unsigned int 
+			static_assert(sizeof(ThreadTime) >= sizeof(unsigned int), "ThreadTime not large enough for uint32_t representation of milliseconds for platform portablity");
+
+			const ThreadTime kTimeoutImmediate = 0;
+			const ThreadTime kTimeoutNone = DBL_MAX;
+			const ThreadTime kTimeoutYield = 0.000001; // 1 nanosecond expressed in milliseconds
+
+			#define EA_THREADTIME_AS_UINT_MICROSECONDS(t)  ((unsigned int)((t) * 1000.0))                           /// Returns the milliseconds time as uint in microseconds.           
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))                                                       /// Returns the unconverted milliseconds time as a int64_t.
+			#define EA_THREADTIME_AS_DOUBLE(t) (t)                                                                  /// Returns the time as double milliseconds. May include a fraction component.
+			#define EA_TIMESPEC_AS_UINT(t)  ((unsigned int)(((t).tv_sec * 1000) + ((t).tv_nsec / 1000000)))         /// Returns the time as uint in milliseconds.            
+			#define EA_TIMESPEC_AS_DOUBLE_IN_MS(t)  ( (((t).tv_sec * 1000000000ull) + ((t).tv_nsec))/1000000.0)     /// Returns the time as double in milliseconds. May include a fraction component.
+
+		#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+			/// ThreadTime
+			///
+			/// A timespec (tv_sec seconds plus tv_nsec nanoseconds) extended with
+			/// millisecond-based construction and arithmetic.
+			///
+			/// The arithmetic members assume that tv_nsec of every operand is normalized,
+			/// i.e. within [0, 1000000000), and carry or borrow at most one second.
+			struct ThreadTime : public timespec
+			{
+				typedef int seconds_t;  // To consider: change to uint64_t or maybe long.
+				typedef int nseconds_t;
+
+				/// Zero time.
+				ThreadTime()
+					{ tv_sec = 0; tv_nsec = 0; }
+
+				/// Copies both fields of an existing timespec unchanged.
+				ThreadTime(const timespec& ts)
+					{ tv_sec = ts.tv_sec; tv_nsec = ts.tv_nsec; }
+
+				/// Stores both fields exactly as given; no normalization is applied.
+				ThreadTime(seconds_t nSeconds, nseconds_t nNanoseconds)
+					{ tv_sec = (long)nSeconds; tv_nsec = (long)nNanoseconds; }
+
+				/// Builds a time from a millisecond count.
+				/// The count is split with 64-bit division and remainder before any narrowing,
+				/// and a negative count is normalized so that tv_nsec ends up non-negative.
+				ThreadTime(const int64_t& nMilliseconds)
+				{
+					tv_sec  = (long)(nMilliseconds / 1000);
+					tv_nsec = (long)((nMilliseconds % 1000) * 1000000);
+					NormalizeNanoseconds();
+				}
+
+				/// Adds a millisecond count, which may be negative.
+				ThreadTime& operator+=(const int64_t& nMilliseconds)
+				{
+					// Divide in 64 bits first. Casting to long before dividing would
+					// truncate large counts on targets where long is 32 bits.
+					tv_sec  += (long)(nMilliseconds / 1000);
+					tv_nsec += (long)((nMilliseconds % 1000) * 1000000); // remainder has magnitude < 1000 ms
+					NormalizeNanoseconds();
+					return *this;
+				}
+
+				/// Subtracts a millisecond count, which may be negative.
+				ThreadTime& operator-=(const int64_t& nMilliseconds)
+				{
+					tv_sec  -= (long)(nMilliseconds / 1000);
+					tv_nsec -= (long)((nMilliseconds % 1000) * 1000000);
+					NormalizeNanoseconds();
+					return *this;
+				}
+
+				/// Adds another time; carries one second if the nanoseconds overflow.
+				ThreadTime& operator+=(const ThreadTime& tt)
+				{
+					tv_sec  += tt.tv_sec;
+					tv_nsec += tt.tv_nsec;
+					if(tv_nsec >= 1000000000){ tv_sec++; tv_nsec -= 1000000000; }
+					return *this;
+				}
+
+				/// Subtracts another time; borrows one second if the nanoseconds underflow.
+				ThreadTime& operator-=(const ThreadTime& tt)
+				{
+					tv_sec  -= tt.tv_sec;
+					tv_nsec -= tt.tv_nsec;
+					if(tv_nsec < 0){ tv_sec--; tv_nsec += 1000000000; }
+					return *this;
+				}
+
+			private:
+				/// Moves at most one second between tv_nsec and tv_sec, in either
+				/// direction. That is sufficient after adding or subtracting less than
+				/// one second's worth of nanoseconds to a normalized value.
+				void NormalizeNanoseconds()
+				{
+					if(tv_nsec >= 1000000000){ tv_sec++; tv_nsec -= 1000000000; }
+					else if(tv_nsec < 0)     { tv_sec--; tv_nsec += 1000000000; }
+				}
+			};
+			/// Binary arithmetic. Each operator copies the left operand and applies the
+			/// matching compound-assignment member of ThreadTime to the copy.
+			inline ThreadTime operator+ (const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				ThreadTime sum(tt1);
+				sum += tt2;
+				return sum;
+			}
+
+			inline ThreadTime operator+ (const ThreadTime& tt,  const int64_t& nMilliseconds)
+			{
+				ThreadTime sum(tt);
+				sum += nMilliseconds;
+				return sum;
+			}
+
+			inline ThreadTime operator- (const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				ThreadTime difference(tt1);
+				difference -= tt2;
+				return difference;
+			}
+
+			inline ThreadTime operator- (const ThreadTime& tt,  const int64_t& nMilliseconds)
+			{
+				ThreadTime difference(tt);
+				difference -= nMilliseconds;
+				return difference;
+			}
+
+			/// Comparisons. These assume that the nsec value is normalized
+			/// (always between 0 && 1000000000).
+			inline bool operator==(const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				return (tt1.tv_sec == tt2.tv_sec) && (tt1.tv_nsec == tt2.tv_nsec);
+			}
+
+			inline bool operator!=(const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				return !(tt1 == tt2);
+			}
+
+			/// Orders by seconds first; nanoseconds only break a tie in seconds.
+			inline bool operator< (const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				if(tt1.tv_sec != tt2.tv_sec)
+					return tt1.tv_sec < tt2.tv_sec;
+				return tt1.tv_nsec < tt2.tv_nsec;
+			}
+
+			inline bool operator> (const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				if(tt1.tv_sec != tt2.tv_sec)
+					return tt1.tv_sec > tt2.tv_sec;
+				return tt1.tv_nsec > tt2.tv_nsec;
+			}
+
+			inline bool operator<=(const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				if(tt1.tv_sec != tt2.tv_sec)
+					return tt1.tv_sec < tt2.tv_sec;
+				return tt1.tv_nsec <= tt2.tv_nsec;
+			}
+
+			inline bool operator>=(const ThreadTime& tt1, const ThreadTime& tt2)
+			{
+				if(tt1.tv_sec != tt2.tv_sec)
+					return tt1.tv_sec > tt2.tv_sec;
+				return tt1.tv_nsec >= tt2.tv_nsec;
+			}
+
+			const  ThreadTime kTimeoutImmediate(0, 0);            /// Used to specify to functions to return immediately if the operation could not be done.
+			const  ThreadTime kTimeoutNone(INT_MAX, INT_MAX);     /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const  ThreadTime kTimeoutYield(0, 0);                /// Used to specify to ThreadSleep to yield to threads of equivalent priority.
+
+			// In the integer conversions tv_sec is widened to int64_t *before* it is
+			// multiplied. Multiplying in the native width of tv_sec first would overflow
+			// where that type is 32 bits (for example with kTimeoutNone's INT_MAX seconds).
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(((int64_t)(t).tv_sec * 1000) + ((t).tv_nsec / 1000000)))                   /// Returns the time as int64_t milliseconds.
+			#define EA_THREADTIME_AS_INT64_MICROSECONDS(t)  ((int64_t)(((int64_t)(t).tv_sec * 1000000) + (((t).tv_nsec / 1000))))    /// Returns the time as int64_t microseconds.
+			#define EA_THREADTIME_AS_DOUBLE(t) (((t).tv_sec * 1000.0) + ((t).tv_nsec / 1000000.0))                                   /// Returns the time as double milliseconds.
+
+		#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+			// Microsoft x86-64 build: ThreadTime is an unsigned 64-bit millisecond count.
+			// kTimeoutYield and kTimeoutImmediate are both zero.
+			typedef uint64_t   ThreadTime;                        /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = 0;             /// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone      = UINT64_MAX;    /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield     = 0;             /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			// ThreadTime is already a millisecond count here, so both conversions are plain casts.
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#else
+			// Fallback for every configuration not matched above: ThreadTime is an
+			// unsigned int millisecond count. kTimeoutYield and kTimeoutImmediate are both zero.
+			typedef unsigned   ThreadTime;                        /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = 0;             /// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone      = UINT_MAX;      /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield     = 0;             /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			// ThreadTime is already a millisecond count here, so both conversions are plain casts.
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#endif
+
+		#if defined(EA_PLATFORM_MICROSOFT)                        /// Can be removed from C++11 Concurrency builds once full C++11 implementation is completed
+			uint32_t RelativeTimeoutFromAbsoluteTimeout(ThreadTime absoluteTimeout);
+		#endif
+
+		// Thread priority constants
+		// There is a standardized mechanism to convert system-specific thread
+		// priorities to these platform-independent priorities and back without 
+		// loss of precision or behaviour. The convention is that kThreadPriorityDefault 
+		// equates to the system-specific normal thread priority. Thus for Microsoft
+		// APIs a thread with priority kThreadPriorityDefault will be of Microsoft
+		// priority THREAD_PRIORITY_NORMAL. A thread with an EAThread priority 
+		// of kThreadPriorityDefault + 1 will have a Microsoft priority of THREAD_PRIORITY_NORMAL + 1.
+		// The only difference is that with EAThread all platforms are standardized on 
+		// kThreadPriorityDefault as the normal value and that higher EAThread priority
+		// integral values mean higher thread priorities for running threads. This last
+		// item is of significance because Sony platforms natively define lower integers
+		// to mean higher thread priorities. With EAThread you get consistent behaviour
+		// across platforms and thus kThreadPriorityDefault + 1 always results in a
+		// thread that runs at priority of one level higher. On Sony platforms, this + 1
+		// gets translated to a - 1 when calling the Sony native thread priority API.
+		// EAThread priorities have no mandated integral bounds, though
+		// kThreadPriorityMin and kThreadPriorityMax are defined as convenient practical
+		// endpoints for users.  Users should not generally use hard-coded constants to
+		// refer to EAThread priorities much like it's best not to use hard-coded
+		// constants to refer to platform-specific native thread priorities. Also, users
+		// generally want to avoid manipulating thread priorities to the extent possible
+		// and use conventional synchronization primitives to control execution.
+		// Similarly, wildly varying thread priorities such as +100 are not likely to
+		// achieve much and are not likely to be very portable.
+		//
+		const int kThreadPriorityUnknown = INT_MIN;      /// Invalid or unknown priority.
+		const int kThreadPriorityMin     =    -128;      /// Minimum thread priority enumerated by EAThread. In practice, a valid thread priority can be anything other than kThreadPriorityUnknown.
+		const int kThreadPriorityDefault =       0;      /// Default (a.k.a. normal) thread priority.
+		const int kThreadPriorityMax     =     127;      /// Maximum thread priority enumerated by EAThread. In practice, a valid thread priority can be anything other than kThreadPriorityUnknown.
+
+
+
+		/// kSysThreadPriorityDefault
+		/// Defines the platform-specific default thread priority.
+		#if defined(EA_PLATFORM_SONY)
+			const int kSysThreadPriorityDefault = 700;
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+			const int kSysThreadPriorityDefault = 0; // Some Unix variants use values other than zero, but these are not relevant.
+		#elif defined(EA_PLATFORM_MICROSOFT)
+			const int kSysThreadPriorityDefault = 0; // Same as THREAD_PRIORITY_NORMAL
+		#else
+			const int kSysThreadPriorityDefault = 0;
+		#endif
+
+
+		// The following functions are standalone and not static members of the thread class 
+		// because they are potentially used by multiple threading primitives and we don't 
+		// want to create a dependency of threading primitives on class Thread.
+
+		/// GetThreadId
+		/// Gets the thread ID for the current thread. This thread ID should 
+		/// be unique throughout the system.
+		EATHREADLIB_API ThreadId GetThreadId();
+
+
+		/// GetSysThreadId
+		/// Gets the operating system thread id associated with the given ThreadId.
+		/// It turns out that Microsoft operating systems (Windows, XBox, XBox 360)
+		/// have two different ways to identify a thread: HANDLE and DWORD. Some API
+		/// functions take thread HANDLES, while others take what Microsoft calls
+		/// thread ids (DWORDs). EAThread ThreadId is a HANDLE, as that is used for 
+		/// more of the core threading APIs. However, some OS-level APIs accept instead   
+		/// the DWORD thread id. This function returns the OS thread id for a given 
+		/// EAThread ThreadId. In the case of Microsoft OSs, this returns a DWORD from
+		/// a HANDLE and with other OSs this function simply returns the ThreadId.
+		/// Returns a valid SysThreadId or kSysThreadIdInvalid if the input id is invalid.
+		EATHREADLIB_API SysThreadId GetSysThreadId(ThreadId id);
+
+
+		/// GetThreadId
+		/// Returns the ThreadId that corresponds to the given SysThreadId.
+		/// This is a portable function to convert from SysThreadIds to ThreadIds.
+		/// For platforms that do not differentiate between these two types no conversion is attempted.
+		EATHREADLIB_API ThreadId GetThreadId(SysThreadId id);
+
+
+		/// GetSysThreadId
+		/// Gets the SysThreadId for the current thread. This thread ID should 
+		/// be unique throughout the system.
+		EATHREADLIB_API SysThreadId GetSysThreadId();
+
+
+		/// GetThreadPriority
+		/// Gets the priority of the current thread.
+		/// This function can return any int except for kThreadPriorityUnknown, as the 
+		/// current thread's priority will always be knowable. A return value of kThreadPriorityDefault
+		/// means that this thread is of normal (a.k.a. default) priority.
+		/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+		/// for more information about thread priority values and behaviour.
+		EATHREADLIB_API int GetThreadPriority();
+
+
+		/// SetThreadPriority
+		/// Sets the priority of the current thread.
+		/// Accepts any integer priority value except kThreadPriorityUnknown.
+		/// On some platforms, this function will automatically convert any invalid 
+		/// priority for that particular platform to a valid one.  A normal (a.k.a. default) thread 
+		/// priority is identified by kThreadPriorityDefault.
+		/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+		/// for more information about thread priority values and behaviour.
+		EATHREADLIB_API bool SetThreadPriority(int nPriority);
+
+
+		/// GetThreadStackBase
+		/// Returns the base address of the current thread's stack.
+		/// Recall that on all supported platforms the stack grows downward,
+		/// and thus the stack base address is of a higher value than the 
+		/// addresses of the stack's contents.
+		EATHREADLIB_API void* GetThreadStackBase();
+
+
+		// Thread processor constants
+		const int kProcessorDefault = -1;    /// Use the default processor for the platform. On many platforms, the default is to not be tied to any specific processor, but other threads can only ever be bound to a single processor.
+		const int kProcessorAny     = -2;    /// Run the thread on any processor. Many platforms will switch threads between processors dynamically.
+
+
+		/// SetThreadProcessor
+		/// Sets the processor the current thread should run on. Valid values 
+		/// are kProcessorDefault, kProcessorAny, or a processor
+		/// index in the range of [0, processor count). If the input value
+		/// is >= the processor count, it will be reduced to be a modulo of
+		/// the processor count. Any other invalid value will cause the processor
+		/// to be set to zero.
+		/// This function isn't guaranteed to restrict the thread to executing 
+		/// on the given processor for all platforms. Some platforms don't support
+		/// assigning thread processor affinity, while with others (e.g. Windows using 
+		/// SetThreadIdealProcessor) the OS tries to comply but will use a different
+		/// processor when the assigned one is unavailable.
+		EATHREADLIB_API void SetThreadProcessor(int nProcessor);
+		
+
+		/// GetThreadProcessor
+		/// Returns the (possibly virtual) CPU index that the thread is currently
+		/// running on. Different systems may have differing definitions of what
+		/// a unique processor is. Some CPUs have multiple sub-CPUs (e.g. "cores")
+		/// which are treated as unique processors by the system. 
+		/// Many systems switch threads between processors dynamically; thus it's 
+		/// possible that the thread may be on a different CPU by the time this 
+		/// function returns. 
+		/// Lastly, some systems don't provide the ability to detect what processor
+		/// the current thread is running on; in these cases this function returns 0.
+		EATHREADLIB_API int GetThreadProcessor();
+		
+
+		/// GetProcessorCount
+		/// Returns the (possibly virtual) CPU count that the current system has.
+		/// Some systems (e.g. Posix, Unix) don't expose an ability to tell how 
+		/// many processors there are; in these cases this function returns 1.
+		/// This function returns the number of currently active processors. 
+		/// Some systems can modify the number of active processors dynamically.
+		EATHREADLIB_API int GetProcessorCount();
+
+
+		/// kThreadAffinityMaskAny
+		/// Defines the thread affinity mask that enables the thread to float on all
+		/// available processors. All 64 bits are set; ~0U would set only the low 32 bits.
+		typedef uint64_t ThreadAffinityMask;
+		const ThreadAffinityMask kThreadAffinityMaskAny = ~ThreadAffinityMask(0);
+
+
+		/// SetThreadAffinityMask
+		/// 
+		/// The nAffinityMask is a bit field where each bit designates a processor.
+		/// NOTE(review): the overload without a ThreadId presumably targets the calling thread; confirm against the .cpp.
+		/// This function isn't guaranteed to restrict the thread to executing 
+		/// on the given processors for all platforms. Some platforms don't support
+		/// assigning thread processor affinity, while with others (e.g. Windows using 
+		/// SetThreadIdealProcessor) the OS tries to comply but will use a different
+		/// processor when the assigned one is unavailable.
+		EATHREADLIB_API void SetThreadAffinityMask(ThreadAffinityMask nAffinityMask);
+		EATHREADLIB_API void SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask);
+	
+
+		/// GetThreadAffinityMask
+		///   
+		/// Returns the current thread affinity mask specified by the user.
+		EATHREADLIB_API ThreadAffinityMask GetThreadAffinityMask();
+		EATHREADLIB_API ThreadAffinityMask GetThreadAffinityMask(const EA::Thread::ThreadId& id);
+
+
+		/// GetThreadName
+		/// Returns the name of the thread assigned by the SetThreadName function.
+		/// If the thread was not named by the SetThreadName function, then the name is empty ("").
+		EATHREADLIB_API const char* GetThreadName();
+		EATHREADLIB_API const char* GetThreadName(const EA::Thread::ThreadId& id);
+
+
+		/// SetThreadName
+		///
+		/// Sets a descriptive name for the thread. On some platforms this name is passed on
+		/// to the debugging tools so they can see this name. The name length, including a
+		/// terminating 0 char, is limited to EATHREAD_NAME_SIZE characters. Any characters
+		/// beyond that are ignored.
+		/// 
+		/// You can set the name of a Thread object only if it has already begun.  You can
+		/// also set the name with the Begin function via the ThreadParameters argument to
+		/// Begin. This design is in order to simplify the implementation, but being able
+		/// to set ThreadParameters before Begin is something that can be considered in the
+		/// future.
+		///
+		/// Some platforms (e.g. Linux) have the restriction that this function works
+		/// properly only when called by the same thread that you want to name. Given this
+		/// situation, the most portable way to use this SetThreadName function is to either
+		/// always call it from the thread to be named or to use the ThreadParameters to
+		/// give the thread a name before it is started and let the started thread name
+		/// itself.
+		//
+		// 
+		//
+		EATHREADLIB_API void SetThreadName(const char* pName);
+		EATHREADLIB_API void SetThreadName(const EA::Thread::ThreadId& id, const char* pName);
+
+
+		/// ThreadSleep
+		/// Puts the current thread to sleep for an amount of time hinted at 
+		/// by the time argument. The timeout is merely a hint and the system 
+		/// thread scheduler might return well before the sleep time has elapsed.
+		/// The input 'timeRelative' refers to a relative time and not an
+		/// absolute time such as used by EAThread mutexes, semaphores, etc. 
+		/// This is for consistency with other threading systems such as Posix and Win32.
+		/// A sleep time of zero has the same effect as simply yielding to other
+		/// available threads.
+		///
+		EATHREADLIB_API void ThreadSleep(const ThreadTime& timeRelative = kTimeoutImmediate);
+
+
+		/// ThreadCooperativeYield
+		/// On platforms that use cooperative multithreading instead of 
+		/// pre-emptive multithreading, this function maps to ThreadSleep(0).
+		/// On pre-emptive platforms, this function is a no-op. The intention
+		/// is to allow cooperative multithreaded systems to yield manually
+		/// in order for other threads to run, but also not to penalize 
+		/// pre-emptive systems that don't need such manual yielding. If you 
+		/// want to forcefully yield on a pre-emptive system, call ThreadSleep(0).
+		#ifdef EA_THREAD_COOPERATIVE
+			#define ThreadCooperativeYield() EA::Thread::ThreadSleep(EA::Thread::kTimeoutYield)
+		#else
+			#define ThreadCooperativeYield()
+		#endif
+
+
+		/// ThreadEnd
+		/// This function provides a way for a thread to end itself.
+		EATHREADLIB_API void ThreadEnd(intptr_t threadReturnValue);
+
+
+		/// GetThreadTime
+		/// Gets the current absolute time in milliseconds.
+		/// This is required for working with absolute timeouts, for example.
+		/// To specify a timeout that is relative to the current time, simply
+		/// add time (in milliseconds) to the return value of GetThreadTime.
+		/// Alternatively, you can use ConvertRelativeTime to calculate an absolute time.
+		EATHREADLIB_API ThreadTime GetThreadTime();
+
+
+		/// ConvertRelativeTime
+		/// Turns a relative time (in milliseconds) into an absolute time (in
+		/// milliseconds) by adding it to the current value of GetThreadTime.
+		/// Example usage: mutex.Lock(ConvertRelativeTime(1000));
+		EATHREADLIB_API inline ThreadTime ConvertRelativeTime(const ThreadTime& timeRelative)
+		{
+			ThreadTime timeNow = GetThreadTime();
+			return timeNow + timeRelative;
+		}
+
+		/// SetAssertionFailureFunction
+		/// Allows the user to specify a callback function to trap assertion failures.
+		/// You can use this to glue your own assertion system into this system.
+		typedef void (*AssertionFailureFunction)(const char* pExpression, void* pContext);
+		EATHREADLIB_API void SetAssertionFailureFunction(AssertionFailureFunction pAssertionFailureFunction, void* pContext);
+
+
+		/// AssertionFailure / AssertionFailureV
+		/// Trigger an assertion failure; AssertionFailureV takes a format string plus variadic arguments (presumably printf-style; confirm).
+		/// These functions are generally intended for internal use but are available so that related code can use the same system.
+		EATHREADLIB_API void AssertionFailure(const char* pExpression);
+		EATHREADLIB_API void AssertionFailureV(const char* pFormat, ...);
+
+
+
+
+		/// Allocator
+		/// This is the same as (the first four functions of) ICoreAllocator.
+		/// If the allocator is set via SetAllocator, then it must be done before
+		/// any other thread operations which might allocate memory are done. 
+		/// Typically this includes creating objects via factory functions and 
+		/// creating threads whereby you specify that thread resources be allocated for you.
+		class Allocator
+		{
+		public:
+			virtual ~Allocator() {}
+			virtual void* Alloc(size_t size, const char* name = 0, unsigned int flags = 0) = 0; // Overload without alignment arguments.
+			virtual void* Alloc(size_t size, const char* name, unsigned int flags,
+									unsigned int align, unsigned int alignOffset = 0) = 0; // Overload with alignment arguments.
+			virtual void Free(void* block, size_t size=0) = 0;
+		};
+
+		EATHREADLIB_API void       SetAllocator(Allocator* pAllocator);
+		EATHREADLIB_API Allocator* GetAllocator();
+
+		EATHREADLIB_API void SetAllocator(EA::Allocator::ICoreAllocator* pAllocator);
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+/// EAThreadGetUniqueId
+///
+/// Gets a value that is unique per thread but isn't necessarily the system-recognized
+/// thread id. This function is at least as fast as GetThreadId, and on some platforms
+/// is potentially significantly faster due to being implemented in inline asm which 
+/// avoids a system function call which may cause an instruction cache miss penalty.
+/// This function is useful for creating very fast implementations of some kinds of 
+/// threading constructs. It's implemented as a macro instead of a function in order
+/// to optimize inlining success across all platforms and compilers.
+///
+/// This function is guaranteed to yield a valid value; there are no error conditions.
+/// Every variant expands to a fully parenthesized assignment expression.
+/// This macro acts as if it were declared as a function like this:
+///     void EAThreadGetUniqueId(ThreadUniqueId& result);
+///
+/// Example usage:
+///     ThreadUniqueId x;
+///     EAThreadGetUniqueId(x);
+///
+#if EA_USE_CPP11_CONCURRENCY
+	#define EAThreadGetUniqueId(dest) ((dest) = static_cast<uintptr_t>(std::hash<std::thread::id>()(std::this_thread::get_id())))
+
+#elif defined(EA_PLATFORM_WINDOWS) && defined(_MSC_VER) && !defined(_WIN64)
+
+	// Reference implementation:
+	//extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+	//#define EAThreadGetUniqueId(dest) ((dest) = (ThreadUniqueId)(uintptr_t)GetCurrentThreadId())
+
+	// Fast implementation:
+	extern "C" unsigned long __readfsdword(unsigned long offset);
+	#pragma intrinsic(__readfsdword)
+	#define EAThreadGetUniqueId(dest) ((dest) = (EA::Thread::ThreadUniqueId)(uintptr_t)__readfsdword(0x18))
+
+#elif defined(_MSC_VER) && defined(EA_PROCESSOR_X86_64)
+	#pragma warning(push, 0)
+	#include <intrin.h>
+	#pragma warning(pop)
+	#define EAThreadGetUniqueId(dest) ((dest) = (EA::Thread::ThreadUniqueId)(uintptr_t)__readgsqword(0x30))
+	// Could also use dest = (EA::Thread::ThreadUniqueId)NtCurrentTeb(), but that would require #including <windows.h>, which is very heavy.
+
+#else
+
+	// Reference implementation:
+	#define EAThreadGetUniqueId(dest) ((dest) = (EA::Thread::ThreadUniqueId)(uintptr_t)EA::Thread::GetThreadId())
+
+#endif
+
+
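+// EAThreadThreadIdToString / EAThreadSysThreadIdToString
+// Convert a thread id to a string suitable for use with printf like functions, e.g.:
+//      printf("%s", EAThreadThreadIdToString(myThreadId));
+// The string lives in a temporary buffer object, so use it only within the expression that invokes the macro.
+// These macros are intended for debugging purposes and make no guarantees about performance or how a thread id is mapped to a string.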
+namespace EA
+{
+	namespace Thread
+	{
+		namespace detail
+		{
+			struct EATHREADLIB_API ThreadIdToStringBuffer
+			{
+			public:
+				enum { BufSize = 32 };
+				explicit ThreadIdToStringBuffer(EA::Thread::ThreadId threadId);
+				const char* c_str() const { return mBuf; }
+			private:
+				char mBuf[BufSize];
+			};
+
+			struct EATHREADLIB_API SysThreadIdToStringBuffer
+			{
+			public:
+				enum { BufSize = 32 };
+				explicit SysThreadIdToStringBuffer(EA::Thread::SysThreadId sysThreadId);
+				const char* c_str() const { return mBuf; }
+			private:
+				char mBuf[BufSize];
+			};
+		}
+	}
+}
+
+#if !defined(EAThreadThreadIdToString)
+	#define EAThreadThreadIdToString(threadId)       (EA::Thread::detail::ThreadIdToStringBuffer(threadId).c_str())
+#endif
+#if !defined(EAThreadSysThreadIdToString)
+	#define EAThreadSysThreadIdToString(sysThreadId) (EA::Thread::detail::SysThreadIdToStringBuffer(sysThreadId).c_str())
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Inline functions
+///////////////////////////////////////////////////////////////////////////////
+
+// GetSysThreadId is implemented in our associated .cpp file for Microsoft platforms
+// without Posix threads, for Sony and Apple platforms, and for C++11 concurrency builds.
+// Everywhere else the two inline definitions below are used.
+#if !((defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE) || \
+      defined(EA_PLATFORM_SONY)  || \
+      defined(EA_PLATFORM_APPLE) || \
+      EA_USE_CPP11_CONCURRENCY)
+
+	inline EA::Thread::SysThreadId EA::Thread::GetSysThreadId(ThreadId id)
+	{
+		return id; // ThreadId == SysThreadId in this case
+	}
+
+	inline EA::Thread::SysThreadId EA::Thread::GetSysThreadId()
+	{
+		return GetThreadId(); // ThreadId == SysThreadId in this case
+	}
+
+#endif
+
+#endif // EATHREAD_EATHREAD_H
+
+
+
@@ -0,0 +1,480 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for thread-safe primitive operations.
+// 
+// EAThread atomics do NOT imply the use of read/write barriers.  This is 
+// partly due to historical reasons and partly due to EAThread's internal 
+// code being optimized for not using barriers.
+//
+// In the future, we are considering migrating to an atomics interface which
+// defaults to using full read/write barriers while allowing users
+// to opt out of full barrier usage.  The C++11 atomics interface already provides
+// similar functionality.
+//
+// http://en.cppreference.com/w/cpp/atomic/memory_order
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+#define EATHREAD_EATHREAD_ATOMIC_H
+
+
+#include <EABase/eabase.h>
+EA_DISABLE_ALL_VC_WARNINGS()
+#include <stddef.h>
+EA_RESTORE_ALL_VC_WARNINGS()
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing. Let the default implementation below be used.
+//#elif defined(EA_USE_CPP11_CONCURRENCY) && EA_USE_CPP11_CONCURRENCY
+//    #include <eathread/cpp11/eathread_atomic_cpp11.h> // CPP11 atomics are currently broken and slow.  To be re-enabled for other platforms when VS2013 is released.
+#elif defined(EA_USE_COMMON_ATOMICINT_IMPLEMENTATION) && EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+	#include <eathread/internal/eathread_atomic.h>
+#elif defined(EA_PLATFORM_APPLE)
+	#include <eathread/apple/eathread_atomic_apple.h>
+#elif defined(EA_PROCESSOR_X86) || ((defined(EA_PLATFORM_WINRT) || defined(EA_PLATFORM_WINDOWS_PHONE)) && defined(EA_PROCESSOR_ARM))
+	#include <eathread/x86/eathread_atomic_x86.h>
+#elif defined(EA_PROCESSOR_X86_64)
+	#include <eathread/x86-64/eathread_atomic_x86-64.h>
+#elif defined(EA_PLATFORM_ANDROID)
+	#if EATHREAD_C11_ATOMICS_AVAILABLE
+		#include <eathread/android/eathread_atomic_android_c11.h>  // Android API 21+ only support C11 atomics
+	#else
+		#include <eathread/android/eathread_atomic_android.h>
+	#endif
+#elif defined(EA_COMPILER_GCC) || defined(CS_UNDEFINED_STRING)
+	#include <eathread/gcc/eathread_atomic_gcc.h>
+#else
+	#error Platform not supported yet.
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+// EATHREAD_ATOMIC_128_SUPPORTED
+//
+// Defined as 0 or 1. Defined as 1 whenever possible for the given compiler/platform combination.
+// Defines if 128 bit atomic operations are supported.
+// Such operations are only ever supported on 64 bit platforms.
+//
+#ifndef EATHREAD_ATOMIC_128_SUPPORTED           // If not defined by one of the above headers...
+	#define EATHREAD_ATOMIC_128_SUPPORTED 0
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		enum Atomic64Implementation /// The choices accepted by SetAtomic64Implementation.
+		{
+			kAtomic64Emulated,
+			kAtomic64Native
+		};
+
+		/// SetAtomic64Implementation
+		/// Some platforms have multiple implementations, some of which support
+		/// double word atomics and some that don't. For example, certain ARM
+		/// processors will support the ldrexd/strexd atomic instructions but
+		/// others will not. 
+		EATHREADLIB_API void SetAtomic64Implementation(Atomic64Implementation implementation);
+	}
+}
+
+
+#if !defined(EA_THREAD_ATOMIC_IMPLEMENTED) // If there wasn't a processor-specific version already defined...
+
+	// Fail the build if atomics aren't being defined for the given platform/compiler.
+	// If we need to add an exception here, we can add an appropriate ifdef.
+	static_assert(false, "atomic operations must be defined for this platform.");
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// Standalone atomic functions
+			/// These act the same as the class functions below.
+			/// The T return values are the previous value, except for the
+			/// AtomicFetchSwap function which returns the swapped out value.
+			///
+			/// T    AtomicGetValue(volatile T*);
+			/// T    AtomicGetValue(const volatile T*);
+			/// void AtomicSetValue(volatile T*, T value);
+			/// T    AtomicFetchIncrement(volatile T*);
+			/// T    AtomicFetchDecrement(volatile T*);
+			/// T    AtomicFetchAdd(volatile T*, T value);
+			/// T    AtomicFetchSub(volatile T*, T value);
+			/// T    AtomicFetchOr(volatile T*, T value);
+			/// T    AtomicFetchAnd(volatile T*, T value);
+			/// T    AtomicFetchXor(volatile T*, T value);
+			/// T    AtomicFetchSwap(volatile T*, T value);
+			/// T    AtomicFetchSwapConditional(volatile T*, T value, T condition);
+			/// bool AtomicSetValueConditional(volatile T*, T value, T condition);
+
+
+
+			/// class AtomicInt
+			///
+			/// Implements thread-safe access to an integer and primary operations on that integer.
+			/// AtomicIntegers are commonly used as lightweight flags and signals between threads
+			/// or as the synchronization object for spinlocks. Those familiar with the Win32 API
+			/// will find that AtomicInt32 is essentially a platform independent interface to 
+			/// the Win32 InterlockedXXX family of functions. Those familiar with Linux may 
+			/// find that AtomicInt32 is essentially a platform independent interface to atomic_t 
+			/// functionality.
+			///
+			/// Note that the reference implementation defined here is itself not thread-safe.
+			/// A thread-safe version requires platform-specific code.
+			///
+			/// Example usage
+			///     AtomicInt32 i = 0;
+			///
+			///     ++i;
+			///     i--;
+			///     i += 7;
+			///     i -= 3;
+			///     i = 2;
+			///     
+			///     int x = i.GetValue();
+			///     i.Increment();
+			///     bool oldValueWas6 = i.SetValueConditional(3, 6);
+			///     i.Add(4);
+			///
+			template <class T>
+			class EATHREADLIB_API AtomicInt
+			{
+			public:
+				/// ThisType
+				/// A typedef for this class type itself, for usage clarity.
+				typedef AtomicInt<T> ThisType;
+
+
+				/// ValueType
+				/// A typedef for the basic object we work with. 
+				typedef T ValueType;
+
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				AtomicInt()
+					{}
+
+
+				/// AtomicInt
+				/// Constructs with an initial value. 
+				AtomicInt(ValueType n)
+					: mValue(n) {}
+
+
+				/// AtomicInt
+				/// Copy ctor. Uses GetValue to read the value, and thus is synchronized. 
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+
+				/// AtomicInt
+				/// Assignment operator. Uses GetValue to read the value, and thus is synchronized. 
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+
+				/// GetValue
+				/// Safely gets the current value. A platform-specific version of 
+				/// this might need to do something more than just read the value.
+				ValueType GetValue() const
+					{ return mValue; }
+
+
+				/// GetValueRaw
+				/// "Unsafely" gets the current value. This is useful for algorithms 
+				/// that want to poll the value in a high performance way before 
+				/// reading or setting the value in a more costly thread-safe way. 
+				/// You should not use this function when attempting to do thread-safe
+				/// atomic operations.
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+
+				/// SetValue
+				/// Stores a new value and hands back the value that was stored before.
+				/// Because other threads are expected to access the object, calling
+				/// GetValue after SetValue might return a different value than the one
+				/// that was set with SetValue; whether it does depends on your situation.
+				ValueType SetValue(ValueType n)
+				{
+					ValueType nPrevious(mValue);
+					mValue = n;
+					return nPrevious;
+				}
+
+
+				/// SetValueConditional
+				/// Sets the value to n only if the current value equals the condition
+				/// value. Returns true if the condition was met and the assignment
+				/// occurred, and false otherwise. The comparison and the assignment are
+				/// meant to happen as one atomic operation, so that another thread cannot
+				/// intervene between the two as would be the case with simple C code.
+				bool SetValueConditional(ValueType n, ValueType condition)
+				{
+					const bool bConditionMet = (mValue == condition);
+
+					if(bConditionMet)
+						mValue = n;
+
+					return bConditionMet;
+				}
+
+
+				/// Increment
+				/// Safely increments the value. Returns the new value.
+				/// This function acts the same as the C++ pre-increment operator.
+				ValueType Increment()
+					{ return ++mValue; }
+
+
+				/// Decrement
+				/// Safely decrements the value. Returns the new value.
+				/// This function acts the same as the C++ pre-decrement operator.
+				ValueType Decrement()
+					{ return --mValue; }
+
+
+				/// Add
+				/// Safely adds a value, which can be negative. Returns the new value.
+				/// You can implement subtraction with this function by using a negative argument.
+				ValueType Add(ValueType n)
+					{ return (mValue += n); }
+
+
+				/// operators
+				/// These allow an AtomicInt object to safely act like a built-in type.
+				///
+				/// Note: The operators for AtomicInt behaves differently than standard
+				///         C++ operators in that it will always return a ValueType instead
+				///         of a reference.
+				///
+				/// cast operator
+				/// Returns the AtomicInt value as an integral type. This allows the 
+				/// AtomicInt to behave like a standard built-in integer type.
+				operator const ValueType() const
+					 { return mValue; }
+
+				/// operator =
+				/// Assigns a new value and returns the value after the operation.
+				///
+				ValueType operator=(ValueType n)
+				{
+					 mValue = n;
+					 return n;
+				}
+
+				/// operator+=
+				/// Adds a value to the AtomicInt and returns the value after the operation.
+				///
+				/// This function doesn't obey the C++ standard in that it does not return
+				/// a reference, but rather the value of the AtomicInt after the
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to
+				/// the previous value + n, as another thread might have modified the AtomicInt
+				/// immediately after the addition operation.  So rather than returning a
+				/// reference to the AtomicInt, the function returns a copy of the AtomicInt
+				/// value used in the function.
+				///
+				ValueType operator+=(ValueType n)
+				{
+					return (mValue += n);
+				}
+
+				/// operator-=
+				/// Subtracts a value from the AtomicInt and returns the value after the operation.
+				///
+				/// This function doesn't obey the C++ standard in that it does not return
+				/// a reference, but rather the value of the AtomicInt after the
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to
+				/// the previous value - n, as another thread might have modified the AtomicInt
+				/// immediately after the subtraction operation.  So rather than returning a
+				/// reference to the AtomicInt, the function returns a copy of the AtomicInt
+				/// value used in the function.
+				///
+				ValueType operator-=(ValueType n)
+				{
+					return (mValue -= n);
+				}
+
+				/// pre-increment operator++
+				/// Increments the AtomicInt and returns the value after the increment.
+				///
+				/// This function doesn't obey the C++ convention in that it does not return 
+				/// a reference, but rather a copy of the AtomicInt value after the 
+				/// operation is complete. A reference would be of little use: another thread 
+				/// might modify the AtomicInt immediately after the increment, so a caller 
+				/// could not rely on reading the previous value + 1 through it.
+				///
+				/// NOTE(review): implemented as a plain '++' on the volatile mValue member; 
+				/// no atomic instruction or lock is visible in this block.
+				ValueType operator++()
+					 { return ++mValue; }
+
+				/// post-increment operator++
+				/// Increments the AtomicInt and returns the value the AtomicInt had before
+				/// the increment operation. 
+				///
+				/// As with the built-in post-increment, the result is a copy of the old value 
+				/// and not a reference. Note that another thread might modify the AtomicInt 
+				/// immediately after the increment, so the current value is not necessarily 
+				/// the returned value + 1 by the time the caller inspects it.
+				///
+				/// NOTE(review): implemented as a plain '++' on the volatile mValue member; 
+				/// no atomic instruction or lock is visible in this block.
+				ValueType operator++(int)
+					 { return mValue++; }
+
+				/// pre-decrement operator--
+				/// Decrements the AtomicInt and returns the value after the decrement.
+				///
+				/// This function doesn't obey the C++ convention in that it does not return 
+				/// a reference, but rather a copy of the AtomicInt value after the 
+				/// operation is complete. A reference would be of little use: another thread 
+				/// might modify the AtomicInt immediately after the decrement, so a caller 
+				/// could not rely on reading the previous value - 1 through it.
+				///
+				/// NOTE(review): implemented as a plain '--' on the volatile mValue member; 
+				/// no atomic instruction or lock is visible in this block.
+				ValueType operator--()
+					 { return --mValue; }
+
+				/// post-decrement operator--
+				/// Decrements the AtomicInt and returns the value the AtomicInt had before
+				/// the decrement operation. 
+				///
+				/// As with the built-in post-decrement, the result is a copy of the old value 
+				/// and not a reference. Note that another thread might modify the AtomicInt 
+				/// immediately after the decrement, so the current value is not necessarily 
+				/// the returned value - 1 by the time the caller inspects it.
+				///
+				/// NOTE(review): implemented as a plain '--' on the volatile mValue member; 
+				/// no atomic instruction or lock is visible in this block.
+				ValueType operator--(int)
+					 { return mValue--;}
+
+			protected:
+				volatile ValueType mValue; /// May not be the same on all platforms.
+			};
+
+
+		} // namespace Thread
+
+	} // namespace EA
+
+#endif // #if EA_THREAD_ATOMIC_IMPLEMENTED
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		// Typedefs
+		// Fixed-width instantiations of AtomicInt.
+		typedef AtomicInt<int32_t>  AtomicInt32;   /// int32_t  atomic integer.
+		typedef AtomicInt<uint32_t> AtomicUint32;  /// uint32_t atomic integer.
+		typedef AtomicInt<int64_t>  AtomicInt64;   /// int64_t  atomic integer.
+		typedef AtomicInt<uint64_t> AtomicUint64;  /// uint64_t atomic integer.
+
+		// Word-sized atomic integers: the 32 bit types are used when EA_PLATFORM_WORD_SIZE 
+		// is undefined or equal to 4, and the 64 bit types otherwise.
+		#if !defined(EA_PLATFORM_WORD_SIZE) || (EA_PLATFORM_WORD_SIZE == 4)
+			typedef AtomicInt32  AtomicIWord;
+			typedef AtomicUint32 AtomicUWord;
+		#else
+			typedef AtomicInt64  AtomicIWord;
+			typedef AtomicUint64 AtomicUWord;
+		#endif
+
+		// Pointer-sized atomic integers: the 32 bit types are used when EA_PLATFORM_PTR_SIZE 
+		// is undefined or equal to 4, and the 64 bit types otherwise.
+		#if !defined(EA_PLATFORM_PTR_SIZE) || (EA_PLATFORM_PTR_SIZE == 4)
+			typedef AtomicInt32  AtomicIntPtr;
+			typedef AtomicUint32 AtomicUintPtr;
+		#else
+			typedef AtomicInt64  AtomicIntPtr;
+			typedef AtomicUint64 AtomicUintPtr;
+		#endif
+
+
+		#ifdef _MSC_VER                  // VC++ yields spurious warnings about void* being cast to an integer type and vice-versa.
+			#pragma warning(push)        // These warnings are baseless because we check for platform pointer size above.
+			#pragma warning(disable: 4311 4312 4251)
+		#endif
+
+
+		/// class AtomicPointer
+		///
+		/// For simplicity of the current implementation, AtomicPointer is built on top of
+		/// AtomicIntPtr, the signed atomic integer typedef that is selected from
+		/// EA_PLATFORM_PTR_SIZE. A pointer is stored by converting it to uintptr_t and then
+		/// to the integer ValueType of the base class; it is read back by converting that
+		/// integer to void*.
+		///
+		class EATHREADLIB_API AtomicPointer : public AtomicIntPtr
+		{
+		public:
+			typedef void* PointerValueType;
+
+			/// Constructs the AtomicPointer holding p, which is NULL if omitted.
+			AtomicPointer(void* p = NULL)
+				: AtomicIntPtr(static_cast<ValueType>(reinterpret_cast<uintptr_t>(p)))
+			{
+			}
+
+			/// Stores p via the base class integer assignment and returns this object.
+			AtomicPointer& operator=(void* p)
+			{
+				const ValueType n = static_cast<ValueType>(reinterpret_cast<uintptr_t>(p));
+				AtomicIntPtr::operator=(n);
+				return *this;
+			}
+
+			/// Implicit conversion to a pointer. It's debateable whether this should be supported.
+			operator const void*() const
+			{
+				const ValueType n = AtomicIntPtr::GetValue();
+				return reinterpret_cast<void*>(n);
+			}
+
+			/// Returns the stored pointer, as read by AtomicIntPtr::GetValue.
+			void* GetValue() const
+			{
+				const ValueType n = AtomicIntPtr::GetValue();
+				return reinterpret_cast<void*>(n);
+			}
+
+			/// Returns the stored pointer, as read by AtomicIntPtr::GetValueRaw.
+			void* GetValueRaw() const
+			{
+				const ValueType n = AtomicIntPtr::GetValueRaw();
+				return reinterpret_cast<void*>(n);
+			}
+
+			/// Stores p and returns whatever AtomicIntPtr::SetValue returns, converted to a pointer.
+			void* SetValue(void* p)
+			{
+				const ValueType nNew    = static_cast<ValueType>(reinterpret_cast<uintptr_t>(p));
+				const ValueType nResult = AtomicIntPtr::SetValue(nNew);
+				return reinterpret_cast<void*>(nResult);
+			}
+
+			/// Forwards to AtomicIntPtr::SetValueConditional with both pointers converted to integers.
+			bool SetValueConditional(void* p, void* pCondition)
+			{
+				const ValueType nNew       = static_cast<ValueType>(reinterpret_cast<uintptr_t>(p));
+				const ValueType nCondition = static_cast<ValueType>(reinterpret_cast<uintptr_t>(pCondition));
+				return AtomicIntPtr::SetValueConditional(nNew, nCondition);
+			}
+		};
+
+
+		#ifdef _MSC_VER
+			#pragma warning(pop)
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // EATHREAD_EATHREAD_ATOMIC_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements Posix-style barriers.
+// Note that thread synchronization barriers are different from 
+// memory synchronization barriers (a.k.a. fences).
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_BARRIER_H
+#define EATHREAD_EATHREAD_BARRIER_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'AtomicInt32' needs to have a
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EABarrierData
+///
+/// This is used internally by class Barrier.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+
+#if defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	// Barrier state for EA_PLATFORM_SONY, built from an ScePthreadCond and an ScePthreadMutex.
+	// We implement the barrier manually, as not all Posix thread implementations
+	// have barriers and even those that have them lack a timeout wait version.
+	struct EABarrierData{
+		ScePthreadCond  mCV;            // Wait for barrier.
+		ScePthreadMutex mMutex;         // Control access to barrier.
+		int             mnHeight;       // Number of threads required.
+		int             mnCurrent;      // Current number of threads. As threads wait, this value decreases towards zero.
+		unsigned long   mnCycle;        // Cycle count.
+		bool            mbValid;        // True if valid.
+
+		EABarrierData();                // Defined outside this header.
+	};
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	#include <pthread.h>
+
+	// Barrier state for Unix / Posix threads platforms, built from a pthread condition 
+	// variable and a pthread mutex.
+	// We implement the barrier manually, as not all Posix threads implementations 
+	// have barriers and even those that have them lack a timeout wait version.
+	struct EABarrierData{
+		pthread_cond_t  mCV;            // Wait for barrier.
+		pthread_mutex_t mMutex;         // Control access to barrier.
+		int             mnHeight;       // Number of threads required.
+		int             mnCurrent;      // Current number of threads. As threads wait, this value decreases towards zero.
+		unsigned long   mnCycle;        // Cycle count.
+		bool            mbValid;        // True if valid.
+
+		EABarrierData();                // Defined outside this header.
+	};
+
+#else // All other platforms
+	#include <eathread/eathread_atomic.h>
+	#include <eathread/eathread_semaphore.h>
+
+	// Barrier state for all remaining platforms, built from EAThread's own 
+	// AtomicInt32 and Semaphore classes.
+	struct EATHREADLIB_API EABarrierData{
+		EA::Thread::AtomicInt32    mnCurrent;       // Current number of threads. As threads wait, this value decreases towards zero.
+		int                        mnHeight;        // Number of threads required.
+		EA::Thread::AtomicInt32    mnIndex;         // Which semaphore we are using.
+		EA::Thread::Semaphore      mSemaphore0;     // First semaphore.     We can't use an array of Semaphores, because that would
+		EA::Thread::Semaphore      mSemaphore1;     // Second semaphore.    interfere with our ability to initialize them our way.
+		EABarrierData();
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		// Being private, they also cannot be called from outside the struct.
+		EABarrierData(const EABarrierData& rhs);               // copy constructor
+		EABarrierData& operator=(const EABarrierData& rhs);    // assignment operator
+	};
+
+#endif
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// BarrierParameters
+		/// Specifies barrier settings. An instance can be passed to the Barrier 
+		/// constructor or to Barrier::Init.
+		struct EATHREADLIB_API BarrierParameters
+		{
+			int  mHeight;        /// Barrier 'height'. Refers to number of threads which must wait before being released.
+			bool mbIntraProcess; /// True if the barrier is intra-process, else inter-process.
+			char mName[16];      /// Barrier name, applicable only to platforms that recognize named synchronization objects. Fixed 16 char buffer.
+
+			/// Defaults: a height of 0, intra-process, and a NULL name pointer.
+			BarrierParameters(int height = 0, bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// Barrier
+		/// A Barrier is a synchronization point for a set of threads. A barrier has
+		/// a count associated with it and threads call the wait function until the
+		/// given count of threads have reached the wait point. Then all threads 
+		/// are released. The first thread released is given a special return value
+		/// that identifies it uniquely so that one-time work can be done. 
+		///
+		/// A primary use of barriers is to spread out work between a number of threads 
+		/// and wait until the work is complete. For example, if you want to find and
+		/// count all objects of a given kind in a large grid, you might have four 
+		/// threads each work on a quadrant and wait on the barrier until all are
+		/// finished. This particular example is more practical on SMP systems than
+		/// uniprocessor systems, but there are also uniprocessor uses. It should be
+		/// noted, however, that a Barrier synchronizes the completion of -threads-, 
+		/// and not necessarily the completion of -tasks-. There may or may not be 
+		/// a direct correspondence between the two.
+		///
+		class EATHREADLIB_API Barrier
+		{
+		public:
+			/// Result
+			/// Return values of Wait. Successful results are non-negative and 
+			/// failures are negative.
+			enum Result{
+				kResultPrimary   =  0,  /// The barrier wait succeeded and this thread is the designated solitary primary thread. Similar to Posix "serial" thread.
+				kResultSecondary =  1,  /// The barrier wait succeeded and this thread is one of the secondary threads.
+				kResultError     = -1,  /// The wait resulted in error, due to various possible reasons.
+				kResultTimeout   = -2   /// The barrier wait timed out.
+			};
+
+			/// Barrier
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Barrier(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Barrier(NULL, false).
+			Barrier(const BarrierParameters* pBarrierParameters = NULL, bool bDefaultParameters = true);
+
+			/// Barrier
+			/// This is a constructor which initializes the Barrier to a specific height 
+			/// and initializes the other Barrier parameters to default values. See the
+			/// BarrierParameters struct for info on these default values.
+			Barrier(int height);
+
+			/// ~Barrier
+			/// Destroys an existing Barrier. The Barrier must not be waited on 
+			/// by any thread, otherwise the resulting behaviour is undefined.
+			~Barrier();
+
+			/// Init
+			/// Initializes the Barrier; used in cases where it cannot be initialized
+			/// via the constructor (as in the case with default construction or 
+			/// array initialization).
+			bool Init(const BarrierParameters* pBarrierParameters);
+
+			/// Wait
+			/// Causes the current thread to wait until the designated number of threads have called Wait. 
+			///
+			/// Returns one of enum Result.
+			///
+			/// A timeout means that the thread gives up its contribution to the height while 
+			/// waiting for the full height to be achieved. A timeout of zero means that a thread 
+			/// only succeeds if it is the final thread (the one which puts the height to full); 
+			/// otherwise the call returns with a timeout.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			/// If no argument is given, kTimeoutNone is used.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			Result Wait(const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the EABarrierData member.
+			void* GetPlatformData()
+				{ return &mBarrierData; }
+
+		protected:
+			EABarrierData mBarrierData;   /// Platform-specific barrier state; see the EABarrierData variants declared earlier in this header.
+
+		private:
+			// Objects of this class are not copyable.
+			// The copy operations are private so that outside code cannot call them.
+			// Their bodies copy nothing from the argument.
+			Barrier(const Barrier&){}
+			Barrier& operator=(const Barrier&){ return *this; }
+		};
+
+
+		/// BarrierFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Barrier.
+		/// A primary use of this would be to allow the Barrier implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API BarrierFactory
+		{
+		public:
+			// Conventional create/destroy pair.
+			static Barrier*    CreateBarrier();                    // Internally implemented as: return new Barrier;
+			static void        DestroyBarrier(Barrier* pBarrier);  // Internally implemented as: delete pBarrier;
+
+			// Manual construction/destruction in memory supplied by the caller. 
+			// GetBarrierSize reports how many bytes ConstructBarrier needs at pMemory.
+			static size_t      GetBarrierSize();                   // Internally implemented as: return sizeof(Barrier);
+			static Barrier*    ConstructBarrier(void* pMemory);    // Internally implemented as: return new(pMemory) Barrier;
+			static void        DestructBarrier(Barrier* pBarrier); // Internally implemented as: pBarrier->~Barrier();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+   // re-enable warning(s) disabled above.
+   #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_BARRIER_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,347 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_EATHREAD_CALLSTACK_H
+#define EATHREAD_EATHREAD_CALLSTACK_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <stddef.h>
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// CallstackContext / Context
+		/// 
+		/// These are only forward-declared here, which is sufficient for the functions
+		/// below because they take them by reference or by pointer. The full definitions
+		/// are not part of this header.
+		///
+		struct CallstackContext;
+		struct Context;
+
+
+		/// InitCallstack
+		///
+		/// Allows the user to explicitly initialize the callstack mechanism.
+		/// Only the first call to InitCallstack has an effect. Every call to 
+		/// InitCallstack must be matched by a call to ShutdownCallstack.
+		///
+		EATHREADLIB_API void InitCallstack();
+
+
+		/// ShutdownCallstack
+		///
+		/// Allows the user to explicitly shut down the callstack mechanism.
+		/// Every call to InitCallstack must be matched by a call to ShutdownCallstack.
+		/// The last call to ShutdownCallstack shuts down and frees the callstack mechanism.
+		///
+		EATHREADLIB_API void ShutdownCallstack();
+
+
+		/// GetCallstack
+		///
+		/// Gets the addresses of the calling instructions of a call stack.
+		/// If the CallstackContext parameter is used, then that execution context is used;
+		/// otherwise (pContext is NULL, the default) the current execution context is used.
+		/// The return value is the number of entries written to the callstack array.
+		/// The item at callstack[0] is from the function calling the GetCallstack function.
+		/// For most platforms the addresses reported are the addresses of the instruction 
+		/// that will next be executed upon returning from the function it is calling.
+		/// The maxDepth parameter must be at least one and callstack must be able to hold
+		/// at least one entry (a terminating NULL entry).
+		///
+		EATHREADLIB_API size_t GetCallstack(void* callstack[], size_t maxDepth, const CallstackContext* pContext = NULL);
+
+
+		/// GetCallstack
+		///
+		/// Gets the callstack based on the thread id as opposed to register context.
+		/// This overload is declared only when EA_PLATFORM_SONY is defined.
+		/// pReturnAddressArray receives the addresses and nReturnAddressArrayCapacity
+		/// is its capacity in entries.
+		///
+		#if defined(EA_PLATFORM_SONY)
+			EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, EA::Thread::ThreadId& pthread);
+		#endif
+
+
+
+
+		#if defined(EA_PLATFORM_MICROSOFT)
+			/// ThreadHandlesAreEqual
+			///
+			/// Microsoft thread handles are opaque types which are non-unique per thread.
+			/// That is, two different thread handles might refer to the same thread.
+			/// threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+			/// This is not the same as a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+			EATHREADLIB_API bool ThreadHandlesAreEqual(intptr_t threadId1, intptr_t threadId2);
+
+			/// GetThreadIdFromThreadHandle
+			///
+			/// This function is the same as EA::Thread::GetSysThreadId(ThreadId id).
+			/// This function converts from one type of Microsoft thread identifier to another.
+			/// threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+			/// The return value is a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+			/// Upon failure, the return value will be zero.
+			EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId);
+		#endif
+
+
+		/// GetCallstackContext
+		///
+		/// Gets the CallstackContext associated with the given thread.
+		/// The thread must be in a non-running state.
+		/// If the threadID is EAThread::kThreadIdInvalid, the current thread context is retrieved.
+		/// However, it's of little use to get the context of the current thread, since upon return
+		/// from the GetCallstackContext the data will not apply to the current thread any more;
+		/// thus this information is probably useful only for diagnostic purposes.
+		/// The threadId parameter is the same type as an EAThread ThreadId. It is important to 
+		/// note that an EAThread ThreadId under Microsoft platforms is a thread handle and not what 
+		/// Microsoft calls a thread id. This is by design as Microsoft thread ids are second class
+		/// citizens and likely wouldn't exist if it were not for quirks in the Windows API evolution.
+		///
+		/// Note that threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+		/// This is not the same as a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+		///
+		/// EACallstack has a general struct for each CPU type called Context, defined in EACallstack/Context.h. 
+		/// The Context struct contains the entire CPU register context information. In order to walk a thread's 
+		/// callstack, you really need only two or three of the register values from the Context. So there is a 
+		/// mini struct called CallstackContext which is just those registers needed to read a thread's callstack.
+		///
+		// When EA_USE_CPP11_CONCURRENCY is enabled the thread is identified by an EA::Thread::ThreadId 
+		// and there is no default argument; otherwise it is passed as an intptr_t which defaults to 0.
+		#if EA_USE_CPP11_CONCURRENCY
+			EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, EA::Thread::ThreadId threadId);
+		#else
+			EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId = 0);
+		#endif
+
+
+		/// GetCallstackContextSysThreadId
+		///
+		/// This is the same as GetCallstackContext, except it uses what EAThread calls SysThreadId.
+		/// On Microsoft platforms a SysThreadId is a "thread id" whereas ThreadId is "thread handle."
+		/// On non-Microsoft platforms a SysThreadId is defined to be the same as ThreadId and is often
+		/// just an integer or opaque identifier (e.g. pthread).
+		/// This function exists because it may be more convenient to work with SysThreadIds in some cases.
+		/// You can convert from a ThreadId (Microsoft thread handle) to a SysThreadId (Microsoft thread id)
+		/// with the GetThreadIdFromThreadHandle function.
+		/// The sysThreadId argument is passed as an intptr_t and defaults to 0.
+		EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId = 0);
+
+
+		/// GetCallstackContext
+		///
+		/// Gets the CallstackContext from a full Context struct. Note that the Context struct
+		/// defines the entire machine context, whereas the CallstackContext is a tiny struct
+		/// with just a couple integer members and is all that's needed to describe a callstack.
+		/// pContext defaults to NULL. NOTE(review): the behaviour for a NULL pContext is not
+		/// visible in this header; confirm against the implementation.
+		///
+		EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext = NULL);
+
+
+		/// GetModuleFromAddress
+		///
+		/// Given an address, this function tells what module it comes from. 
+		/// The primary use of this is to tell what DLL an instruction pointer comes from.
+		/// Returns the required strlen of the pModuleFileName. If the return value is >= moduleNameCapacity,
+		/// there wasn't enough space. pModuleFileName is written with as many characters as possible
+		/// and will always be zero terminated. moduleNameCapacity must be at least one, so that
+		/// there is room for the terminating zero.
+		///
+		EATHREADLIB_API size_t GetModuleFromAddress(const void* pAddress, char* pModuleFileName, size_t moduleNameCapacity);
+
+
+		/// ModuleHandle
+		/// This is a runtime module identifier. For Microsoft Windows-like platforms
+		/// this is the same thing as HMODULE. For other platforms it is a shared library
+		/// runtime library pointer, id, or handle. For Microsoft platforms, each running
+		/// DLL has a module handle.
+		/// The type is void* on Microsoft, Unix and Apple platforms and uintptr_t elsewhere.
+		#if defined(EA_PLATFORM_MICROSOFT)
+			typedef void*            ModuleHandle;  // HMODULE, from LoadLibrary()
+		#elif defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE)
+			typedef void*            ModuleHandle;  // void*, from dlopen()
+		#else
+			typedef uintptr_t        ModuleHandle;
+		#endif
+
+
+		/// GetModuleHandleFromAddress
+		///
+		/// Returns the module handle of the module that contains the given code address.
+		/// Returns 0/NULL if no associated module could be found.
+		///
+		EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress);
+
+
+		/// EAGetInstructionPointer
+		///
+		/// Returns the current instruction pointer (a.k.a. program counter).
+		/// This function is implemented as a macro, it acts as if its declaration 
+		/// were like so:
+		///     void EAGetInstructionPointer(void*& p);
+		///
+		/// For portability, this function should only be used as a standalone 
+		/// statement on its own line.
+		///
+		/// Example usage:
+		///    void* pInstruction;
+		///    EAGetInstructionPointer(pInstruction);
+		///
+		#if defined(_MSC_VER) && defined(EA_PROCESSOR_X86)
+			// We implement this via calling the next line of code as a function.
+			// Then we continue as if we were exiting that function but with no
+			// return statement. The result is that the instruction pointer will
+			// be placed on the stack and we merely pop it off the stack and 
+			// into a local variable.
+			#define EAGetInstructionPointer(p)   \
+			{                                    \
+				uintptr_t eip;                   \
+				__asm {                          \
+					__asm call GetEIP            \
+					__asm GetEIP:                \
+					__asm pop eip                \
+				}                                \
+				p = (void*)eip;                  \
+			}
+
+			EA_DISABLE_VC_WARNING(4740) 
+			inline void GetInstructionPointer(void*& p) 
+				{EAGetInstructionPointer(p);}
+			EA_RESTORE_VC_WARNING()
+
+		#elif defined(_MSC_VER) && (defined(EA_PROCESSOR_X86_64) || defined(EA_PROCESSOR_ARM))
+
+			EATHREADLIB_API EA_NO_INLINE void GetInstructionPointer(void*& p);
+
+			#define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+
+		#elif defined(__ARMCC_VERSION) // ARM compiler
+
+			// Even if there are compiler intrinsics that let you get the instruction pointer, 
+			// this function can still be useful. For example, on ARM platforms this function
+			// returns the address with the 'thumb bit' set if it's thumb code. We need this info sometimes.
+			EATHREADLIB_API void GetInstructionPointer(void*& p);
+
+			// The ARM compiler provides a __current_pc() instrinsic, which returns an unsigned integer type.
+			#define EAGetInstructionPointer(p) { uintptr_t pc = (uintptr_t)__current_pc(); p = reinterpret_cast<void*>(pc); }
+
+		//#elif defined(EA_COMPILER_CLANG) // Disabled until implemented. The GCC code below works under clang, though it wouldn't if compiler extensions were disabled.
+		//    EATHREADLIB_API void GetInstructionPointer(void*& p);
+		//
+		//    // To do: implement this directly instead of via a call to GetInstructionPointer.
+		//    #define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+			
+		#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG) // This covers EA_PLATFORM_UNIX, EA_PLATFORM_OSX 
+
+			// Even if there are compiler intrinsics that let you get the instruction pointer, 
+			// this function can still be useful. For example, on ARM platforms this function
+			// returns the address with the 'thumb bit' set if it's thumb code. We need this info sometimes.
+			EATHREADLIB_API void GetInstructionPointer(void*& p) __attribute__((noinline));
+
+			// It turns out that GCC has an extension that allows you to take the address 
+			// of a label. The code here looks a little wacky, but that's how it's done.
+			// Basically, this generates a global variable called 'label' and the assignment
+			// to 'p' reads that variable into p. One possible downside to this technique is
+			// that it relies on registers and global memory not being corrupted, yet one of
+			// reasons why we might want to be getting the instruction pointer is in dealing
+			// with some sort or processor exception which may be due to memory corruption.
+			// To consider: Make a version of this which calculates the value dynamically via asm.
+			#define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+		#else
+			#error
+		#endif
+
+
+		/// EASetStackBase / SetStackBase / GetStackBase / GetStackLimit
+		///
+		/// EASetStackBase is a macro that behaves as though it were declared as:
+		///     void EASetStackBase();
+		///
+		/// It records the current stack pointer as the bottom (beginning) of the 
+		/// stack. Which direction "bottom" lies in depends on the platform: the stack 
+		/// usually grows downward, in which case the bottom is an address that is 
+		/// above the child stack frames in memory.
+		///
+		/// Call it as early as possible on application startup, and as early as 
+		/// possible in each created thread. The beginning stack pointer has to be 
+		/// recorded this way because the platform doesn't provide an API that 
+		/// reports it, and it must be known (e.g. so that stack unwinds don't run 
+		/// past it).
+		///
+		/// For portability, use EASetStackBase only as a standalone statement on its 
+		/// own line, as its expansion may contain statements that can't work otherwise.
+		///
+		/// Example usage:
+		///    int main(int argc, char** argv) {
+		///       EASetStackBase();
+		///       . . .
+		///    }
+		///
+		/// SetStackBase sets the stack bottom explicitly instead of having 
+		/// EASetStackBase determine it. Passing NULL for pStackBase makes the function 
+		/// use its own stack location during its execution, which is a little less 
+		/// optimal than calling EASetStackBase. The uintptr_t overload converts its 
+		/// argument to void* and forwards to the void* overload.
+		///
+		/// GetStackBase returns the stack bottom that was set by EASetStackBase or 
+		/// SetStackBase, or NULL if no stack bottom was set or could be set.
+		///
+		/// GetStackLimit returns the current stack "top", which is lower in memory 
+		/// than the stack bottom if the platform grows its stack downward.
+
+		EATHREADLIB_API void  SetStackBase(void* pStackBase);
+		EATHREADLIB_API void* GetStackBase();
+		EATHREADLIB_API void* GetStackLimit();
+
+		inline void SetStackBase(uintptr_t pStackBase)
+		{
+			void* const pBase = reinterpret_cast<void*>(pStackBase);
+			SetStackBase(pBase);
+		}
+
+
+		// EASetStackBase
+		//
+		// Per-compiler definitions of the EASetStackBase macro. Every variant ends up 
+		// calling ::EA::Thread::SetStackBase, either with the stack/frame address read 
+		// in place or with NULL.
+		#if defined(_MSC_VER) && defined(EA_PROCESSOR_X86)
+			// Reads the ESP register into a local and passes it on.
+			// The local is deliberately not named after a register: the MSVC inline 
+			// assembler treats identifiers that match MASM reserved words (register 
+			// names included, regardless of case) as those words, so a local called 
+			// 'esp' would make the mov a register-to-itself move and the local would 
+			// never receive the stack pointer.
+			#define EASetStackBase()                        \
+			{                                               \
+				void* pEAStackBase;                         \
+				__asm { mov pEAStackBase, esp }             \
+				::EA::Thread::SetStackBase(pEAStackBase);   \
+			}
+
+		#elif defined(_MSC_VER) && (defined(EA_PROCESSOR_X86_64) || defined(EA_PROCESSOR_ARM))
+			// This implementation uses SetStackBase(NULL), which internally retrieves the stack pointer.
+			// The closing brace carries no line continuation, so the macro ends there no 
+			// matter what follows it.
+			#define EASetStackBase()                     \
+			{                                            \
+				::EA::Thread::SetStackBase((void*)NULL); \
+			}
+
+		#elif defined(__ARMCC_VERSION)          // ARM compiler
+
+			// Uses the ARM compiler's __current_sp() to obtain the stack pointer.
+			#define EASetStackBase()  \
+				::EA::Thread::SetStackBase((void*)__current_sp())
+
+		#elif defined(__GNUC__) // This covers EA_PLATFORM_UNIX, EA_PLATFORM_OSX
+
+			// Uses the frame address of the function in which the macro is expanded.
+			// NOTE(review): unlike the ARM compiler variant above, this expansion ends in 
+			// its own semicolon, so 'EASetStackBase();' yields an extra empty statement. 
+			// It is kept as-is because existing callers may rely on it.
+			#define EASetStackBase()  \
+				::EA::Thread::SetStackBase((void*)__builtin_frame_address(0));
+
+		#else
+			// This implementation uses SetStackBase(NULL), which internally retrieves the stack pointer.
+			// The closing brace carries no line continuation, so the macro ends there no 
+			// matter what follows it.
+			#define EASetStackBase()                     \
+			{                                            \
+				::EA::Thread::SetStackBase((void*)NULL); \
+			}
+
+		#endif
+
+		#if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_SONY)
+			// GetPthreadStackInfo
+			//
+			// With some implementations of pthread, the stack base is returned by pthread as NULL if it's the main thread,
+			// or possibly if it's a thread you created but didn't call pthread_attr_setstack manually to provide your 
+			// own stack. It's impossible for us to tell here whether there will be such a NULL return value, so we just do what
+			// we can and the user needs to beware that a NULL return value means that the system doesn't provide the 
+			// given information for the current thread. This function returns false and sets pBase and pLimit to NULL in 
+			// the case that the thread base and limit weren't returned by the system or were returned as NULL.
+			//
+			// NOTE(review): unlike the other functions in this header, this declaration carries no EATHREADLIB_API; 
+			// confirm whether that is intentional.
+
+			bool GetPthreadStackInfo(void** pBase, void** pLimit);
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // Header include guard.
+
+
+
@@ -0,0 +1,524 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_EATHREAD_CALLSTACK_CONTEXT_H
+#define EATHREAD_EATHREAD_CALLSTACK_CONTEXT_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <stddef.h>
+
+EA_DISABLE_VC_WARNING(4201)
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// CallstackContext
+		///
+		/// Processor-specific information that's needed to walk a call stack.
+		///
+		enum CallstackContextType
+		{
+			CALLSTACK_CONTEXT_UNKNOWN = 0,
+			CALLSTACK_CONTEXT_POWERPC,
+			CALLSTACK_CONTEXT_X86,              // Value of CallstackContext::kType when EA_PROCESSOR_X86 is defined.
+			CALLSTACK_CONTEXT_X86_64,           // Value of CallstackContext::kType when EA_PROCESSOR_X86_64 is defined.
+			CALLSTACK_CONTEXT_ARM,              // Value of CallstackContext::kType when EA_PROCESSOR_ARM32 is defined.
+			CALLSTACK_CONTEXT_ARM64,            // Value of CallstackContext::kType when EA_PROCESSOR_ARM64 is defined.
+			CALLSTACK_CONTEXT_MIPS,
+			CALLSTACK_CONTEXT_SPU,
+			NUMBER_OF_CALLSTACK_CONTEXT_TYPES   // Count of the enumerators above (evaluates to 8); must stay last.
+		};
+
+		// NOTE: These context structures were moved to this header as of EAThread version 1.17.02
+		// TODO: We should evaluate if these really do belong here.
+
+		// The following are base values required for processor-agnostic offline stack dumping. 
+		// Not all implementations will fill them in, and most times only the base and pointer 
+		// will be filled. Also, most of the specific contexts will have a member with the 
+		// same value as the stack pointer, e.g. mESP on the x86.
+		struct CallstackContextBase
+		{
+			uintptr_t mStackBase;       /// Used to help tell what the valid stack range is. 0 if not used.
+			uintptr_t mStackLimit;      /// Used together with mStackBase to help tell what the valid stack range is. 0 if not used.
+			uintptr_t mStackPointer;    /// Stack pointer value. 0 if not used.
+
+			CallstackContextBase() : mStackBase(0), mStackLimit(0), mStackPointer(0) {} // Every field starts at 0, i.e. "not used".
+		};
+
+			struct CallstackContextPowerPC : public CallstackContextBase // PowerPC values needed to walk a call stack.
+			{
+				uintptr_t mGPR1;        /// General purpose register 1.
+				uintptr_t mIAR;         /// Instruction address pseudo-register.
+				
+				CallstackContextPowerPC() : mGPR1(0), mIAR(0) {} // Zeroes the PowerPC fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+			struct CallstackContextX86 : public CallstackContextBase // 32-bit x86 values needed to walk a call stack.
+			{
+				uint32_t mEIP;      /// Instruction pointer.
+				uint32_t mESP;      /// Stack pointer.
+				uint32_t mEBP;      /// Base pointer.
+
+				CallstackContextX86() : mEIP(0), mESP(0), mEBP(0) {} // Zeroes the x86 fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+		#if defined(EA_PROCESSOR_X86) // On x86 builds, CallstackContext is the x86 context.
+			struct CallstackContext : public CallstackContextX86 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_X86; // Tags which processor-specific context this build's CallstackContext derives from.
+			};
+		#endif
+
+			struct CallstackContextX86_64 : public CallstackContextBase // x86-64 values needed to walk a call stack.
+			{
+				uint64_t mRIP;      /// Instruction pointer.
+				uint64_t mRSP;      /// Stack pointer.
+				uint64_t mRBP;      /// Base pointer.
+
+				CallstackContextX86_64() : mRIP(0), mRSP(0), mRBP(0) {} // Zeroes the x86-64 fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+		#if defined(EA_PROCESSOR_X86_64) // On x86-64 builds, CallstackContext is the x86-64 context.
+			struct CallstackContext : public CallstackContextX86_64 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_X86_64; // Tags which processor-specific context this build's CallstackContext derives from.
+			};
+		#endif
+
+			struct CallstackContextARM : public CallstackContextBase // 32-bit ARM values needed to walk a call stack.
+			{
+				uint32_t mFP;   /// Frame pointer; register 11 for ARM instructions, register 7 for Thumb instructions.
+				uint32_t mSP;   /// Stack pointer; register 13
+				uint32_t mLR;   /// Link register; register 14
+				uint32_t mPC;   /// Program counter; register 15
+				CallstackContextARM() : mFP(0), mSP(0), mLR(0), mPC(0) {} // Zeroes the ARM fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+		#if defined(EA_PROCESSOR_ARM32) // On 32-bit ARM builds, CallstackContext is the ARM context.
+			struct CallstackContext : public CallstackContextARM 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_ARM; // Tags which processor-specific context this build's CallstackContext derives from.
+			};
+		#endif
+
+			struct CallstackContextARM64 : public CallstackContextBase // 64-bit ARM values needed to walk a call stack.
+			{
+				uint64_t mFP;   /// Frame pointer; register 29 
+				uint64_t mSP;   /// Stack pointer; register SP 
+				uint64_t mLR;   /// Link register; register 30 
+				uint64_t mPC;   /// Program counter; register PC 
+				CallstackContextARM64() : mFP(0), mSP(0), mLR(0), mPC(0) {} // Zeroes the ARM64 fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+		#if defined(EA_PROCESSOR_ARM64) // On 64-bit ARM builds, CallstackContext is the ARM64 context.
+			struct CallstackContext : public CallstackContextARM64
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_ARM64; // Tags which processor-specific context this build's CallstackContext derives from.
+			};
+		#endif
+
+			struct CallstackContextMIPS : public CallstackContextBase // MIPS values needed to walk a call stack.
+			{
+				uintptr_t mPC;      /// Program counter.
+				uintptr_t mSP;      /// Stack pointer.
+				uintptr_t mFP;      /// Frame pointer.
+				uintptr_t mRA;      /// Return address.
+
+				CallstackContextMIPS() : mPC(0), mSP(0), mFP(0), mRA(0) {} // Zeroes the MIPS fields; the base-class fields are zeroed by CallstackContextBase().
+			};
+
+
+			struct CallstackContextSPU : public CallstackContextBase // SPU values needed to walk a call stack.
+			{
+				uint32_t mGPR0;    /// General purpose register 0, word 0: return address. If this is zero then we can still read a call stack, but simply lose the first entry.
+				uint32_t mGPR1;    /// General purpose register 1, word 0: caller stack frame address. This is required to be set in order to read the call stack properly.
+
+				CallstackContextSPU() : mGPR0(0), mGPR1(0) {} // Zeroes both registers; note that mGPR1 must then be set by the user before a call stack can be read properly.
+			};
+
+
+		union VMXRegister // One 16-byte vector register, viewable as lanes of any of the element types below.
+		{
+			uint8_t  mByte    [16 / sizeof(uint8_t )];
+			uint16_t mHalfword[16 / sizeof(uint16_t)];
+			uint32_t mWord    [16 / sizeof(uint32_t)];
+			uint64_t mDword   [16 / sizeof(uint64_t)];  // Some VMX implementations don't support 64 bit integers.
+			float    mFloat   [16 / sizeof(float)];
+			double   mDouble  [16 / sizeof(double)];    // Some VMX implementations don't support 64 bit doubles.
+		};
+
+
+
+
+		/// ContextPowerPC32
+		///
+		/// This is a generic 32 bit PowerPC with VMX context.
+		///
+		struct ContextPowerPC32
+		{
+			uint32_t    mGpr[32];    // General registers 0..31
+			uint32_t    mCr;         // Condition register
+			uint32_t    mXer;        // Fixed point exception register
+			uint32_t    mLr;         // Link register
+			uint32_t    mCtr;        // Count register low
+			uint32_t    mCtrHigh;    // Count register high
+			uint32_t    mIar;        // Instruction address register
+			uint32_t    mMsr;        // Machine status register
+			double      mFpr[32];    // Floating registers 0..31
+			double      mFpscr;      // Floating point status/control reg
+			VMXRegister mVr[32];     // Vector registers 0..31 (the array holds 32 entries)
+			VMXRegister mVscr;       // Vector status/control register
+
+		}; // ContextPowerPC32
+
+
+
+
+		/// ContextPowerPC64
+		///
+		/// This is a generic 64 bit PowerPC with VMX context.
+		///
+		struct ContextPowerPC64
+		{
+			uint64_t    mGpr[32];    // General registers 0..31
+			uint64_t    mCr;         // Condition register
+			uint64_t    mXer;        // Fixed point exception register
+			uint64_t    mLr;         // Link register
+			uint64_t    mCtr;        // Count register (one 64-bit field; ContextPowerPC32 splits it into low/high halves)
+			uint64_t    mIar;        // Instruction address register
+			uint64_t    mMsr;        // Machine status register
+			double      mFpr[32];    // Floating registers 0..31
+			double      mFpscr;      // Floating point status/control reg
+			VMXRegister mVr[32];     // Vector registers 0..31 (the array holds 32 entries)
+			VMXRegister mVscr;       // Vector status/control register
+
+		}; // ContextPowerPC64
+
+
+
+
+		/// ContextX86
+		///
+		/// Generic Intel x86 context.
+		/// This is a duplicate of the CONTEXT structure defined by Microsoft in WinNT.h.
+		///
+		struct ContextX86 // Field order and sizes are the layout; do not reorder or resize members.
+		{
+			uint32_t   ContextFlags;
+			// Debug registers.
+			uint32_t   Dr0;
+			uint32_t   Dr1;
+			uint32_t   Dr2;
+			uint32_t   Dr3;
+			uint32_t   Dr6;
+			uint32_t   Dr7;
+
+			// FLOATING_SAVE_AREA, written out inline. NOTE(review): the first three names below look like a search-and-replace artifact of WinNT.h's ControlWord/StatusWord/TagWord; they are kept because renaming would break existing users.
+			uint32_t   Controluint32_t;
+			uint32_t   Statusuint32_t;
+			uint32_t   Taguint32_t;
+			uint32_t   ErrorOffset;
+			uint32_t   ErrorSelector;
+			uint32_t   DataOffset;
+			uint32_t   DataSelector;
+			uint8_t    RegisterArea[80];
+			uint32_t   Cr0NpxState;
+			// Segment registers.
+			uint32_t   SegGs;
+			uint32_t   SegFs;
+			uint32_t   SegEs;
+			uint32_t   SegDs;
+			// Integer registers.
+			uint32_t   Edi;
+			uint32_t   Esi;
+			uint32_t   Ebx;
+			uint32_t   Edx;
+			uint32_t   Ecx;
+			uint32_t   Eax;
+			// Control registers (frame pointer, instruction pointer, flags, stack pointer and their segments).
+			uint32_t   Ebp;
+			uint32_t   Eip;
+			uint32_t   SegCs;
+			uint32_t   EFlags;
+			uint32_t   Esp;
+			uint32_t   SegSs;
+			// Extended register area, kept as 512 raw bytes.
+			uint8_t    ExtendedRegisters[512];
+
+		}; // ContextX86
+
+		#ifdef EA_PROCESSOR_X86 // Win32, Linux, OSX.
+			struct Context : public ContextX86 // On x86 builds, Context is the x86 register context.
+			{
+				// Empty: adds no members of its own to ContextX86.
+			};
+		#endif
+
+
+
+		/// ContextX86_64
+		///
+		/// Generic Intel x86-64 context.
+		/// This is a duplicate of the CONTEXT structure defined 
+		/// by Microsoft in WinNT.h in VC8 and later.
+		///
+		EA_PREFIX_ALIGN(16) // Requested 16-byte alignment; paired with EA_POSTFIX_ALIGN(16) on the closing brace.
+		struct M128A_ // One 128-bit value stored as two 64-bit halves.
+		{
+			uint64_t Low;   // Low half (unsigned).
+			int64_t  High;  // High half (signed).
+		}EA_POSTFIX_ALIGN(16);
+
+		struct XMM_SAVE_AREA32_ // Floating point / SSE save area used as ContextX86_64::FltSave. The members add up to 512 bytes.
+		{
+			uint16_t  ControlWord;
+			uint16_t  StatusWord;
+			uint8_t   TagWord;
+			uint8_t   Reserved1;
+			uint16_t  ErrorOpcode;
+			uint32_t  ErrorOffset;
+			uint16_t  ErrorSelector;
+			uint16_t  Reserved2;
+			uint32_t  DataOffset;
+			uint16_t  DataSelector;
+			uint16_t  Reserved3;
+			uint32_t  MxCsr;
+			uint32_t  MxCsr_Mask;             // The fields up to here occupy the first 32 bytes.
+			M128A_    FloatRegisters[8];      // 8 x 16 bytes.
+			M128A_    XmmRegisters[16];       // 16 x 16 bytes.
+			uint8_t   Reserved4[96];          // Pads the structure out to 512 bytes.
+		};
+
+		EA_PREFIX_ALIGN(16) struct ContextX86_64 // Field order and sizes are the layout; do not reorder or resize members.
+		{
+			uint64_t P1Home; // Register parameter home addresses.
+			uint64_t P2Home;
+			uint64_t P3Home;
+			uint64_t P4Home;
+			uint64_t P5Home;
+			uint64_t P6Home;
+			// Control flags.
+			uint32_t ContextFlags;
+			uint32_t MxCsr;
+			// Segment registers and processor flags.
+			uint16_t SegCs;
+			uint16_t SegDs;
+			uint16_t SegEs;
+			uint16_t SegFs;
+			uint16_t SegGs;
+			uint16_t SegSs;
+			uint32_t EFlags;
+			// Debug registers.
+			uint64_t Dr0;
+			uint64_t Dr1;
+			uint64_t Dr2;
+			uint64_t Dr3;
+			uint64_t Dr6;
+			uint64_t Dr7;
+			// Integer registers.
+			uint64_t Rax;
+			uint64_t Rcx;
+			uint64_t Rdx;
+			uint64_t Rbx;
+			uint64_t Rsp;
+			uint64_t Rbp;
+			uint64_t Rsi;
+			uint64_t Rdi;
+			uint64_t R8;
+			uint64_t R9;
+			uint64_t R10;
+			uint64_t R11;
+			uint64_t R12;
+			uint64_t R13;
+			uint64_t R14;
+			uint64_t R15;
+			// Program counter.
+			uint64_t Rip;
+			// Floating point state: two overlapping views of the same storage.
+			union {
+				XMM_SAVE_AREA32_ FltSave; // View 1: the whole save area as one structure.
+
+				struct { // View 2: the same bytes as individually named 128-bit values.
+					M128A_ Header[2];
+					M128A_ Legacy[8];
+					M128A_ Xmm0;
+					M128A_ Xmm1;
+					M128A_ Xmm2;
+					M128A_ Xmm3;
+					M128A_ Xmm4;
+					M128A_ Xmm5;
+					M128A_ Xmm6;
+					M128A_ Xmm7;
+					M128A_ Xmm8;
+					M128A_ Xmm9;
+					M128A_ Xmm10;
+					M128A_ Xmm11;
+					M128A_ Xmm12;
+					M128A_ Xmm13;
+					M128A_ Xmm14;
+					M128A_ Xmm15;
+				} DUMMYSTRUCTNAME; // NOTE(review): DUMMYSTRUCTNAME/DUMMYUNIONNAME are not defined in this header; presumably they are plain member names unless the Windows headers define them as macros -- confirm.
+			} DUMMYUNIONNAME;
+			// Vector registers.
+			M128A_   VectorRegister[26];
+			uint64_t VectorControl;
+			// Special debug control registers.
+			uint64_t DebugControl;
+			uint64_t LastBranchToRip;
+			uint64_t LastBranchFromRip;
+			uint64_t LastExceptionToRip;
+			uint64_t LastExceptionFromRip;
+
+		}; // ContextX86_64. NOTE(review): no EA_POSTFIX_ALIGN(16) here, unlike M128A_ and ContextARM64 -- confirm the alignment request takes effect on compilers that use the postfix form.
+
+		#ifdef EA_PROCESSOR_X86_64
+			struct Context : public ContextX86_64 // On x86-64 builds, Context is the x86-64 register context.
+			{
+				// Empty: adds no members of its own to ContextX86_64.
+			};
+		#endif
+
+
+
+
+		union DoubleFloat // Overlapping views of one 64-bit floating point register; used for ContextARM::mDoubleFloat.
+		{
+			double   d64;       // As one double.
+			float    f32[2];    // As two floats.
+			uint64_t u64;       // As one raw 64-bit integer.
+			uint32_t u32[2];    // As two raw 32-bit integers.
+		};
+
+
+		/// ContextARM
+		///
+		/// Generic ARM processor context.
+		/// There are many variations of ARM processors, so one context can't 
+		/// address them all. We assume an ARM 7 with VFPv3 here, which is the
+		/// latest we use as of 2010.
+		/// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0041c/ch09s02s02.html
+		/// http://www.arm.com/products/processors/technologies/vector-floating-point.php
+		///
+		/// mGpr[0]   Volatile register. Argument1, return value.
+		/// mGpr[1]   Volatile register. Argument2, Second 32-bits if double/int Return Value
+		/// mGpr[2]   Volatile register. Argument3.
+		/// mGpr[3]   Volatile register. Argument4. Further arguments are put on the stack.
+		/// mGpr[4]   Permanent register.
+		/// mGpr[5]   Permanent register.
+		/// mGpr[6]   Permanent register.
+		/// mGpr[7]   Permanent register. Thumb instruction set frame pointer.
+		/// mGpr[8]   Permanent register.
+		/// mGpr[9]   Permanent register. Has platform-specific uses. On iOS it's reserved for the OS.
+		/// mGpr[10]  Permanent register. SL (Stack limit, in some uses)
+		/// mGpr[11]  Permanent register. ARM instruction set frame pointer, except for Apple/iOS where it's general purpose.
+		/// mGpr[12]  Permanent register. IP (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[13]  Permanent register. SP (Stack pointer)
+		/// mGpr[14]  Permanent register. LR (Link register)
+		/// mGpr[15]  Permanent register. PC (Program Counter)
+
+		struct ContextARM
+		{
+			uint32_t    mGpr[16];           // General registers r0..r15; the per-register roles are listed in the comment above.
+			uint32_t    mCpsr;              // Current program status register.
+			uint32_t    mSpsr;              // Saved program status register.
+			uint32_t    mFpscr;             // Floating point status and control register (FPSCR).
+			DoubleFloat mDoubleFloat[32];   // If these are present, the device will have either 16 (VFPv3-D16) or 32 (VFPv3-D32) registers.
+
+		}; // ContextARM
+
+		#ifdef EA_PROCESSOR_ARM32
+			struct Context : public ContextARM // On 32-bit ARM builds, Context is the ARM register context.
+			{
+				// Empty: adds no members of its own to ContextARM.
+			};
+		#endif
+
+		/// ContextARM64
+		///
+		/// Generic ARM64 processor context.
+		/// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf (page 14)
+		/// https://en.wikipedia.org/wiki/Aarch64#AArch64 (optional reading)
+		/// http://lxr.free-electrons.com/source/arch/arm64/include/uapi/asm/sigcontext.h
+		///
+		/// mGpr[0]   Volatile register. Argument1, return value.
+		/// mGpr[1]   Volatile register. Argument2, Second 32-bits if double/int Return Value (update)
+		/// mGpr[2]   Volatile register. Argument3.
+		/// mGpr[3]   Volatile register. Argument4. 
+		/// mGpr[4]   Volatile register. Argument5. 
+		/// mGpr[5]   Volatile register. Argument6. 
+		/// mGpr[6]   Volatile register. Argument7. 
+		/// mGpr[7]   Volatile register. Argument8. 
+		/// mGpr[8]   Permanent register. syscall number is in r8.
+		/// mGpr[9]   Volatile register. Temporary data.
+		/// mGpr[10]  Volatile register. Temporary data.
+		/// mGpr[11]  Volatile register. Temporary data.
+		/// mGpr[12]  Volatile register. Temporary data.
+		/// mGpr[13]  Volatile register. Temporary data.
+		/// mGpr[14]  Volatile register. Temporary data.
+		/// mGpr[15]  Volatile register. Temporary data.
+		/// mGpr[16]  Permanent register. IP0 (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[17]  Permanent register. IP1 (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[18]  Permanent register. Has platform-specific uses. On iOS it's reserved for the OS.
+		/// mGpr[19]  Callee-saved register. 
+		/// mGpr[20]  Callee-saved register. 
+		/// mGpr[21]  Callee-saved register. 
+		/// mGpr[22]  Callee-saved register. 
+		/// mGpr[23]  Callee-saved register. 
+		/// mGpr[24]  Callee-saved register. 
+		/// mGpr[25]  Callee-saved register. 
+		/// mGpr[26]  Callee-saved register. 
+		/// mGpr[27]  Callee-saved register. 
+		/// mGpr[28]  Callee-saved register. 
+		/// mGpr[29]  Permanent register. FP (Frame pointer)
+		/// mGpr[30]  Permanent register. LR (Link register)
+		/// mGpr[31]  Permanent register. SP (Stack pointer)
+		///
+		/// Program Counter is not a General Purpose Register 
+		/// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0801a/BABGHBJC.html
+		EA_PREFIX_ALIGN(16) // Requested 16-byte alignment; paired with EA_POSTFIX_ALIGN(16) on the closing brace.
+		struct ContextARM64
+		{
+			uint64_t    mGpr[32];           // General registers; the per-register roles are listed in the comment above.
+			uint64_t    mPC;                // Program counter.
+			uint64_t    mNzcv;              // Global condition register.
+			uint32_t    mFpsr;              // Floating point status register.
+			uint32_t    mFpcr;              // Floating point control register.
+			union                           // 512 bytes of Neon register storage, viewable as any of the element types below.
+			{
+				uint8_t  mByteArray  [512];                     // Access Neon registers as raw bytes.
+				double   mDoubleArray[512/sizeof(double)];      // Access Neon registers as doubles
+				float    mFloatArray [512/sizeof(float)];       // Access Neon registers as floats
+				uint16_t mUInt16Array[512/sizeof(uint16_t)];    // Access Neon registers as uint16_t's
+				uint32_t mUInt32Array[512/sizeof(uint32_t)];    // Access Neon registers as uint32_t's
+				uint64_t mUInt64Array[512/sizeof(uint64_t)];    // Access Neon registers as uint64_t's
+			} mNeon;
+			uint32_t mPadding[2]; // required to avoid warning 4324 on vc: these 8 bytes bring the member total to 800, a multiple of the 16-byte alignment.
+		}EA_POSTFIX_ALIGN(16);// ContextARM64
+
+		#ifdef EA_PROCESSOR_ARM64
+			struct Context : public ContextARM64 // On 64-bit ARM builds, Context is the ARM64 register context.
+			{
+				// Empty: adds no members of its own to ContextARM64.
+			};
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+EA_RESTORE_VC_WARNING()
+
+#endif // Header include guard.
+
+
+
@@ -0,0 +1,254 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a condition variable in the style of Posix condition variables 
+// and Java and C# thread Monitors. (Java objects and C# monitors have built-in 
+// locks, whereas Posix (pthreads) condition variables and EAThread::Conditions
+// do not.) A Condition is usually the appropriate thread 
+// synchronization mechanism for producer/consumer situations whereby one
+// or more threads create data for one or more other threads to work on,
+// such as is the case with a message queue.    
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_CONDITION_H
+#define EATHREAD_EATHREAD_CONDITION_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_mutex.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'EA::Thread::simple_list<T>' needs to have
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAConditionData
+///
+/// This is used internally by class Condition.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if defined(EA_PLATFORM_SONY)
+	// Condition variables are built into Posix/Unix.
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	struct EAConditionData // EA_PLATFORM_SONY variant: holds the platform's own condition variable object.
+	{
+		ScePthreadCond mCV;     // The platform condition variable.
+		EAConditionData();      // Declared here only; the definition is not in this header.
+	};
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	// Condition variables are built into Posix/Unix.
+	#include <pthread.h>
+
+	struct EAConditionData // Unix / Posix-threads variant: holds a pthread condition variable.
+	{
+		pthread_cond_t mCV;     // The pthread condition variable.
+		EAConditionData();      // Declared here only; the definition is not in this header.
+	};
+
+#else // All other platforms
+	#include <eathread/eathread_semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	struct EATHREADLIB_API EAConditionData // Variant for all other platforms: built from EAThread's own atomic, semaphore and mutex types.
+	{
+		EA::Thread::AtomicInt32 mnWaitersBlocked;       // NOTE(review): presumably the number of threads currently blocked in Wait -- confirm against the implementation.
+		int                     mnWaitersToUnblock;     // NOTE(review): presumably the number of waiters a pending Signal still has to release -- confirm.
+		int                     mnWaitersDone;          // NOTE(review): meaning is not visible from this header -- confirm against the implementation.
+		EA::Thread::Semaphore   mSemaphoreBlockQueue;
+		EA::Thread::Semaphore   mSemaphoreBlockLock;
+		EA::Thread::Mutex       mUnblockLock;
+
+		EAConditionData();      // Declared here only; the definition is not in this header.
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		EAConditionData(const EAConditionData& rhs);             // copy constructor
+		EAConditionData& operator=(const EAConditionData& rhs);  // assignment operator
+	};
+
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+#if defined(EA_PLATFORM_SONY) // Maximum Condition name length; ConditionParameters::mName is sized one larger than this.
+		static const int CONDITION_VARIABLE_NAME_LENGTH_MAX = 31;
+#else
+		static const int CONDITION_VARIABLE_NAME_LENGTH_MAX = 15;
+#endif
+		/// ConditionParameters
+		/// Specifies condition variable settings.
+		struct EATHREADLIB_API ConditionParameters
+		{
+			bool mbIntraProcess;										/// True if the Condition is intra-process, else inter-process.
+			char mName[CONDITION_VARIABLE_NAME_LENGTH_MAX + 1];			/// Condition name, applicable only to platforms that recognize named synchronization objects.
+
+			ConditionParameters(bool bIntraProcess = true, const char* pName = NULL); /// With no arguments: intra-process, and pName is NULL.
+		};
+
+
+		/// Condition
+		/// Implements a condition variable thread synchronization primitive. A condition variable is usually the 
+		/// appropriate thread synchronization mechanism for producer/consumer situations whereby one or more 
+		/// threads create data for one or more other threads to work on, such as is the case with a message queue. 
+		///
+		/// To use a condition variable to wait for a resource, you Lock the Mutex for that resource, then (in a loop)
+		/// check and Wait on a condition variable that you associate with the mutex. Upon calling Wait, 
+		/// the Lock will be released so that other threads can adjust the resource. Upon return from Wait,
+		/// the Mutex is re-locked for the caller. To use a Condition to signal a change in something, you simply
+		/// call the Signal function. In the case of Signal(false), one blocking waiter will be released,
+		/// whereas with Signal(true), all blocking waiters will be released. Upon release of single or multiple
+		/// waiting threads, the Lock is contested for by all of them, so in the case of more than one waiter,
+		/// only one will immediately come away with ownership of the lock.
+		class EATHREADLIB_API Condition
+		{
+		public:
+			enum Result
+			{
+				kResultOK      =  0,
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// Condition
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Condition(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Condition(NULL, false).
+			Condition(const ConditionParameters* pConditionParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~Condition
+			/// Destroys the Condition object. If any threads are blocked waiting on 
+			/// the Condition while it is destroyed, the resulting behaviour is undefined.
+			~Condition();
+
+			/// Init
+			/// Initializes the Condition. Used for the deferred initialization described for the constructor.
+			bool Init(const ConditionParameters* pConditionParameters);
+
+			/// Wait
+			/// Waits for the Condition with timeout. You must have a Mutex 
+			/// (that you conceptually associate with the resource) locked before
+			/// calling this function or else the resulting behaviour is undefined.
+			/// Within a while loop, check the resource state and call Wait if the 
+			/// necessary condition is not met.
+			///
+			/// The call to Wait associates the Condition with your mutex, so it can
+			/// then unlock the mutex/resource (allows another thread to fill the resource).
+			///
+			/// Upon non-error return of Wait, the mutex will be re-locked by the calling 
+			/// thread, even if the result is a timeout. Upon returning from wait, before 
+			/// doing any processing as a result of a Signal, your loop should always re-check
+			/// the resource state. The Posix Wait specification explicitly notes
+			/// that uncommon 'spurious wakeups' are possible and so should be tested
+			/// for. It is impossible to test for a spurious wakeup from within this Wait
+			/// function, as this function can't know the resource state that caused the 
+			/// Signal to occur.
+			///
+			/// It should be noted that upon a kResultOK return from Wait, the user should
+			/// not assume that what the user was waiting on is still available. The signaling
+			/// of a Condition should be considered merely a hint to the waiter that the user
+			/// can probably proceed. Also, the user should usually call Wait only if the 
+			/// necessary condition is not already met; the user should check for this before calling Wait.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			Result Wait(Mutex* pMutex, const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Signal
+			/// Releases one or all waiters, depending on the input 'bBroadcast' argument.
+			/// The waiters will then contest for the Lock.
+			bool Signal(bool bBroadcast = false);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mConditionData; } // The returned pointer is the address of this object's EAConditionData member.
+
+		protected:
+			EAConditionData mConditionData; // Platform-specific state; its type is chosen by the #if chain at the top of this header.
+
+		private:
+			// Objects of this class are not copyable: the copy operations are private, so code outside the class cannot call them.
+			Condition(const Condition&){}
+			Condition& operator=(const Condition&){ return *this; }
+		};
+
+
+		/// ConditionFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Condition.
+		/// A primary use of this would be to allow the Condition implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ConditionFactory
+		{
+		public:
+			static Condition* CreateCondition();                        // Internally implemented as: return new Condition;
+			static void       DestroyCondition(Condition* pCondition);  // Internally implemented as: delete pCondition;
+			// Manual construction/destruction: the caller provides (and later releases) the memory.
+			static size_t     GetConditionSize();                       // Internally implemented as: return sizeof(Condition);
+			static Condition* ConstructCondition(void* pMemory);        // Internally implemented as: return new(pMemory) Condition;
+			static void       DestructCondition(Condition* pCondition); // Internally implemented as: pCondition->~Condition();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+
+#endif // EATHREAD_EATHREAD_CONDITION_H
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,797 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a fast, user-space mutex. Also known as a lightweight mutex.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+#define EATHREAD_EATHREAD_FUTEX_H
+
+#include <eathread/eathread.h>
+#include <eathread/eathread_atomic.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_mutex.h>
+#include <stddef.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_MANUAL_FUTEX_ENABLED
+//
+// Defined as 0 or 1.
+// If enabled then Futex is implemented with atomics and semaphores instead of
+// via a direct system-supported lightweight mutex implementation.
+//
+#ifndef EATHREAD_MANUAL_FUTEX_ENABLED
+	#if defined(EA_PLATFORM_MICROSOFT)              // VC++ has CriticalSection, which is a futex.
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 0     // Currently 0 because that's best. Can be set to 1.
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 0    // Allows us to have a spin count.        
+	#else
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 1     // Set to 1 until we can resolve any dependencies such as PPMalloc.
+	#endif
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_FUTEX_SPIN_COUNT
+//
+#ifndef EATHREAD_FUTEX_SPIN_COUNT
+	#define EATHREAD_FUTEX_SPIN_COUNT 256 
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// Futex data
+///
+/// This is used internally by class Futex.
+/// Note that we don't use an EAThread semaphore, as the direct semaphore
+/// we use here is more direct and avoids inefficiencies that result from
+/// the possibility of EAThread semaphores being optimized for being
+/// standalone.
+/// 
+#if !EA_THREADS_AVAILABLE
+	#define EA_THREAD_NONTHREADED_FUTEX 1
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		struct EAFutexSemaphore
+		{
+			int mnCount;
+		};
+	#endif
+
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548)
+	#include <mutex>
+	EA_RESTORE_VC_WARNING()
+
+#elif defined(__APPLE__)
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <eathread/eathread_semaphore.h>
+		typedef EA::Thread::Semaphore EAFutexSemaphore;
+	#endif
+
+#elif defined(EA_PLATFORM_SONY)
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <kernel/semaphore.h>
+		#include <eathread/internal/timings.h>
+
+		typedef SceKernelSema EAFutexSemaphore;        
+	#endif
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <semaphore.h>
+		typedef sem_t EAFutexSemaphore;
+	#endif
+
+#elif defined(EA_PLATFORM_MICROSOFT)
+
+	// We avoid #including heavy system headers, as this file is a common header itself.
+
+		extern "C"
+		{
+			#if defined(EA_COMPILER_GNUC)
+				// Mingw declares these slightly differently.
+				struct _CRITICAL_SECTION;
+				__declspec(dllimport) int           __stdcall InitializeCriticalSectionAndSpinCount(_CRITICAL_SECTION* pCriticalSection, unsigned long dwSpinCount);
+				__declspec(dllimport) void          __stdcall InitializeCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall DeleteCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall EnterCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall LeaveCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) int           __stdcall TryEnterCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+			#else
+				#if !defined _Must_inspect_result_ 
+					#define _Must_inspect_result_
+				#endif
+
+				struct _RTL_CRITICAL_SECTION;
+				__declspec(dllimport) _Must_inspect_result_ int           __stdcall InitializeCriticalSectionAndSpinCount(_Out_ _RTL_CRITICAL_SECTION* pCriticalSection, _In_ unsigned long dwSpinCount);
+				__declspec(dllimport) void          __stdcall InitializeCriticalSection(_Out_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall DeleteCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall EnterCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall LeaveCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) int           __stdcall TryEnterCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+			#endif
+
+			__declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+		}
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		typedef void* EAFutexSemaphore; // void* instead of HANDLE to avoid #including windows headers.
+	#endif
+
+#else
+	#define EA_THREAD_NONTHREADED_FUTEX 1
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		struct EAFutexSemaphore
+		{
+			int mnCount;
+		};
+	#endif
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if defined(_WIN64)
+			static const int FUTEX_PLATFORM_DATA_SIZE = 40; // CRITICAL_SECTION is 40 bytes on Win64.
+		#elif defined(_WIN32)
+			static const int FUTEX_PLATFORM_DATA_SIZE = 32; // CRITICAL_SECTION is 24 bytes on Win32 and 28 bytes on XBox 360.
+		#endif
+
+
+		/// class Futex
+		///
+		/// A Futex is a fast user-space mutex. It works by attempting to use
+		/// atomic integer updates for the common case whereby the mutex is
+		/// not already locked. If the mutex is already locked then the futex
+		/// drops down to waiting on a system-level semaphore. The result is 
+		/// that uncontested locking operations can be significantly faster 
+		/// than contested locks. Contested locks are slightly slower than in 
+		/// the case of a formal mutex, but usually not by much.
+		///
+		/// The Futex acts the same as a conventional mutex with respect to  
+		/// memory synchronization. Specifically: 
+		///     - A Lock or successful TryLock implies a read barrier (i.e. acquire).
+		///     - A second lock by the same thread implies no barrier.
+		///     - A failed TryLock implies no barrier.
+		///     - A final unlock by a thread implies a write barrier (i.e. release).
+		///     - A non-final unlock by a thread implies no barrier.
+		///
+		/// Futex limitations relative to Mutexes:
+		///     - Futexes cannot be inter-process.
+		///     - Futexes cannot be named.
+		///     - Futexes cannot participate in condition variables. A special 
+		///       condition variable could be made that works with them, though.
+		///     - Futex locks don't have timeouts. This could probably be
+		///       added with some work, though users generally shouldn't need timeouts. 
+		///
+		class EATHREADLIB_API Futex
+		{
+		public:
+			enum Result
+			{
+				kResultTimeout = -2
+			};
+
+			/// Futex
+			///
+			/// Creates a Futex. There are no creation options.
+			///
+			Futex();
+
+			/// ~Futex
+			///
+			/// Destroys an existing futex. The futex must not be locked by any thread
+			/// upon this call, otherwise the resulting behaviour is undefined.
+			///
+			~Futex();
+
+			/// TryLock
+			///
+			/// Tries to lock the futex; returns true if possible.
+			/// This function always returns immediately. It will return false if 
+			/// the futex is locked by another thread, and it will return true 
+			/// if the futex is not locked or is already locked by the current thread.
+			///
+			bool TryLock();
+
+			/// Lock
+			///
+			/// Locks the futex. This overload returns nothing; use GetLockCount to read the lock count.
+			/// If the futex is locked by another thread, this call will block until
+			/// the futex is unlocked. If the futex is not locked or is locked by the
+			/// current thread, this call will return immediately.
+			///
+			void Lock();
+
+			/// Lock
+			///
+			/// Tries to lock the futex until the given time.
+			/// If the futex is locked by another thread, this call will block until
+			/// the futex is unlocked or the given time has passed. If the futex is not locked
+			/// or is locked by the current thread, this call will return immediately.
+			///
+			/// Return value:
+			///     kResultTimeout Timeout
+			///     > 0            The new lock count.
+			int Lock(const ThreadTime& timeoutAbsolute);
+
+			/// Unlock
+			///
+			/// Unlocks the futex. The futex must already be locked at least once by 
+			/// the calling thread. Otherwise the behaviour is not defined.
+			///
+			void Unlock();
+
+			/// GetLockCount
+			///
+			/// Returns the number of locks on the futex. The return value from this 
+			/// function is only reliable if the calling thread already has one lock on 
+			/// the futex. Otherwise the returned value may not represent actual value
+			/// at any point in time, as other threads lock or unlock the futex soon after the call.
+			///
+			int GetLockCount() const;
+
+			/// HasLock
+			/// Returns true if the current thread has the futex locked. 
+			bool HasLock() const;
+
+			/// SetSpinCount
+			/// Specifies how many times we spin while waiting to acquire the lock.
+			void SetSpinCount(Uint spinCount);
+
+		protected:
+			#if EATHREAD_MANUAL_FUTEX_ENABLED
+				void CreateFSemaphore();
+				void DestroyFSemaphore();
+				void SignalFSemaphore();
+				void WaitFSemaphore();
+				bool WaitFSemaphore(const ThreadTime& timeoutAbsolute);
+				void OnLockAcquired(ThreadUniqueId threadUniqueId);
+			#endif
+
+		private:
+			// Objects of this class are not copyable.
+			Futex(const Futex&){}
+			Futex& operator=(const Futex&){ return *this; }
+
+		protected:
+			#if EATHREAD_MANUAL_FUTEX_ENABLED
+				AtomicUWord      mUseCount;         /// Not the same thing as lock count, as waiters increment this value.
+				uint16_t         mRecursionCount;   /// The number of times the lock-owning thread has the mutex. This is currently uint16_t for backward compatibility with PPMalloc.
+				uint16_t         mSpinCount;        /// The number of times we spin while waiting for the lock.   To do: Change these to be uint32_t once PPMalloc is no longer dependent on this.
+				ThreadUniqueId   mThreadUniqueId;   /// Unique id for owning thread; not necessarily same as type ThreadId.
+				EAFutexSemaphore mSemaphore;        /// OS-level semaphore that waiters wait on when lock attempts failed.
+			#else
+
+				#if EA_USE_CPP11_CONCURRENCY
+					std::recursive_timed_mutex mMutex;
+					int mnLockCount;
+					std::thread::id mLockingThread;
+				#elif defined(EA_COMPILER_MSVC) && defined(EA_PLATFORM_MICROSOFT) // In the case of Microsoft platforms, we just use CRITICAL_SECTION, as it is essentially a futex.
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					#if defined(_WIN64)
+						uint64_t mCRITICAL_SECTION[FUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t)];
+					#else
+						uint64_t mCRITICAL_SECTION[FUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t)];
+					#endif
+				#elif defined(EA_PLATFORM_SONY)
+					EA::Thread::Mutex mMutex;
+					Uint mSpinCount;
+				#else
+					#define EAT_FUTEX_USE_MUTEX 1
+					EA::Thread::Mutex mMutex;
+				#endif
+			#endif
+		};
+
+
+
+		/// FutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Futex.
+		/// A primary use of this would be to allow the Futex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		class EATHREADLIB_API FutexFactory
+		{
+		public:
+			// Heap-based lifetime: a Futex obtained from CreateFutex is released with DestroyFutex.
+			static Futex*  CreateFutex();                    // Internally implemented as: return new Futex;
+			static void    DestroyFutex(Futex* pFutex);      // Internally implemented as: delete pFutex;
+
+			// Caller-provided storage: pMemory must hold at least GetFutexSize() bytes. A Futex built with
+			// ConstructFutex is torn down with DestructFutex, which runs the destructor only.
+			static size_t  GetFutexSize();                   // Internally implemented as: return sizeof(Futex);
+			static Futex*  ConstructFutex(void* pMemory);    // Internally implemented as: return new(pMemory) Futex;
+			static void    DestructFutex(Futex* pFutex);     // Internally implemented as: pFutex->~Futex();
+		};
+
+
+
+		/// class AutoFutex
+		/// An AutoFutex locks the Futex in its constructor and 
+		/// unlocks the Futex in its destructor (when it goes out of scope).
+		class EATHREADLIB_API AutoFutex
+		{
+		public:
+			AutoFutex(Futex& futex); // Stores a reference to futex and locks it.
+		   ~AutoFutex();             // Unlocks the referenced futex.
+
+		protected:
+			Futex& mFutex;           // Held by reference, so the Futex must outlive this object.
+
+			// Prevent copying by default, as copying is dangerous.
+			// Both members are declared but given no inline definition here.
+			AutoFutex(const AutoFutex&);
+			const AutoFutex& operator=(const AutoFutex&);
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Inlines
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAFutexReadBarrier
+//
+// For futexes, which are intended to be used only in user-space and without 
+// talking to IO devices, DMA memory, or uncached memory, we directly use
+// memory barriers.
+	#define EAFutexReadBarrier      EAReadBarrier
+	#define EAFutexWriteBarrier     EAWriteBarrier
+	#define EAFutexReadWriteBarrier EAReadWriteBarrier
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if EATHREAD_MANUAL_FUTEX_ENABLED
+
+			inline Futex::Futex()
+			  : mUseCount(0), 
+				mRecursionCount(0),
+				mSpinCount(EATHREAD_FUTEX_SPIN_COUNT),
+				mThreadUniqueId(kThreadUniqueIdInvalid),
+				mSemaphore()
+			{
+				CreateFSemaphore();
+			}
+
+
+			inline Futex::~Futex()
+			{
+				EAT_ASSERT(mUseCount == 0);
+
+				DestroyFSemaphore();
+			}
+		
+		
+			/// OnLockAcquired
+			/// Bookkeeping run by TryLock/Lock right after the calling thread has won
+			/// the futex for the first time (i.e. not a recursive acquisition).
+			/// threadUniqueId is the caller's id; it becomes the recorded owner.
+			inline void Futex::OnLockAcquired(ThreadUniqueId threadUniqueId)
+			{
+				EAFutexReadBarrier();              // Acquire-side barrier, issued before the ownership fields are written.
+				mThreadUniqueId = threadUniqueId;  // Lets later calls from this thread detect recursion.
+				mRecursionCount = 1;               // First level of ownership.
+			}
+
+
+			/// TryLock
+			/// Non-blocking acquisition attempt for the manual (atomics + semaphore) futex.
+			/// Returns true if the futex was free and is now owned by the caller, or if the
+			/// caller already owned it (the recursion count is then incremented).
+			/// Returns false, without modifying any state, if another thread owns it.
+			inline bool Futex::TryLock()
+			{
+				ThreadUniqueId threadUniqueId;
+				EAThreadGetUniqueId(threadUniqueId);
+
+				if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+				{
+					OnLockAcquired(threadUniqueId);
+					return true;
+				}
+
+				// This only happens in the case of recursion on the same thread
+				// This is threadsafe because the only case where this equality passes
+				// is when this value was set on this thread anyway.
+				if(EATHREAD_LIKELY(mThreadUniqueId == threadUniqueId)) // If it turns out that we already have the lock...
+				{
+					// Both counters are bumped; Unlock decrements both again on a recursive release.
+					++mUseCount;
+					++mRecursionCount;
+					return true;
+				}
+
+				return false;
+			}
+
+
+			/// Lock
+			/// Blocking acquisition for the manual (atomics + semaphore) futex.
+			/// Phase 1 (only when mSpinCount is non-zero): try a compare-and-set, then spin
+			/// up to mSpinCount times hoping the owner releases the lock soon.
+			/// Phase 2: register in mUseCount and either take ownership (count went 0 -> 1),
+			/// bump the recursion count (caller is already the owner) or wait on the semaphore.
+			inline void Futex::Lock()
+			{
+				ThreadUniqueId threadUniqueId;
+				EAThreadGetUniqueId(threadUniqueId);
+
+				if(mSpinCount) // If we have spinning enabled (usually true)...
+				{
+					if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+					{
+						OnLockAcquired(threadUniqueId);
+						return;
+					}
+
+					if(mThreadUniqueId != threadUniqueId) // Don't spin if we already have the lock.
+					{
+						for(Uint count = mSpinCount; count > 0; count--) // Implement a spin lock for a number of tries.
+						{
+							// We use GetValueRaw calls below instead of atomics because we don't want atomic behavior.
+							if(mUseCount.GetValueRaw() > 1) // If there are multiple waiters, don't bother spinning any more, as they are already spinning themselves.
+								break;
+
+							if(mUseCount.GetValueRaw() == 0) // If it looks like the lock is now free, try to acquire it.
+							{
+								if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+								{
+									OnLockAcquired(threadUniqueId);
+									return;
+								}
+							}
+
+							EAProcessorPause();
+						}
+					}
+				}
+
+				// Spinning did not succeed (or is disabled): register as a user of the futex.
+				if(++mUseCount > 1) // If we could not get the lock (previous value of mUseCount was >= 1 and not 0) or we already had the lock...
+				{
+					if(mThreadUniqueId == threadUniqueId) // If we already have the lock...
+					{
+						mRecursionCount++;
+						return;
+					}
+					// Unlock() calls SignalFSemaphore() when users remain after its decrement;
+					// presumably that signal is what ends this wait (WaitFSemaphore is defined elsewhere).
+					WaitFSemaphore(); 
+				}
+				// Else the increment was from 0 to 1, and we own the lock.
+				OnLockAcquired(threadUniqueId);
+			}
+
+
+
+
+			/// Lock (timed)
+			/// Acquisition with an absolute deadline for the manual futex.
+			///     kTimeoutNone      -> same as Lock(); returns the recursion count.
+			///     kTimeoutImmediate -> same as TryLock(); returns the recursion count or kResultTimeout.
+			///     anything else     -> registers in mUseCount and waits on the semaphore until
+			///                          timeoutAbsolute; this path does not spin.
+			/// Returns the new recursion count on success or kResultTimeout if the wait timed out.
+			inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+			{
+				if(timeoutAbsolute == kTimeoutNone)
+				{
+					Lock();
+					return (int)mRecursionCount;
+				}
+				else if(timeoutAbsolute == kTimeoutImmediate)
+				{
+					if(TryLock())
+						return (int)mRecursionCount;
+					else
+						return kResultTimeout;
+				}
+				else
+				{
+					ThreadUniqueId threadUniqueId;
+					EAThreadGetUniqueId(threadUniqueId);
+
+					if(++mUseCount > 1) // If we could not get the lock (previous value of mUseCount was >= 1 and not 0) or we already had the lock...
+					{
+						if(mThreadUniqueId == threadUniqueId) // If we already have the lock...
+							return (int)++mRecursionCount;
+
+						if(!WaitFSemaphore(timeoutAbsolute))
+						{
+							// Timed out: withdraw the registration made by the increment above.
+							--mUseCount;
+							return kResultTimeout;
+						}
+					}
+					// Else the increment was from 0 to 1, and we own the lock.
+					OnLockAcquired(threadUniqueId);
+					return 1;  // Return mRecursionCount.
+				}
+			}
+
+
+			/// Unlock
+			/// Releases one level of ownership of the manual futex.
+			/// On the final release the owner id is cleared, a write barrier is issued and,
+			/// if other users are still registered in mUseCount, the semaphore is signaled.
+			/// On a recursive release only the two counters are decremented.
+			/// With asserts enabled, verifies that the caller is the recorded owner.
+			inline void Futex::Unlock()
+			{
+				#if EAT_ASSERT_ENABLED
+					ThreadUniqueId threadUniqueId;
+					EAThreadGetUniqueId(threadUniqueId);
+					EAT_ASSERT(mThreadUniqueId == threadUniqueId);
+					EAT_ASSERT((mRecursionCount > 0) && (mUseCount > 0));
+				#endif
+
+				if(EATHREAD_LIKELY(--mRecursionCount == 0))
+				{
+					// Clear ownership while we still hold the lock, before it is given up below.
+					mThreadUniqueId = kThreadUniqueIdInvalid;
+
+					// after the decrement below we will no longer own the lock
+					EAFutexWriteBarrier();
+					if(EATHREAD_UNLIKELY(--mUseCount > 0)) // Other threads are still registered as users.
+						SignalFSemaphore();
+				}
+				else
+				{
+					// this thread still owns the lock, was recursive
+					--mUseCount;
+				}
+			}
+
+
+			inline int Futex::GetLockCount() const
+			{
+				// No atomic operation or memory barrier required, as this function only
+				// has validity if it is being called from the lock-owning thread. However,
+				// we don't at this time choose to assert that mThreadUniqueId == GetThreadId().
+				return (int)mRecursionCount;
+			}
+
+
+			/// HasLock
+			/// Reports whether the owner id recorded in the futex equals the
+			/// calling thread's unique id.
+			inline bool Futex::HasLock() const
+			{
+				ThreadUniqueId callerId;
+				EAThreadGetUniqueId(callerId);
+
+				const bool bOwnedByCaller = (mThreadUniqueId == callerId);
+				return bOwnedByCaller;
+			}
+
+
+			/// SetSpinCount
+			/// Sets how many times Lock() spins before falling back to the semaphore.
+			/// In this implementation mSpinCount is a uint16_t, so requests above 0xFFFF
+			/// are clamped to 0xFFFF. Without the clamp the value would be truncated
+			/// (e.g. 65536 would become 0, which disables spinning entirely).
+			inline void Futex::SetSpinCount(Uint spinCount)
+			{
+				const Uint kMaxSpinCount = 0xFFFF; // Largest value mSpinCount can represent.
+
+				mSpinCount = static_cast<uint16_t>((spinCount < kMaxSpinCount) ? spinCount : kMaxSpinCount);
+			}
+
+		#else // #if EATHREAD_MANUAL_FUTEX_ENABLED
+
+			#if EA_USE_CPP11_CONCURRENCY
+
+				inline Futex::Futex() : mnLockCount(0) {}
+
+				inline Futex::~Futex() { EAT_ASSERT(!GetLockCount()); }
+
+				/// TryLock
+				/// Makes one non-blocking attempt on the underlying recursive mutex.
+				/// On success records the calling thread as owner and bumps the lock count.
+				inline bool Futex::TryLock()
+				{
+					const bool bAcquired = mMutex.try_lock();
+
+					if (!bAcquired)
+						return false;
+
+					// We hold the mutex from here on, so the bookkeeping below is ours to update.
+					EAT_ASSERT(mnLockCount >= 0);
+					EAT_ASSERT(mnLockCount == 0 || mLockingThread == std::this_thread::get_id());
+					++mnLockCount;
+					mLockingThread = std::this_thread::get_id();
+					return true;
+				}
+
+				/// Lock
+				/// Blocks on the underlying recursive mutex, then records the calling
+				/// thread as owner and bumps the lock count.
+				inline void Futex::Lock()
+				{
+					mMutex.lock();
+					mLockingThread = std::this_thread::get_id();
+					++mnLockCount;
+				}
+
+				/// Lock (timed)
+				/// Acquires the futex, giving up once timeoutAbsolute has passed.
+				///     kTimeoutNone      -> blocks until the lock is acquired.
+				///     kTimeoutImmediate -> makes a single non-blocking attempt.
+				///     anything else     -> timeoutAbsolute is converted to milliseconds on the
+				///                          std::chrono::system_clock timeline and used as deadline.
+				/// This matches the manual and Microsoft implementations of this function.
+				/// Returns the new lock count on success or kResultTimeout on failure.
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				{
+					if (timeoutAbsolute == kTimeoutNone)
+					{
+						// "No timeout" means wait for as long as it takes; a try_lock here
+						// would report a timeout as soon as the lock is contended.
+						mMutex.lock();
+					}
+					else if (timeoutAbsolute == kTimeoutImmediate)
+					{
+						if (!mMutex.try_lock())
+						{
+							return kResultTimeout;
+						}
+					}
+					else
+					{
+						std::chrono::milliseconds timeoutAbsoluteMs(timeoutAbsolute);
+						std::chrono::time_point<std::chrono::system_clock> timeout_time(timeoutAbsoluteMs);
+						if (!mMutex.try_lock_until(timeout_time))
+						{
+							return kResultTimeout;
+						}
+					}
+
+					EAT_ASSERT(mnLockCount >= 0);
+					EAT_ASSERT(mnLockCount == 0 || mLockingThread == std::this_thread::get_id());
+					mLockingThread = std::this_thread::get_id();
+					return ++mnLockCount; // This is safe to do because we have the lock.
+				}
+
+				/// Unlock
+				/// Drops one level of ownership. When the count reaches zero the recorded
+				/// owner is reset to a default-constructed thread id before the mutex is released.
+				inline void Futex::Unlock()
+				{
+					EAT_ASSERT(HasLock());
+
+					if (--mnLockCount == 0)
+					{
+						mLockingThread = std::thread::id();
+					}
+
+					mMutex.unlock();
+				}
+
+				inline int Futex::GetLockCount() const { return mnLockCount; }
+
+				/// HasLock
+				/// True when the lock count is positive and the recorded owner is the calling thread.
+				inline bool Futex::HasLock() const
+				{
+					return (mnLockCount > 0) && (std::this_thread::get_id() == mLockingThread);
+				}
+
+				inline void Futex::SetSpinCount(Uint)
+				{
+					// Not supported
+				}
+
+			#elif defined(EA_COMPILER_MSVC) && defined(EA_PLATFORM_MICROSOFT) // Win32, Win64, etc.
+
+				inline Futex::Futex()
+				{
+					// We use InitializeCriticalSectionAndSpinCount, as that has resulted in improved performance in practice on multiprocessors systems.
+					int rv = InitializeCriticalSectionAndSpinCount((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION, EATHREAD_FUTEX_SPIN_COUNT);
+					EAT_ASSERT(rv != 0);
+					EA_UNUSED(rv);
+				}
+
+				inline Futex::~Futex()
+				{
+					EAT_ASSERT(!GetLockCount());
+					DeleteCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				}
+
+				inline bool Futex::TryLock()
+				{
+					return TryEnterCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION) != 0;
+				}
+
+				inline void Futex::Lock()
+				{
+					EnterCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				}
+
+				/// Lock (timed)
+				/// kTimeoutNone blocks via Lock(), kTimeoutImmediate makes one TryLock() attempt,
+				/// and any other value polls TryLock(), sleeping via ThreadSleep(1) between attempts,
+				/// until it succeeds or GetThreadTime() reaches timeoutAbsolute.
+				/// Returns GetLockCount() on success or kResultTimeout on failure.
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				{
+					if(timeoutAbsolute == kTimeoutNone)
+					{
+						Lock();
+					}
+					else if(timeoutAbsolute == kTimeoutImmediate)
+					{
+						if(!TryLock())
+							return kResultTimeout;
+					}
+					else
+					{
+						for(;;)
+						{
+							if(TryLock())
+								break;
+
+							if(GetThreadTime() >= timeoutAbsolute)
+								return kResultTimeout;
+
+							ThreadSleep(1);
+						}
+					}
+
+					// Every path that reaches this point holds the critical section.
+					return GetLockCount();
+				}
+
+				inline void Futex::Unlock()
+				{
+					EAT_ASSERT(HasLock());
+					LeaveCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				} 
+
+				inline int Futex::GetLockCount() const
+				{
+					// Return the RecursionCount member of RTL_CRITICAL_SECTION.
+
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					#if defined(_WIN64)
+						return *((int*)mCRITICAL_SECTION + 3); 
+					#else
+						return *((int*)mCRITICAL_SECTION + 2);
+					#endif
+				}
+
+				inline bool Futex::HasLock() const
+				{
+					// Check the OwningThread member of RTL_CRITICAL_SECTION.
+
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					#if defined(_WIN64)
+						return (*((uint32_t*)mCRITICAL_SECTION + 4) == (uintptr_t)GetCurrentThreadId());
+					#else
+						return (*((uint32_t*)mCRITICAL_SECTION + 3) == (uintptr_t)GetCurrentThreadId());
+					#endif
+				}
+
+				inline void Futex::SetSpinCount(Uint)
+				{
+					// Not supported
+				}
+
+			#elif defined(EAT_FUTEX_USE_MUTEX)
+
+				inline Futex::Futex()
+				  { }
+
+				inline Futex::~Futex()
+				  { }
+
+				inline bool Futex::TryLock()
+				  { return mMutex.Lock(EA::Thread::kTimeoutImmediate) > 0; }
+
+				inline void Futex::Lock()
+				  { mMutex.Lock(); }
+
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				  { return mMutex.Lock(timeoutAbsolute); }
+
+				inline void Futex::Unlock()
+				  { mMutex.Unlock(); }
+
+				inline int Futex::GetLockCount() const
+				  { return mMutex.GetLockCount(); }
+
+				inline bool Futex::HasLock() const
+				  { return mMutex.HasLock(); }
+
+				inline void Futex::SetSpinCount(Uint)
+				  { }
+
+			#endif // _MSC_VER
+
+		#endif // EATHREAD_MANUAL_FUTEX_ENABLED
+
+
+
+		inline AutoFutex::AutoFutex(Futex& futex) 
+		  : mFutex(futex)
+		{
+			mFutex.Lock();
+		}
+
+		inline AutoFutex::~AutoFutex()
+		{
+			mFutex.Unlock();
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_EATHREAD_FUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,323 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// This is a small templated list implementation which suffices for our 
+// purposes but is not optimal. It is present in order to avoid dependencies
+// on external libraries.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_LIST_H
+#define EATHREAD_EATHREAD_LIST_H
+
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+#include <stddef.h> // size_t, etc.
+#include <new>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		namespace details
+		{
+			/// Default allocator implementation used by the simple_list class
+			template<typename T>
+			struct ListDefaultAllocatorImpl
+			{
+				/// Gives access to the same allocator policy for a different element type.
+				template<typename OT>
+				struct rebind { typedef ListDefaultAllocatorImpl<OT> other; };
+
+				/// Default-constructs a T. Storage comes from the allocator returned by
+				/// GetAllocator() when there is one, otherwise from global operator new.
+				T* construct()
+				{
+					Allocator* const pUserAllocator = GetAllocator();
+
+					if(!pUserAllocator)
+						return new T;
+
+					return new(pUserAllocator->Alloc(sizeof(T))) T;
+				}
+
+				/// Destroys obj and releases its storage through the same route construct() uses.
+				void destroy(T* obj)
+				{
+					Allocator* const pUserAllocator = GetAllocator();
+
+					if(!pUserAllocator)
+					{
+						delete obj;
+						return;
+					}
+
+					obj->~T();
+					pUserAllocator->Free(obj);
+				}
+			};
+		}
+
+
+		/// Simple version of an STL bidirectional list.
+		/// Implemented to avoid dependency on container implementations.
+		///
+		/// This implementation has some non-stl standard methods like find. 
+		///            
+		template<typename T, class Allocator = details::ListDefaultAllocatorImpl<T> >
+		class simple_list
+		{
+			simple_list(const simple_list&);
+			simple_list& operator=(const simple_list&);
+
+		protected:
+			struct list_node
+			{
+				T          mValue;
+				list_node* mpPrev;
+				list_node* mpNext;
+			};
+
+			typedef list_node node_t;
+			typedef typename  Allocator::template rebind<list_node>::other allocator_t;
+			
+			allocator_t      mAllocator;
+			node_t*          mpNodeHead;
+			node_t*          mpNodeTail;
+			size_t           mnSize;
+
+		public:
+			typedef T        value_type;              //< list value type
+			typedef const T  const_value_type;        //< constant list value type
+			typedef const T& const_value_ref_type;    //< constant reference list value type
+			
+			struct         const_iterator;
+			struct         iterator;
+			friend  struct const_iterator;
+			friend  struct iterator;
+
+
+			struct const_iterator
+			{
+				friend class simple_list<T>;
+
+				const_iterator()
+					: mpNode(NULL)
+				{ }
+
+				const_iterator(const const_iterator& rhs)
+					: mpNode(rhs.mpNode)
+				{ }
+
+				const_iterator& operator=(const const_iterator& rhs)
+				{
+					mpNode = rhs.mpNode;
+					return *this;
+				}
+
+				const T& operator*() const
+					{ return mpNode->mValue; }
+
+				const T* operator->() const
+					{ return &**this; }  
+					 
+				bool operator==(const const_iterator& rhs) const
+					{ return rhs.mpNode == mpNode; }
+
+				bool operator!=(const const_iterator& rhs) const
+					{ return rhs.mpNode != mpNode; }
+
+				const_iterator& operator++()
+				{
+					mpNode = mpNode->mpNext;
+					return *this;
+				}
+
+			protected:
+				const node_t* mpNode;
+
+			protected:
+				const_iterator(node_t* pNode)
+					: mpNode(pNode)
+				{ }
+
+				const_iterator& operator=(const node_t* pNode)
+				{
+					mpNode = pNode;
+					return *this;
+				}
+			}; // const_iterator
+
+
+
+			/// iterator
+			///
+			/// Mutable forward iterator. Node storage, comparison and traversal are
+			/// inherited from const_iterator; this type only removes constness from
+			/// the value it hands out.
+			struct iterator : public const_iterator
+			{
+				friend class simple_list<T>;
+
+				iterator()
+					: const_iterator(){ }
+
+				iterator(const const_iterator& rhs)
+					: const_iterator(rhs)
+				{ }
+
+				iterator& operator=(const const_iterator& rhs)
+				{
+					*static_cast<const_iterator*>(this)= rhs;
+					return *this;
+				}
+
+				T& operator*() const
+					{ return const_cast<T&>(**static_cast<const const_iterator*>(this)); }
+
+				// Returns a pointer to the value so that iter->member resolves correctly.
+				T* operator->() const
+					{ return const_cast<T*>(&**static_cast<const const_iterator*>(this)); }
+
+				iterator& operator++()
+				{
+					++(*static_cast<const_iterator*>(this));
+					return *this;
+				}
+
+			protected:
+				iterator(node_t* pNode)
+					: const_iterator(pNode)
+				{ }
+
+				// Re-targets the iterator at pNode (mpNode lives in the const_iterator base).
+				iterator& operator=(node_t* pNode)
+				{
+					this->mpNode = pNode;
+					return *this;
+				}
+			}; // iterator
+
+
+
+			simple_list()
+				: mnSize(0)
+			{
+				mpNodeHead         = mAllocator.construct();
+				mpNodeTail         = mAllocator.construct();
+				mpNodeHead->mpNext = mpNodeTail;
+				mpNodeHead->mpPrev = mpNodeTail;
+				mpNodeTail->mpNext = mpNodeHead;
+				mpNodeTail->mpPrev = mpNodeHead;
+			}
+
+			~simple_list()
+			{
+				clear();
+				mAllocator.destroy(mpNodeHead);
+				mAllocator.destroy(mpNodeTail);
+			}
+
+			bool empty() const
+				{ return mpNodeHead->mpNext == mpNodeTail; }
+
+			/// Appends a copy of value just before the tail sentinel.
+			void push_back(const T& value)
+			{
+				node_t* const pNew  = mAllocator.construct();
+				pNew->mValue        = value;
+
+				node_t* const pLast = mpNodeTail->mpPrev; // Current last node (the head sentinel when empty).
+
+				pNew->mpPrev        = pLast;
+				pNew->mpNext        = mpNodeTail;
+				pLast->mpNext       = pNew;
+				mpNodeTail->mpPrev  = pNew;
+
+				mnSize += 1;
+			}
+
+			/// Inserts a copy of value directly after the head sentinel.
+			/// All four links around the new node are updated, mirroring push_back.
+			void push_front(const T& value)
+			{
+				node_t* const pNode   = mAllocator.construct();
+				pNode->mValue         = value;
+				pNode->mpPrev         = mpNodeHead;
+				pNode->mpNext         = mpNodeHead->mpNext;
+				// The former first node (or the tail sentinel when empty) must point back at the
+				// new node; otherwise push_back/erase/pop_front would relink through a stale
+				// mpPrev and drop nodes from the list.
+				pNode->mpNext->mpPrev = pNode;
+				mpNodeHead->mpNext    = pNode;
+				++mnSize;
+			}
+
+			/// Removes and destroys the first value node; does nothing on an empty list.
+			void pop_front()
+			{
+				if(empty())
+					return;
+
+				node_t* const pFirst  = mpNodeHead->mpNext;
+				node_t* const pSecond = pFirst->mpNext; // May be the tail sentinel.
+
+				mpNodeHead->mpNext = pSecond;
+				pSecond->mpPrev    = mpNodeHead;
+
+				mAllocator.destroy(pFirst);
+				--mnSize;
+			}
+
+			size_t size() const
+				{ return mnSize; }
+
+			/// erase
+			///
+			/// Unlinks and destroys the node referenced by iter and returns an iterator to
+			/// the node that followed it. iter itself still refers to the destroyed node
+			/// afterwards and must not be used again.
+			/// Returns end() without touching the list when the list is empty or when iter
+			/// does not reference a value node (NULL node, head sentinel, or end()).
+			iterator erase(iterator& iter)
+			{
+				node_t* const pNode = const_cast<node_t*>(iter.mpNode);
+
+				// The sentinels belong to the list for its whole lifetime; destroying one
+				// (e.g. via erase(end())) would corrupt every later operation.
+				if(empty() || (pNode == NULL) || (pNode == mpNodeHead) || (pNode == mpNodeTail))
+					return end();
+
+				node_t* const pNext   = pNode->mpNext;
+				pNext->mpPrev         = pNode->mpPrev;
+				pNode->mpPrev->mpNext = pNext;
+				--mnSize;
+				mAllocator.destroy(pNode);
+				return pNext;
+			}
+
+			/// Destroys every value node, leaving only the two sentinels linked to each other.
+			void clear()
+			{
+				if(empty())
+					return;
+
+				for(node_t* pCurrent = mpNodeHead->mpNext; pCurrent != mpNodeTail; )
+				{
+					node_t* const pFollowing = pCurrent->mpNext;
+
+					// Unlink before destroying so the list stays well-formed at every step.
+					pFollowing->mpPrev       = pCurrent->mpPrev;
+					pCurrent->mpPrev->mpNext = pFollowing;
+
+					mAllocator.destroy(pCurrent);
+					pCurrent = pFollowing;
+				}
+
+				mnSize = 0;
+			}
+
+			T& front() const
+				{ return mpNodeHead->mpNext->mValue; }
+
+			const const_iterator begin() const
+				{ return mpNodeHead->mpNext; }
+
+			const const_iterator end() const
+				{ return mpNodeTail; }
+
+			/// Linear search using operator==(element, *iter).
+			/// Returns an iterator to the first match, or end() if there is none.
+			iterator find(const T& element)
+			{
+				iterator iter = begin();
+
+				for(; iter != end(); ++iter)
+				{
+					if(element == *iter)
+						break;
+				}
+
+				return iter;
+			}
+
+		}; // simple_list
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // EATHREAD_EATHREAD_LIST_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,341 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a lightweight mutex.
+/////////////////////////////////////////////////////////////////////////////
+
+// TODO(rparolin):  Consider adding support for static thread safety analysis.
+// https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+
+
+#ifndef EATHREAD_EATHREAD_MUTEX_H
+#define EATHREAD_EATHREAD_MUTEX_H
+
+#if defined(_MSC_VER)
+#include <math.h>   // #include math.h because VC++ has a header file but that requires math.h to be #included before some other headers, lest you get a warning.
+#endif
+#include <stddef.h>
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAMutexData
+///
+/// This is used internally by class Mutex.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if !EA_THREADS_AVAILABLE
+	#define EA_THREAD_NONTHREADED_MUTEX 1
+
+	struct EAMutexData // Variant selected when EA_THREADS_AVAILABLE is false: no OS mutex object, only a counter.
+	{
+		int mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		// The constructor is declared here and not defined in this header.
+		EAMutexData();
+	};
+
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <mutex>
+	EA_RESTORE_ALL_VC_WARNINGS()
+
+	#if defined EA_PLATFORM_MICROSOFT
+		#ifdef CreateMutex
+			#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+		#endif
+	#endif
+
+	struct EAMutexData // Variant selected when EA_USE_CPP11_CONCURRENCY is set: wraps a standard library mutex.
+	{
+		std::recursive_timed_mutex mMutex; // Standard mutex type that permits repeated locking by the owning thread and timed lock attempts.
+		int mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId; // This value is only valid in debug builds.
+		#endif
+		// The constructor is declared here and not defined in this header.
+		EAMutexData();
+		// Copying is disabled: the copy operations below are private and only declared.
+	private:
+		EAMutexData(const EAMutexData&);
+		EAMutexData& operator=(const EAMutexData&);
+	};
+
+#elif defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	struct EAMutexData // Variant selected for EA_PLATFORM_SONY: wraps a ScePthreadMutex.
+	{
+		ScePthreadMutex mMutex; // The platform mutex object.
+		int mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+		#endif
+		// Both functions below are declared here and not defined in this header.
+		EAMutexData();
+		void SimulateLock(bool bLock); // NOTE(review): semantics are not visible in this header; presumably updates the bookkeeping fields as if a lock/unlock had occurred -- confirm.
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+
+	#if defined(EA_PLATFORM_WINDOWS)
+		#ifdef CreateMutex
+			#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+		#endif
+	#endif
+
+	struct EAMutexData // Variant selected for EA_PLATFORM_UNIX or when EA_POSIX_THREADS_AVAILABLE is set: wraps a pthread_mutex_t.
+	{
+		pthread_mutex_t mMutex; // The POSIX mutex object.
+		int mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+		#endif
+		// Both functions below are declared here and not defined in this header.
+		EAMutexData();
+		void SimulateLock(bool bLock); // NOTE(review): semantics are not visible in this header; presumably updates the bookkeeping fields as if a lock/unlock had occurred -- confirm.
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+
+	#ifdef EA_PROCESSOR_X86_64
+		static const int MUTEX_PLATFORM_DATA_SIZE = 40; // CRITICAL_SECTION is 40 bytes on Win64. Also a multiple of sizeof(uint64_t), the element type of EAMutexData::mData.
+	#else
+		static const int MUTEX_PLATFORM_DATA_SIZE = 32; // CRITICAL_SECTION is 24 bytes on Win32, 28 bytes on XBox 360. 32 covers both and divides evenly by sizeof(uint64_t), the element type of EAMutexData::mData.
+	#endif
+
+	#ifdef CreateMutex
+		#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+	#endif
+
+	struct EATHREADLIB_API EAMutexData // Variant selected for EA_PLATFORM_MICROSOFT when POSIX threads are not available.
+	{
+		uint64_t mData[MUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t)]; // Holds either CRITICAL_SECTION or HANDLE if mbIntraProcess is true or false, respectively.
+		int      mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		bool     mbIntraProcess; // Selects how mData is interpreted (see the mData comment).
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+			EA::Thread::SysThreadId mSysThreadId; // This value is only valid in debug builds.
+		#endif
+		// The constructor is declared here and not defined in this header.
+		EAMutexData();
+	};
+
+#else
+	#define EA_THREAD_NONTHREADED_MUTEX 1
+
+	struct EAMutexData // Fallback variant used when none of the platform branches above apply: only a counter, no OS mutex object.
+	{
+		int mnLockCount; // NOTE(review): presumably the value reported by Mutex::GetLockCount -- confirm in the implementation.
+		// The constructor is declared here and not defined in this header.
+		EAMutexData();
+	};
+
+
+
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// MutexParameters
+		/// Specifies mutex settings. Accepted by the Mutex constructor and by Mutex::Init.
+		struct EATHREADLIB_API MutexParameters
+		{
+			bool mbIntraProcess; ///< True if the mutex is intra-process, else inter-process.
+			char mName[128];      ///< Mutex name, applicable only to platforms that recognize named synchronization objects.
+			// Defaults describe an intra-process mutex with no name (pName of NULL).
+			MutexParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class Mutex
+		///
+		/// Mutexes are assumed to always be 'recursive', meaning that a given thread 
+		/// can lock the mutex more than once. If you want a specifically non-recursive 
+		/// mutex, you can use a semaphore with a lock count of 1.
+		class EATHREADLIB_API Mutex
+		{
+		public:
+			enum Result
+			{
+				kResultError   = -1, ///< Returned by Lock on error.
+				kResultTimeout = -2  ///< Returned by Lock when the timeout expired.
+			};
+
+			/// Mutex
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Mutex(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Mutex(NULL, false).
+			Mutex(const MutexParameters* pMutexParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~Mutex
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~Mutex();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with Mutex(NULL, false).
+			bool Init(const MutexParameters* pMutexParameters);
+
+			/// Lock
+			/// Locks the mutex, with a timeout specified. This function will
+			/// return immediately if the mutex is not locked or if the calling
+			/// thread already has it locked at least once. If the mutex is 
+			/// locked by another thread, this function will block until the mutex
+			/// is unlocked by the owning thread or until the timeout time has
+			/// passed. This function may return before the specified timeout has passed
+			/// and so should not be implicitly used as a timer. Some platforms may 
+			/// return immediately if the timeout is specified as anything but kTimeoutNone.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			/// Return value:
+			///     kResultError   Error
+			///     kResultTimeout Timeout
+			///     > 0            The new lock count.
+			int Lock(const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked at least once by 
+			/// the calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock.
+			int Unlock();
+
+			/// GetLockCount
+			/// Returns the number of locks on the mutex. The return value from this 
+			/// function is only reliable if the calling thread already has one lock on 
+			/// the critical section. Otherwise the value could be changing as other 
+			/// threads lock or unlock the mutex soon after the call.
+			/// This function is useful in debugging and asserting and useful for backing
+			/// out of recursive locks under the case of exceptions and other abortive 
+			/// situations. This function will not necessarily call memory synchronization 
+			/// primitives (e.g. ReadBarrier) itself on systems that require SMP synchronization.
+			int GetLockCount() const;
+
+
+			/// HasLock
+			/// Returns true if the current thread has the mutex locked. 
+			/// This function is reliable only in a debug build whereby 
+			/// EAT_ASSERT_ENABLED is defined to 1. This function can thus
+			/// only be used in debugging situations whereby you want to 
+			/// assert that you have a mutex locked or not. To make this 
+			/// function work in a non-debug environment would necessitate
+			/// adding an undesirable amount of code and data.
+			bool HasLock() const;
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the EAMutexData member.
+			void* GetPlatformData()
+				{ return &mMutexData; }
+
+		protected:
+			EAMutexData mMutexData; ///< Platform-specific state; its definition is selected by the preprocessor at the top of this header.
+
+		private:
+			// Objects of this class are not copyable: both copy operations are private and their bodies copy nothing.
+			Mutex(const Mutex&){}
+			Mutex& operator=(const Mutex&){ return *this; }
+		};
+
+
+
+		/// MutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Mutex.
+		/// A primary use of this would be to allow the Mutex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API MutexFactory
+		{
+		public:
+			static Mutex*  CreateMutex();                    // Internally implemented as: return new Mutex;
+			static void    DestroyMutex(Mutex* pMutex);      // Internally implemented as: delete pMutex;
+			// Manual construction: the caller supplies memory of at least GetMutexSize() bytes and pairs ConstructMutex with DestructMutex.
+			static size_t  GetMutexSize();                   // Internally implemented as: return sizeof(Mutex);
+			static Mutex*  ConstructMutex(void* pMemory);    // Internally implemented as: return new(pMemory) Mutex;
+			static void    DestructMutex(Mutex* pMutex);     // Internally implemented as: pMutex->~Mutex();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoMutex
+		/// Holds a Mutex locked for the lifetime of the AutoMutex object: the
+		/// constructor calls Lock() on the given Mutex and the destructor calls
+		/// Unlock() on it (when the object goes out of scope).
+		class EATHREADLIB_API AutoMutex
+		{
+		public:
+			/// Locks the given mutex; Lock() is called with its default timeout argument.
+			AutoMutex(Mutex& mutex)
+				: mMutex(mutex)
+			{
+				mMutex.Lock();
+			}
+
+			/// Unlocks the mutex that was locked in the constructor.
+			~AutoMutex()
+			{
+				mMutex.Unlock();
+			}
+
+		protected:
+			Mutex& mMutex; ///< The mutex being held; it must outlive this object.
+
+			// Copying is dangerous (it would lead to an extra Unlock), so the copy
+			// operations are inaccessible to users and not defined here.
+			AutoMutex(const AutoMutex&);
+			const AutoMutex& operator=(const AutoMutex&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_EATHREAD_MUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,302 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a classic thread pool.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_POOL_H
+#define EATHREAD_EATHREAD_POOL_H
+
+
+#ifndef EATHREAD_EATHREAD_THREAD_H
+	#include <eathread/eathread_thread.h>
+#endif
+#ifndef EATHREAD_EATHREAD_CONDITION_H
+	#include <eathread/eathread_condition.h>
+#endif
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+	#include <eathread/eathread_atomic.h>
+#endif
+#ifndef EATHREAD_EATHREAD_LIST_H
+	#include <eathread/eathread_list.h>
+#endif
+#include <stddef.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'EA::Thread::simple_list<T>' needs to have
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////////
+// EA_THREAD_POOL_MAX_SIZE
+//
+// Defines the maximum number of threads the pool can have.
+// Currently we have a limit of at most N threads in a pool, in order to 
+// simplify memory management issues.
+//
+#ifndef EA_THREAD_POOL_MAX_SIZE
+	#define EA_THREAD_POOL_MAX_SIZE 16
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// ThreadPoolParameters
+		/// Specifies how a thread pool is initialized. Accepted by the ThreadPool constructor and by ThreadPool::Init.
+		struct EATHREADLIB_API ThreadPoolParameters
+		{
+			unsigned         mnMinCount;                ///< Default is ThreadPool::kDefaultMinCount.
+			unsigned         mnMaxCount;                ///< Default is ThreadPool::kDefaultMaxCount.
+			unsigned         mnInitialCount;            ///< Default is ThreadPool::kDefaultInitialCount.
+			ThreadTime       mnIdleTimeoutMilliseconds; ///< Default is ThreadPool::kDefaultIdleTimeout. This is a relative time, not an absolute time. Can be a millisecond value or Thread::kTimeoutNone or Thread::kTimeoutImmediate.
+			unsigned         mnProcessorMask;           ///< Default is 0xffffffff. Controls which processors we are allowed to create threads on. Default is all processors.
+			ThreadParameters mDefaultThreadParameters;  ///< Currently only the mnStackSize, mnPriority, and mpName fields from ThreadParameters are used.
+
+			ThreadPoolParameters();
+
+		private:
+			// Prevent default generation of these functions by declaring but not defining them.
+			ThreadPoolParameters(const ThreadPoolParameters& rhs);               // copy constructor
+			ThreadPoolParameters& operator=(const ThreadPoolParameters& rhs);    // assignment operator
+		};
+
+
+		/// class ThreadPool
+		/// 
+		/// Implements a conventional thread pool. Thread pools are useful for situations where
+		/// thread creation and destruction is common and the application speed would improve
+		/// by using pre-made threads that are ready to execute. 
+		class EATHREADLIB_API ThreadPool
+		{
+		public:
+			enum Default
+			{
+				kDefaultMinCount      = 0,
+				kDefaultMaxCount      = 4,
+				kDefaultInitialCount  = 0,
+				kDefaultIdleTimeout   = 60000, // Milliseconds
+				kDefaultProcessorMask = 0xffffffff // All processors (see ThreadPoolParameters::mnProcessorMask).
+			};
+
+			enum Result
+			{
+				kResultOK       =  0,
+				kResultError    = -1,
+				kResultTimeout  = -2,
+				kResultDeferred = -3 // Returned by Begin when the job could not be started right away (see Begin and Pause).
+			};
+
+			enum JobWait
+			{
+				kJobWaitNone,    ///< Wait for no jobs to complete, including those currently running.
+				kJobWaitCurrent, ///< Wait for currently proceeding jobs to complete but not those that haven't started.
+				kJobWaitAll      ///< Wait for all jobs to complete, including those that haven't yet begun.
+			};
+
+			/// ThreadPool
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use ThreadPool(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to ThreadPool(NULL, false).
+			ThreadPool(const ThreadPoolParameters* pThreadPoolParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~ThreadPool
+			/// Destroys the thread pool. Waits for any busy threads to complete.
+		   ~ThreadPool();
+
+			/// Init
+			/// Initializes the thread pool with given characteristics. If the thread pool is 
+			/// already initialized, this updates the settings.
+			bool Init(const ThreadPoolParameters* pThreadPoolParameters);
+
+			/// Shutdown
+			/// Disables the thread pool, waits for busy threads to complete, destroys all threads.
+			///
+			/// jobWait selects which jobs are waited for (see enum JobWait). With kJobWaitAll, 
+			/// Shutdown waits for all jobs, including jobs that haven't been started yet, to 
+			/// complete. With kJobWaitCurrent, only currently proceeding jobs are waited for.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			bool Shutdown(JobWait jobWait = kJobWaitAll, const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Begin
+			/// Starts a thread from the pool with the given parameters. 
+			/// Returns kResultError or a job id of >= kResultOK. A return of kResultDeferred is 
+			/// possible if the number of active threads is greater or equal to the max count.
+			/// If input ppThread is non-NULL and return value is >= kResultOK, the returned thread
+			/// will be the thread used for the job. Else the returned thread pointer will be NULL.
+			/// If input bEnableDeferred is false but the max count of active threads has been 
+			/// reached, a new thread is nevertheless created.
+			int Begin(IRunnable*       pRunnable, void* pContext = NULL, Thread** ppThread = NULL, bool bEnableDeferred = false);
+			int Begin(RunnableFunction pFunction, void* pContext = NULL, Thread** ppThread = NULL, bool bEnableDeferred = false);
+
+			/// WaitForJobCompletion
+			/// Waits for an individual job or for all jobs (job id of -1) to complete. 
+			/// If a job id is given which doesn't correspond to any existing job, 
+			/// the job is assumed to have been completed and the wait completes immediately.
+			/// If new jobs are added while the wait is occurring, this function will wait
+			/// for those jobs to complete as well. jobWait is valid only if nJob is -1.
+			/// Note that the timeout is specified in absolute time and not relative time.
+			/// Returns one of enum Result.
+			int WaitForJobCompletion(int nJob = -1, JobWait jobWait = kJobWaitAll, const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Pause
+			/// Enables or disables the activation of threads from the pool. 
+			/// When paused, calls to Begin will return kResultDeferred instead of kResultOK.
+			void Pause(bool bPause);
+
+			/// Lock / Unlock
+			/// Locks and unlocks the thread pool thread list. Use as a pair around
+			/// calls to AddThread, GetThreadInfo and GetThreadCount.
+			void Lock();
+			void Unlock();
+
+			struct Job
+			{
+				int              mnJobID;       ///< Unique job id.
+				IRunnable*       mpRunnable;    ///< User-supplied IRunnable. This is an alternative to mpFunction.
+				RunnableFunction mpFunction;    ///< User-supplied function. This is an alternative to mpRunnable.
+				void*            mpContext;     ///< User-supplied context.
+
+				Job();
+			};
+
+			struct ThreadInfo
+			{
+				volatile bool mbActive;         ///< True if the thread is currently busy working on a job.
+				volatile bool mbQuit;           ///< If set to true then this thread should quit at the next opportunity.
+			  //bool          mbPersistent;     ///< If true then this thread is never quit at runtime. False by default.
+				Thread*       mpThread;         ///< The Thread itself.
+				ThreadPool*   mpThreadPool;     ///< The ThreadPool that owns this thread.
+				Job           mCurrentJob;      ///< The most recent job a thread is or was working on.
+
+				ThreadInfo();
+			};
+
+			/// AddThread
+			/// Adds a new thread with the given ThreadParameters.
+			/// The return value is not safe to use unless this function is called
+			/// and the result used within a Lock/Unlock pair.
+			/// It's the user's responsibility to supply ThreadParameters that are sane.
+			/// If bBeginThread is true, then the Thread is started via a call to 
+			/// pThreadInfo->mpThread->Begin(ThreadFunction, pThreadInfo, &tp);
+			/// Otherwise the user is expected to manually start the thread.
+			ThreadInfo* AddThread(const ThreadParameters& tp, bool bBeginThread);
+
+			/// GetThreadInfo
+			/// Gets the ThreadInfo for the nth Thread identified by index. 
+			/// You must call this function and use the info within a Lock/Unlock pair 
+			/// on the thread pool.
+			ThreadInfo* GetThreadInfo(int index);
+
+			/// GetThreadCount
+			/// Unless you call this function while the Pool is locked (via Lock), the return
+			/// value may be out of date by the time you read it. 
+			int GetThreadCount();
+
+		protected:
+			typedef EA::Thread::simple_list<Job>         JobList;
+			typedef EA::Thread::simple_list<ThreadInfo*> ThreadInfoList;
+
+			// Member functions. These are declared here and defined outside this header.
+			static intptr_t ThreadFunction(void* pContext);
+			ThreadInfo*     CreateThreadInfo();
+			void            SetupThreadParameters(ThreadParameters& tp);
+			void            AdjustThreadCount(unsigned nCount);
+			Result          QueueJob(const Job& job, Thread** ppThread, bool bEnableDeferred);
+			void            AddThread(ThreadInfo* pThreadInfo);
+			void            RemoveThread(ThreadInfo* pThreadInfo);
+			void            FixThreads();
+
+			// Member data
+			bool                mbInitialized;              // NOTE(review): presumably set once Init has run -- confirm in the implementation.
+			uint32_t            mnMinCount;                 // Min number of threads to have available.
+			uint32_t            mnMaxCount;                 // Max number of threads to have available.
+			AtomicInt32         mnCurrentCount;             // Current number of threads available.
+			AtomicInt32         mnActiveCount;              // Current number of threads busy with jobs.
+			ThreadTime          mnIdleTimeoutMilliseconds;  // Timeout before quitting threads that have had no jobs.
+			uint32_t            mnProcessorMask;            // If mask is not 0xffffffff then we manually round-robin assign processors.
+			uint32_t            mnProcessorCount;           // The number of processors currently present.
+			uint32_t            mnNextProcessor;            // Used if we are manually round-robin assigning processors. 
+			AtomicInt32         mnPauseCount;               // A positive value means we pause working on jobs.
+			AtomicInt32         mnLastJobID;                // NOTE(review): presumably the most recently issued Job::mnJobID -- confirm in the implementation.
+			ThreadParameters    mDefaultThreadParameters;   // NOTE(review): presumably taken from ThreadPoolParameters::mDefaultThreadParameters -- confirm in the implementation.
+			Condition           mThreadCondition;           // Manages signalling mJobList.
+			Mutex               mThreadMutex;               // Guards manipulation of mThreadInfoList and mJobList.
+			ThreadInfoList      mThreadInfoList;            // List of threads in our pool.
+			JobList             mJobList;                   // List of waiting jobs.
+
+		private:
+			// Prevent default generation of these functions by declaring but not defining them.
+			ThreadPool(const ThreadPool& rhs);               // copy constructor
+			ThreadPool& operator=(const ThreadPool& rhs);    // assignment operator
+		};
+
+
+
+		/// ThreadPoolFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class ThreadPool.
+		/// A primary use of this would be to allow the ThreadPool implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ThreadPoolFactory
+		{
+		public:
+			static ThreadPool*  CreateThreadPool();                          // Internally implemented as: return new ThreadPool;
+			static void         DestroyThreadPool(ThreadPool* pThreadPool);  // Internally implemented as: delete pThreadPool;
+			// Manual construction: the caller supplies memory of at least GetThreadPoolSize() bytes and pairs ConstructThreadPool with DestructThreadPool.
+			static size_t       GetThreadPoolSize();                         // Internally implemented as: return sizeof(ThreadPool);
+			static ThreadPool*  ConstructThreadPool(void* pMemory);          // Internally implemented as: return new(pMemory) ThreadPool;
+			static void         DestructThreadPool(ThreadPool* pThreadPool); // Internally implemented as: pThreadPool->~ThreadPool();
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_POOL_H
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a lightweight mutex with multiple reads but single writer.
+// This allows for high performance systems whereby the consumers of data
+// are more common than the producers of data.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWMUTEX_H
+#define EATHREAD_EATHREAD_RWMUTEX_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EARWMutexData
+///
+/// This is used internally by class RWMutex.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_condition.h>
+
+	struct EATHREADLIB_API EARWMutexData // State held by RWMutex in its mRWMutexData member; built from a Mutex and two Conditions.
+	{
+		int                   mnReadWaiters;   // NOTE(review): presumably the number of threads waiting for a read lock -- confirm in the implementation.
+		int                   mnWriteWaiters;  // NOTE(review): presumably the number of threads waiting for the write lock -- confirm in the implementation.
+		int                   mnReaders;       // NOTE(review): presumably the number of read locks currently held -- confirm in the implementation.
+		EA::Thread::ThreadId  mThreadIdWriter; // NOTE(review): presumably identifies the thread holding the write lock -- confirm in the implementation.
+		EA::Thread::Mutex     mMutex;          // NOTE(review): presumably guards the fields above -- confirm in the implementation.
+		EA::Thread::Condition mReadCondition;  // NOTE(review): presumably signalled to wake waiting readers -- confirm in the implementation.
+		EA::Thread::Condition mWriteCondition; // NOTE(review): presumably signalled to wake waiting writers -- confirm in the implementation.
+		// The constructor is declared here and not defined in this header.
+		EARWMutexData();
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		EARWMutexData(const EARWMutexData& rhs);               // copy constructor
+		EARWMutexData& operator=(const EARWMutexData& rhs);    // assignment operator
+	};
+/////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// RWMutexParameters
+		/// Specifies rwlock settings. Accepted by the RWMutex constructor and by RWMutex::Init.
+		struct EATHREADLIB_API RWMutexParameters
+		{
+			bool mbIntraProcess; ///< True if the mutex is intra-process, else inter-process.
+			char mName[16];      ///< Mutex name, applicable only to platforms that recognize named synchronization objects.
+			// Defaults describe an intra-process mutex with no name (pName of NULL).
+			RWMutexParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class RWMutex
+		/// Implements a multiple reader / single writer mutex.
+		/// This allows for significantly higher performance when data to be protected
+		/// is read much more frequently than written. In this case, a waiting writer
+		/// gets top priority and all new readers block after a writer starts waiting.
+		class EATHREADLIB_API RWMutex
+		{
+		public:
+			enum Result
+			{
+				kResultError   = -1, ///< Error result (see Unlock, which may return one of enum Result).
+				kResultTimeout = -2  ///< Timeout result.
+			};
+
+			enum LockType
+			{
+				kLockTypeNone  = 0,
+				kLockTypeRead  = 1, ///< Shared lock; multiple readers are allowed.
+				kLockTypeWrite = 2  ///< Exclusive lock; single writer.
+			};
+
+			/// RWMutex
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use RWMutex(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to RWMutex(NULL, false).
+			RWMutex(const RWMutexParameters* pRWMutexParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~RWMutex
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~RWMutex();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with RWMutex(NULL, false).
+			bool Init(const RWMutexParameters* pRWMutexParameters);
+
+			/// Lock
+			/// Returns the new lock count for the given lock type.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			int Lock(LockType lockType, const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked by the 
+			/// calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock
+			/// or is one of enum Result.
+			int Unlock();
+
+			/// GetLockCount -- NOTE(review): undocumented in the original; presumably returns the current lock count for the given lock type -- confirm in the implementation.
+			int GetLockCount(LockType lockType);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the EARWMutexData member.
+			void* GetPlatformData()
+				{ return &mRWMutexData; }
+
+		protected:
+			EARWMutexData mRWMutexData; ///< All state of the mutex (see struct EARWMutexData).
+
+		private:
+			// Objects of this class are not copyable: both copy operations are private and their bodies copy nothing.
+			RWMutex(const RWMutex&){}
+			RWMutex& operator=(const RWMutex&){ return *this; }
+		};
+
+
+		/// RWMutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWMutex.
+		/// A primary use of this would be to allow the RWMutex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API RWMutexFactory
+		{
+		public:
+			static RWMutex*  CreateRWMutex();                       // Internally implemented as: return new RWMutex;
+			static void      DestroyRWMutex(RWMutex* pRWMutex);     // Internally implemented as: delete pRWMutex;
+			// Manual construction: the caller supplies memory of at least GetRWMutexSize() bytes and pairs ConstructRWMutex with DestructRWMutex.
+			static size_t    GetRWMutexSize();                      // Internally implemented as: return sizeof(RWMutex);
+			static RWMutex*  ConstructRWMutex(void* pMemory);       // Internally implemented as: return new(pMemory) RWMutex;
+			static void      DestructRWMutex(RWMutex* pRWMutex);    // Internally implemented as: pRWMutex->~RWMutex();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoRWMutex
+		/// Holds an RWMutex locked for the lifetime of the AutoRWMutex object: the
+		/// constructor calls Lock() with the requested lock type and the destructor
+		/// calls Unlock() on the RWMutex (when the object goes out of scope).
+		class AutoRWMutex
+		{
+		public:
+			/// Locks the given mutex for reading or writing, as selected by lockType.
+			/// Lock() is called with its default timeout argument.
+			AutoRWMutex(RWMutex& mutex, RWMutex::LockType lockType)
+				: mMutex(mutex)
+			{
+				mMutex.Lock(lockType);
+			}
+
+			/// Unlocks the mutex that was locked in the constructor.
+			~AutoRWMutex()
+			{
+				mMutex.Unlock();
+			}
+
+		protected:
+			RWMutex& mMutex; ///< The mutex being held; it must outlive this object.
+
+			// Copying is dangerous (it would lead to an extra Unlock), so the copy
+			// operations are inaccessible to users and not defined here.
+			AutoRWMutex(const AutoRWMutex&);
+			const AutoRWMutex& operator=(const AutoRWMutex&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_EATHREAD_RWMUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,430 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an interprocess mutex with multiple readers but a single writer.
+// This allows for high performance systems whereby the consumers of data
+// are more common than the producers of data.
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef EATHREAD_EATHREAD_RWMUTEX_IP_H
+#define EATHREAD_EATHREAD_RWMUTEX_IP_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <new>
+#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	#pragma warning(push, 0)
+	#include <Windows.h>
+	#pragma warning(pop)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifdef _MSC_VER
+	#pragma warning(push)           // We have to be careful about disabling this warning. Sometimes the warning is meaningful; sometimes it isn't.
+	#pragma warning(disable: 4251)  // class (some template) needs to have dll-interface to be used by clients.
+	#pragma warning(disable: 6054)  // String 'argument 2' might not be zero-terminated
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
+			/// Shared
+			/// Places a single T in a named Win32 file mapping so that every Shared<T>
+			/// initialized with the same name refers to the same T. A 32 bit reference 
+			/// count is kept in the mapping directly after the T (see GetRefCount).
+			/// Init and Shutdown serialize themselves through a named Win32 mutex.
+			template<typename T>
+			class Shared
+			{
+			public:
+				Shared();                       // Creates an unattached object; call Init to attach.
+				Shared(const char* pName);      // Calls Init(pName); the return value of Init is not reported.
+			   ~Shared();                       // Calls Shutdown.
+
+				bool Init(const char* pName);   // Attaches to the mapping named pName, creating it if needed.
+				void Shutdown();                // Detaches; destructs the T if this object held the last reference.
+				bool IsNew() const { return mbCreated; }                // True if Init created the mapping rather than opening an existing one.
+				T*   operator->()  { return static_cast<T*>(mpData); }  // mpData is NULL while unattached.
+
+			protected:
+				uint32_t& GetRefCount();        // Reference count stored in the mapping after the T. Requires mpData to be valid.
+
+				// Not copyable. Declared but not defined here.
+				Shared(const Shared&);
+				Shared& operator=(const Shared&);
+
+			protected:
+				HANDLE mMapping;    // File mapping handle, or NULL.
+				void*  mpData;      // Mapped view holding the T followed by the reference count, or NULL.
+				bool   mbCreated;   // See IsNew.
+				char   mName[32];   // Mapping name; Init truncates longer names to fit.
+				T*     mpT;         // For debug purposes only.
+			};
+
+
+			/// Shared
+			/// Default constructor. Creates an unattached object; call Init to attach it 
+			/// to a named mapping.
+			template <typename T>
+			inline Shared<T>::Shared()
+			  : mMapping(NULL)
+			  , mpData(NULL)
+			  , mbCreated(false)
+			  , mpT(NULL)
+			{
+				// Shutdown (which the destructor always runs) builds its mutex name by
+				// strcpy'ing from mName, so mName must be a valid string even if Init
+				// is never called.
+				mName[0] = 0;
+			}
+
+
+			/// Shared
+			/// Constructs the object and immediately calls Init(pName). 
+			/// The return value of Init is discarded.
+			template <typename T>
+			inline Shared<T>::Shared(const char* pName)
+			  : mMapping(NULL)
+			  , mpData(NULL)
+			  , mbCreated(false)
+			  , mpT(NULL)
+			{
+				Init(pName);
+			}
+
+
+			/// ~Shared
+			/// Releases this object's reference to the shared data by calling Shutdown.
+			template <typename T>
+			inline Shared<T>::~Shared()
+			{
+				Shutdown();
+			}
+
+
+			/// Init
+			/// Attaches this object to the file mapping named pName, creating the mapping 
+			/// and constructing the T in it if the mapping doesn't already exist, else 
+			/// incrementing the stored reference count. pName is truncated to fit mName; 
+			/// a NULL pName is treated as an empty name. 
+			/// Returns true if the mapping was created/opened and mapped into memory.
+			template <typename T>
+			inline bool Shared<T>::Init(const char* pName)
+			{
+				bool bReturnValue = false;
+
+				if(pName)
+					strncpy(mName, pName, sizeof(mName));
+				else
+					mName[0] = 0;
+				mName[sizeof(mName) - 1] = 0; // strncpy doesn't terminate the string if pName is too long.
+		 
+				// A named mutex serializes creation and ref-counting with other users of the same name.
+				char mutexName[sizeof(mName) + 16];
+				strcpy(mutexName, mName);
+				strcat(mutexName, ".SharedMutex");
+				HANDLE hMutex = CreateMutexA(NULL, FALSE, mutexName);
+				EAT_ASSERT(hMutex != NULL);
+				if(hMutex != NULL)
+				{
+					WaitForSingleObject(hMutex, INFINITE); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+
+					const size_t kDataSize = sizeof(T) + 8; // Add bytes so that we can store a ref-count of our own after the mpData. 
+					mMapping = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, kDataSize, mName);
+
+					if(mMapping)
+					{
+						mbCreated = (GetLastError() != ERROR_ALREADY_EXISTS); // Must be read before any other API call resets the last error.
+						mpData    = MapViewOfFile(mMapping, FILE_MAP_ALL_ACCESS, 0, 0, kDataSize);
+						EAT_ASSERT(mpData != NULL);
+
+						if(mpData) // MapViewOfFile returns NULL on failure, in which case there is no ref count to touch.
+						{
+							uint32_t& refCount = GetRefCount(); // The ref count is stored at the end of the mapped data.
+
+							if(mbCreated)           // If we were the first one to create this, then construct it.
+							{
+								new(mpData) T;
+								refCount = 1;
+							}
+							else
+								refCount++;
+
+							mpT = static_cast<T*>(mpData); // For debug purposes only.
+
+							bReturnValue = true;
+						}
+						else
+						{
+							// Couldn't map the view; return to the unattached state.
+							CloseHandle(mMapping);
+							mMapping  = NULL;
+							mbCreated = false;
+						}
+					}
+
+					ReleaseMutex(hMutex);
+					CloseHandle(hMutex);
+				}
+
+				return bReturnValue;
+			}
+
+
+			/// Shutdown
+			/// Detaches this object from the shared mapping. If this object holds the last 
+			/// reference then the shared T is destructed, else the stored reference count 
+			/// is decremented. Does nothing beyond taking the named mutex if not attached.
+			template <typename T>
+			inline void Shared<T>::Shutdown()
+			{
+				// Same name that Init uses, so that Init and Shutdown exclude each other.
+				char lockName[sizeof(mName) + 16];
+				strcpy(lockName, mName);
+				strcat(lockName, ".SharedMutex");
+
+				HANDLE hLock = CreateMutexA(NULL, FALSE, lockName);
+				EAT_ASSERT(hLock != NULL);
+				if(hLock == NULL)
+					return;
+
+				WaitForSingleObject(hLock, INFINITE); // Expected to be brief; the mutex is only held for the bookkeeping below.
+
+				if(mMapping && mpData)
+				{
+					uint32_t& nRefs = GetRefCount(); // Stored after the T in the mapped data.
+
+					if(nRefs != 1)
+						nRefs--;
+					else
+						static_cast<T*>(mpData)->~T();  // We are the last user.
+
+					UnmapViewOfFile(mpData);
+					mpData = NULL;
+				}
+
+				if(mMapping)
+				{
+					CloseHandle(mMapping);
+					mMapping = 0;
+				}
+
+				ReleaseMutex(hLock);
+				CloseHandle(hLock);
+			}
+
+			/// GetRefCount
+			/// Returns a reference to the 32 bit reference count stored in the mapped data
+			/// at the first 4 byte aligned address at or after the end of the T. 
+			/// Init allocates sizeof(T) + 8 bytes, which leaves room for it.
+			template <typename T>
+			inline uint32_t& Shared<T>::GetRefCount()
+			{
+				const uintptr_t nEndOfT  = (uintptr_t)mpData + sizeof(T);
+				const uintptr_t nAligned = (nEndOfT + 3) & ~(uintptr_t)3; // Round up to next 32 bit boundary.
+
+				return *(uint32_t*)nAligned;
+			}
+
+		#else
+
+			/// Shared
+			/// Fallback used when the Windows desktop implementation isn't available. 
+			/// The T is simply a member of this object, so nothing is shared by name; 
+			/// the name arguments are ignored.
+			template<typename T>
+			class Shared
+			{
+			public:
+				Shared()               { }
+				Shared(const char*)    { }
+
+				bool Init(const char*) { return true; }   // Nothing to do; always succeeds.
+				void Shutdown()        { }
+				bool IsNew() const     { return true; }   // Each instance has its own T, so it's always reported as new.
+				T*   operator->()      { return &mT; }
+
+				T mT;
+			};
+
+		#endif // #if defined(EA_PLATFORM_WINDOWS)
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// EARWMutexIPData
+		///
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
+			/// SharedData
+			/// State record that EARWMutexIPData holds through Shared<SharedData>.
+			/// NOTE(review): field meanings below are inferred from their names; the code
+			/// that uses them is not in this header -- confirm against the implementation.
+			struct EATHREADLIB_API SharedData
+			{
+				int   mnReadWaiters;        // Presumably the number of threads waiting for a read lock.
+				int   mnWriteWaiters;       // Presumably the number of threads waiting for a write lock.
+				int   mnReaders;            // Presumably the number of current read locks.
+				DWORD mThreadIdWriter;      // Need to use a thread id instead of a thread handle.
+
+				// Zeroes the counters and marks the writer thread id as invalid.
+				SharedData() : mnReadWaiters(0), mnWriteWaiters(0), mnReaders(0), mThreadIdWriter(EA::Thread::kSysThreadIdInvalid) { }
+			};
+
+			/// EARWMutexIPData
+			/// Platform data for RWMutexIP on Windows desktop: the shared state record 
+			/// plus three Win32 handles. The member functions are defined outside this header.
+			struct EATHREADLIB_API EARWMutexIPData
+			{
+				Shared<SharedData> mSharedData;     // State record held through Shared<T>.
+				HANDLE             mMutex;
+				HANDLE             mReadSemaphore;
+				HANDLE             mWriteSemaphore;
+
+				EARWMutexIPData();
+			   ~EARWMutexIPData();
+
+				bool Init(const char* pName);
+				void Shutdown();
+
+			private:
+				// Not copyable. Declared but not defined here.
+				EARWMutexIPData(const EARWMutexIPData&);
+				EARWMutexIPData& operator=(const EARWMutexIPData&);
+			};
+
+		#else
+
+			/// EARWMutexIPData
+			/// Empty placeholder used when the Windows desktop implementation isn't available.
+			struct EATHREADLIB_API EARWMutexIPData
+			{
+				EARWMutexIPData(){}
+
+			private:
+				// Not copyable. Declared but not defined here.
+				EARWMutexIPData(const EARWMutexIPData&);
+				EARWMutexIPData& operator=(const EARWMutexIPData&);
+			};
+
+		#endif
+
+
+		/// RWMutexIPParameters
+		/// Specifies how a RWMutexIP is to be initialized. See RWMutexIP::RWMutexIP and RWMutexIP::Init.
+		struct EATHREADLIB_API RWMutexIPParameters
+		{
+			bool mbIntraProcess; /// True if the mutex is intra-process, else inter-process.
+			char mName[16];      /// Mutex name, applicable only to platforms that recognize named synchronization objects.
+
+			/// Defaults to an intra-process mutex with a NULL name. The constructor is defined outside this header.
+			RWMutexIPParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class RWMutexIP
+		/// Implements an interprocess multiple reader / single writer mutex.
+		/// This allows for significantly higher performance when the data to be protected
+		/// is read much more frequently than written. In this case, a waiting writer
+		/// gets top priority and all new readers block after a waiter starts waiting.
+		class EATHREADLIB_API RWMutexIP
+		{
+		public:
+			/// Result
+			/// Negative result codes. Unlock is documented below as returning either 
+			/// a lock count or one of these values.
+			enum Result
+			{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// LockType
+			/// Identifies the kind of lock for Lock and GetLockCount.
+			enum LockType
+			{
+				kLockTypeNone  = 0,
+				kLockTypeRead  = 1,
+				kLockTypeWrite = 2
+			};
+
+			/// RWMutexIP
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use RWMutexIP(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to RWMutexIP(NULL, false).
+			RWMutexIP(const RWMutexIPParameters* pRWMutexIPParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~RWMutexIP
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~RWMutexIP();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with RWMutexIP(NULL, false).
+			bool Init(const RWMutexIPParameters* pRWMutexIPParameters);
+
+			/// Lock
+			/// Returns the new lock count for the given lock type.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			int Lock(LockType lockType, const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked by the 
+			/// calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock
+			/// or is one of enum Result.
+			int Unlock();
+
+			/// GetLockCount
+			/// Returns the lock count for the given lock type.
+			int GetLockCount(LockType lockType);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the EARWMutexIPData member.
+			void* GetPlatformData()
+				{ return &mRWMutexIPData; }
+
+		protected:
+			EARWMutexIPData mRWMutexIPData;
+
+		private:
+			// Objects of this class are not copyable.
+			// These are private and have empty bodies: they copy nothing.
+			RWMutexIP(const RWMutexIP&){}
+			RWMutexIP& operator=(const RWMutexIP&){ return *this; }
+		};
+
+
+		/// RWMutexIPFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWMutexIP.
+		/// A primary use of this would be to allow the RWMutexIP implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API RWMutexIPFactory
+		{
+		public:
+			// Heap-based pair: DestroyRWMutexIP is the counterpart of CreateRWMutexIP.
+			static RWMutexIP*  CreateRWMutexIP();                         // Internally implemented as: return new RWMutexIP;
+			static void        DestroyRWMutexIP(RWMutexIP* pRWMutex);     // Internally implemented as: delete pRWMutex;
+
+			// Caller-supplied memory: query the required size, construct in place, destruct in place.
+			static size_t      GetRWMutexIPSize();                        // Internally implemented as: return sizeof(RWMutexIP);
+			static RWMutexIP*  ConstructRWMutexIP(void* pMemory);         // Internally implemented as: return new(pMemory) RWMutexIP;
+			static void        DestructRWMutexIP(RWMutexIP* pRWMutex);    // Internally implemented as: pRWMutex->~RWMutexIP();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoRWMutexIP
+		/// An AutoRWMutexIP locks the given RWMutexIP in its constructor and 
+		/// unlocks that RWMutexIP in its destructor (when it goes out of scope).
+		/// The lock is taken with the default timeout argument of RWMutexIP::Lock and the
+		/// return value of Lock is not checked; the destructor calls Unlock unconditionally.
+		class AutoRWMutexIP
+		{
+		public:
+			/// Locks 'mutex' with the requested lock type. 
+			/// Only a reference is stored, so 'mutex' must outlive this object.
+			AutoRWMutexIP(RWMutexIP& mutex, RWMutexIP::LockType lockType) 
+				: mMutex(mutex)
+				{  mMutex.Lock(lockType); }
+
+		  ~AutoRWMutexIP()
+				{  mMutex.Unlock(); }
+
+		protected:
+			RWMutexIP& mMutex;  // The mutex being held; not owned.
+
+			// Prevent copying by default, as copying is dangerous.
+			// Declared but not defined here.
+			AutoRWMutexIP(const AutoRWMutexIP&);
+			const AutoRWMutexIP& operator=(const AutoRWMutexIP&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_RWMUTEX_IP_H
+
+
+
+
+
+
+
@@ -0,0 +1,253 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+//---------------------------------------------------------
+// For conditions of distribution and use, see
+// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE
+//---------------------------------------------------------
+
+#ifndef EATHREAD_EATHREAD_RWSEMALOCK_H
+#define EATHREAD_EATHREAD_RWSEMALOCK_H
+
+#include "eathread_atomic.h"
+#include "eathread_semaphore.h"
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		//---------------------------------------------------------
+		// RWSemaLock
+		//
+		// A reader/writer lock whose entire state (active readers, 
+		// readers waiting, and writers holding or waiting) is packed 
+		// into one 32 bit atomic. Threads that can't proceed block on 
+		// a semaphore instead of spinning. Each of the three counters 
+		// is 10 bits wide and thus limited to Status::kMaximum (1023).
+		//---------------------------------------------------------
+		class RWSemaLock
+		{
+		public:
+			RWSemaLock() : mStatus(0) {}
+			RWSemaLock(const RWSemaLock&) = delete;
+			RWSemaLock(RWSemaLock&&) = delete;
+			RWSemaLock& operator=(const RWSemaLock&) = delete;
+			RWSemaLock& operator=(RWSemaLock&&) = delete;
+
+			/// ReadLock
+			/// Acquires a read lock. If any writer holds or awaits the lock then the
+			/// caller registers itself as a waiting reader and blocks on the read 
+			/// semaphore until WriteUnlock releases it.
+			void ReadLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0)
+					{
+						newStatus.waitToRead++;
+					}
+					else
+					{
+						newStatus.readers++;
+					}
+					// Retry until the CAS succeeds; each iteration re-reads mStatus at the top of the loop.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				if (oldStatus.writers > 0)
+				{
+					mReadSema.Wait();
+				}
+			}
+
+			/// ReadTryLock
+			/// Acquires a read lock and returns true, unless a writer holds or awaits
+			/// the lock, in which case it returns false without changing any state.
+			bool ReadTryLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0)
+					{
+						return false;
+					}
+					else
+					{
+						newStatus.readers++;
+					}
+					// Retry until the CAS succeeds; each iteration re-reads mStatus at the top of the loop.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				return true;
+			}
+
+			/// ReadUnlock
+			/// Releases a read lock. If this was the last active reader and a writer
+			/// is waiting then one writer is woken.
+			void ReadUnlock()
+			{
+				Status oldStatus;
+				oldStatus.data = mStatus.Add(-Status::kIncrementRead) + Status::kIncrementRead; // Recover the value from before the Add.
+
+				EAT_ASSERT(oldStatus.readers > 0);
+				if (oldStatus.readers == 1 && oldStatus.writers > 0)
+				{
+					mWriteSema.Post();
+				}
+			}
+
+			/// WriteLock
+			/// Acquires the write lock. The writer is counted immediately, which stops 
+			/// new readers from entering; the caller then blocks on the write semaphore
+			/// if there were any readers or other writers at that moment.
+			void WriteLock()
+			{
+				Status oldStatus;
+				oldStatus.data = mStatus.Add(Status::kIncrementWrite) - Status::kIncrementWrite; // Recover the value from before the Add.
+				EAT_ASSERT(oldStatus.writers + 1 <= Status::kMaximum);
+				if (oldStatus.readers > 0 || oldStatus.writers > 0)
+				{
+					mWriteSema.Wait();
+				}
+			}
+
+			/// WriteTryLock
+			/// Acquires the write lock and returns true, unless there is any reader or 
+			/// writer, in which case it returns false without changing any state.
+			bool WriteTryLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0 || oldStatus.readers > 0)
+					{
+						return false;
+					}
+					else
+					{
+						newStatus.writers++;
+					}
+					// Retry until the CAS succeeds; each iteration re-reads mStatus at the top of the loop.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				return true;
+			}
+
+			/// WriteUnlock
+			/// Releases the write lock. If readers queued up while the lock was held 
+			/// then all of them are converted to active readers and woken together;
+			/// otherwise, if another writer is waiting, one writer is woken.
+			void WriteUnlock()
+			{
+				uint32_t waitToRead = 0;
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					EAT_ASSERT(oldStatus.readers == 0);
+					newStatus.data = oldStatus.data;
+					newStatus.writers--;
+					waitToRead = oldStatus.waitToRead;
+					if (waitToRead > 0)
+					{
+						newStatus.waitToRead = 0;
+						newStatus.readers = waitToRead;
+					}
+					// Retry until the CAS succeeds; each iteration re-reads mStatus at the top of the loop.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				if (waitToRead > 0)
+				{
+					mReadSema.Post(waitToRead);
+				}
+				else if (oldStatus.writers > 1)
+				{
+					mWriteSema.Post();
+				}
+			}
+
+			// NOTE(rparolin): 
+			// Since the RWSemaLock uses atomics to update its status flags before blocking on a semaphore, you cannot
+			// rely on the answer the IsReadLocked/IsWriteLocked gives you.  It's at best a guess and you can't rely
+			// on it for any kind of validation checks which limits its usefulness.  In addition, the original
+			// implementation from Preshing does not include such functionality. 
+			//
+			// bool IsReadLocked() {...}
+			// bool IsWriteLocked() {...}
+
+		protected:
+			EA_DISABLE_VC_WARNING(4201) // warning C4201: nonstandard extension used: nameless struct/union
+			union Status
+			{
+				enum
+				{
+					kIncrementRead			= 1,                // Adds one to 'readers' when added to 'data'.
+					kIncrementWaitToRead	= 1 << 10,          // Adds one to 'waitToRead' when added to 'data'.
+					kIncrementWrite			= 1 << 20,          // Adds one to 'writers' when added to 'data'.
+					kMaximum				= (1 << 10) - 1,    // Largest value a 10 bit counter can hold.
+				};
+
+				// The counters must be unsigned. Whether a plain 'int' bit-field is signed is 
+				// implementation-defined, and where it is signed a 10 bit field holds only 
+				// -512..511, which makes the '> 0' tests above fail once a count reaches 512.
+				struct 
+				{
+					unsigned int readers	: 10; // 10-bits = 0..1023. Threads currently holding a read lock.
+					unsigned int waitToRead	: 10; // Readers blocked on mReadSema.
+					unsigned int writers	: 10; // Writers holding or waiting for the lock.
+					unsigned int pad		: 2;
+				};
+
+				int data; // All of the above, as the single value stored in mStatus.
+			};
+			EA_RESTORE_VC_WARNING()
+
+			AtomicInt32 mStatus;  // Holds a Status::data value.
+			Semaphore mReadSema;  // semaphores are non-copyable
+			Semaphore mWriteSema; // semaphores are non-copyable
+		};
+
+
+		//---------------------------------------------------------
+		// AutoSemaReadLock
+		//
+		// Scoped read lock: calls ReadLock on the given RWSemaLock 
+		// at construction and ReadUnlock at destruction.
+		//---------------------------------------------------------
+		class AutoSemaReadLock
+		{
+		public:
+			AutoSemaReadLock(RWSemaLock& lock) 
+				: m_lock(lock)
+				{ m_lock.ReadLock(); }
+
+			~AutoSemaReadLock()
+				{ m_lock.ReadUnlock(); }
+
+			// Neither copyable nor movable.
+			AutoSemaReadLock(const AutoSemaReadLock&) = delete;
+			AutoSemaReadLock(AutoSemaReadLock&&) = delete;
+			AutoSemaReadLock& operator=(const AutoSemaReadLock&) = delete;
+			AutoSemaReadLock& operator=(AutoSemaReadLock&&) = delete;
+
+		private:
+			RWSemaLock& m_lock; // The lock being held; not owned.
+		};
+
+
+		//---------------------------------------------------------
+		// AutoSemaWriteLock
+		//
+		// Scoped write lock: calls WriteLock on the given RWSemaLock 
+		// at construction and WriteUnlock at destruction.
+		//---------------------------------------------------------
+		class AutoSemaWriteLock
+		{
+		public:
+			AutoSemaWriteLock(RWSemaLock& lock) 
+				: m_lock(lock)
+				{ m_lock.WriteLock(); }
+
+			~AutoSemaWriteLock()
+				{ m_lock.WriteUnlock(); }
+
+			// Neither copyable nor movable.
+			AutoSemaWriteLock(const AutoSemaWriteLock&) = delete;
+			AutoSemaWriteLock(AutoSemaWriteLock&&) = delete;
+			AutoSemaWriteLock& operator=(const AutoSemaWriteLock&) = delete;
+			AutoSemaWriteLock& operator=(AutoSemaWriteLock&&) = delete;
+
+		private:
+			RWSemaLock& m_lock; // The lock being held; not owned.
+		};
+	}
+}
+
+#endif // EATHREAD_EATHREAD_RWSEMALOCK_H
@@ -0,0 +1,408 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock which supports
+// multiple readers but a single writer.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWSPINLOCK_H
+#define EATHREAD_EATHREAD_RWSPINLOCK_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_atomic.h>
+#include <new>
+
+
+#ifdef _MSC_VER
+	 #pragma warning(push)
+	 #pragma warning(disable: 4100) // (Compiler claims pRWSpinLock is unreferenced)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class RWSpinLock
+		///
+		/// A RWSpinLock is like a SpinLock except you can have multiple
+		/// readers but a single exclusive writer. This is very beneficial for 
+		/// situations whereby there are many consumers of some data but only 
+		/// one producer of the data. Unlike many thread-level read/write lock
+		/// implementations, this spin lock, like many others, follows the most
+		/// lean approach and does not do arbitration or fairness. The result is
+		/// that if you have many readers who are constantly locking the read
+		/// lock, write lock attempts may not be able to succeed. So you need to
+		/// be careful in how you use this.
+		///
+		/// We take a page from the Linux kernel here and implement read/write
+		/// locks via a mechanism that uses a 'bias' value and limits the number
+		/// of total readers to 2^24-1, or 16,777,215. This shouldn't be a problem.
+		/// When the spinlock is unlocked, the value is 0x01000000.
+		/// Readers decrement the lock by one each, so when the spinlock is 
+		/// read-locked, the value is between 1 and 0x00ffffff. Writers decrement
+		/// the lock by 0x01000000, so when a spinlock is write-locked, the value
+		/// must be zero. It must be zero because there can only be one writer
+		/// and because there can be no readers when there is a writer. When a 
+		/// reader attempts to get a read-lock, it decrements the lock count and 
+		/// examines the new value. If the new value is < 0, then there was a 
+		/// write-lock present and so the reader immediately increments the lock
+		/// count and tries again later. There are two results that come about due
+		/// to this: 
+		///     1) In the case of 32 bit integers, if by some wild chance of nature
+		///         there are 256 or more reader threads and there is a writer thread
+		///         with a write lock and every one of the reader threads executes 
+		///         the same decrement and compare to < 0 at the same time, then the
+		///         257th thread will mistakenly think that there isn't a write lock.
+		///     2) The logic to test if a write-lock is taken is not to compare
+		///         against zero but to compare against (> -255 and <= 0). This is
+		///         because readers will occasionally be 'mistakenly' decrementing
+		///         the lock while trying to obtain read access.
+		///
+		/// We thus have the following possible values:
+		///     0 < value < 0x01000000    ----> read-locked
+		///     value == 0x01000000       ----> unlocked
+		///     value <= 0                ----> write-locked (transiently below zero while
+		///                                     other threads are in a failed lock attempt)
+		///
+		class RWSpinLock
+		{
+		public:
+			// Constructs the lock in the unlocked state (mValue == kValueUnlocked).
+			RWSpinLock();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang. 
+			// This function can be called if the current thread already 
+			// has a read lock, though all read locks must be matched by unlocks. 
+			void ReadLock();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang.
+			// This function can be called if the current thread already 
+			// has a read lock (in which case it will always succeed), 
+			// though all read locks must be matched by unlocks. 
+			bool ReadTryLock();
+
+			// Returns true if any thread currently has a read lock. 
+			// The return value is subject to be out of date by the 
+			// time it is read by the current thread, unless the current
+			// thread has a read lock. If IsReadLocked is true, then 
+			// at that moment IsWriteLocked is necessarily false.
+			// If IsReadLocked is false, IsWriteLocked may be either true or false.
+			bool IsReadLocked() const;
+
+			// Unlocks for reading, as a match to ReadLock or a successful
+			// ReadTryLock. All read locks must be matched by ReadUnlock with
+			// the same thread that has the read lock.
+			void ReadUnlock();
+
+			// This function cannot be called while the current thread  
+			// already has a read or write lock, else this function will hang. 
+			void WriteLock();
+
+			// If this function is called while the current thread already 
+			// has a read or write lock, it will always return false.
+			bool WriteTryLock();
+
+			// If this function returns true, then IsReadLocked must at that moment
+			// be false.
+			bool IsWriteLocked() const;
+
+			// Matches WriteLock or a successful WriteTryLock.
+			void WriteUnlock();
+
+			// Returns the address of mValue. This value should be read for 
+			// diagnostic purposes only and should not be written.
+			void* GetPlatformData();
+
+		public:
+			enum Value
+			{
+				kValueUnlocked = 0x01000000     // The 'bias': the value of mValue when nobody holds the lock.
+			};
+
+			AtomicInt32 mValue;                 // The lock state, as described in the class documentation.
+		};
+
+
+
+		/// RWSpinLockFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWSpinlock.
+		/// A primary use of this would be to allow the RWSpinlock implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		class EATHREADLIB_API RWSpinLockFactory
+		{
+		public:
+			// Allocating pair: DestroyRWSpinLock is the counterpart of CreateRWSpinLock.
+			static RWSpinLock* CreateRWSpinLock();
+			static void        DestroyRWSpinLock(RWSpinLock* pRWSpinLock);
+
+			// Caller-supplied memory: query the required size, construct in place, destruct in place.
+			static size_t      GetRWSpinLockSize();
+			static RWSpinLock* ConstructRWSpinLock(void* pMemory);
+			static void        DestructRWSpinLock(RWSpinLock* pRWSpinLock);
+		};
+
+
+
+		/// class AutoRWSpinLock
+		///
+		/// Locks a RWSpinLock for reading or writing in its constructor and 
+		/// releases the same kind of lock in its destructor.
+		///
+		/// Example usage:
+		///     void Function() {
+		///         AutoRWSpinLock autoLock(spinLock, AutoRWSpinLock::kLockTypeRead); // spinLock is some RWSpinLock.
+		///         // Do something
+		///     }
+		///
+		class AutoRWSpinLock
+		{
+		public:
+			enum LockType
+			{
+				kLockTypeRead, 
+				kLockTypeWrite
+			};
+
+			// Only a reference to spinLock is stored, so spinLock must outlive this object.
+			AutoRWSpinLock(RWSpinLock& spinLock, LockType lockType) ;
+		   ~AutoRWSpinLock();
+
+		protected:
+			RWSpinLock& mSpinLock;  // The lock being held; not owned.
+			LockType    mLockType;  // Remembered so the destructor releases the matching kind of lock.
+
+			// Prevent copying by default, as copying is dangerous.
+			// Declared but not defined here.
+			AutoRWSpinLock(const AutoRWSpinLock&);
+			const AutoRWSpinLock& operator=(const AutoRWSpinLock&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		// Starts the lock in the unlocked state.
+		inline
+		RWSpinLock::RWSpinLock()
+			: mValue(kValueUnlocked)
+		{
+		}
+
+
+		// Spins until a read lock is obtained.
+		// Each attempt decrements mValue by one. Viewed as unsigned, a result below
+		// kValueUnlocked means the value was still positive beforehand (no writer), so 
+		// the lock is ours. Otherwise the decrement is undone and we poll with plain 
+		// reads until the value goes positive before attempting again.
+		inline
+		void RWSpinLock::ReadLock()
+		{
+			Top: // Due to modern processor branch prediction, the compiler will optimize better for true branches and so we do a manual goto loop here.
+			if((unsigned)mValue.Decrement() < kValueUnlocked)
+				return;
+			mValue.Increment(); // Undo the failed attempt.
+			while(mValue.GetValueRaw() <= 0){ // It is better to do this polling loop as a first check than to 
+				#ifdef EA_THREAD_COOPERATIVE  // do an atomic decrement repeatedly, as the atomic lock is 
+					ThreadSleep();            // potentially not a cheap thing due to potential bus locks on some platforms..
+				#else
+					EAProcessorPause();       // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+				#endif
+			}
+			goto Top;
+		}
+
+
+		// Makes a single attempt at a read lock. Returns true if the lock was 
+		// obtained, else undoes the attempt and returns false.
+		inline
+		bool RWSpinLock::ReadTryLock()
+		{
+			// As unsigned, a negative result is a huge number, so one comparison 
+			// covers both "too large" and "below zero".
+			if((unsigned)mValue.Decrement() >= kValueUnlocked)
+			{
+				mValue.Increment();
+				return false;
+			}
+
+			return true;
+		}
+
+
+		// Returns true if mValue is in the read-locked range (0 < value < kValueUnlocked).
+		inline
+		bool RWSpinLock::IsReadLocked() const
+		{
+			// Subtracting one in unsigned arithmetic wraps zero around to the largest 
+			// unsigned value, which lets a single comparison test both ends of the range.
+			const unsigned nShifted = (unsigned)mValue.GetValue() - 1u;
+			const unsigned nLimit   = (unsigned)(kValueUnlocked - 1);
+
+			return (nShifted < nLimit);
+		}
+
+
+		// Releases a read lock by giving back the one that ReadLock/ReadTryLock subtracted.
+		inline
+		void RWSpinLock::ReadUnlock()
+		{
+			mValue.Increment();
+		}
+
+
+		// Spins until the write lock is obtained.
+		// Each attempt subtracts kValueUnlocked from mValue. A result of exactly zero 
+		// means the lock was entirely unlocked beforehand, so it is now ours. Otherwise 
+		// the subtraction is undone and we poll with plain reads until the value reads 
+		// as unlocked before attempting again.
+		inline
+		void RWSpinLock::WriteLock()
+		{
+			Top: 
+			if(mValue.Add(-kValueUnlocked) == 0)
+				return;
+			mValue.Add(kValueUnlocked); // Undo the failed attempt.
+			while(mValue.GetValueRaw() != kValueUnlocked){  // It is better to do this polling loop as a first check than to
+				#ifdef EA_THREAD_COOPERATIVE             // do an atomic decrement repeatedly, as the atomic lock is 
+					ThreadSleep();                       // potentially not a cheap thing due to potential bus locks on some platforms..
+				#else
+					EAProcessorPause();                  // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+				#endif
+			}
+			goto Top;
+		}
+
+
+		// Makes a single attempt at the write lock. Returns true if the lock was 
+		// obtained, else undoes the attempt and returns false.
+		inline
+		bool RWSpinLock::WriteTryLock()
+		{
+			const bool bLocked = (mValue.Add(-kValueUnlocked) == 0); // Zero means there were no readers and no writer.
+
+			if(!bLocked)
+				mValue.Add(kValueUnlocked);
+
+			return bLocked;
+		}
+
+
+		// Returns true if mValue is zero or negative, which is the write-locked range.
+		inline
+		bool RWSpinLock::IsWriteLocked() const
+		{
+			 return (mValue.GetValue() <= 0); // This fails to work if 127 threads at once are in the middle of a failed write lock attempt.
+		}
+
+
+		// Releases the write lock by giving back the kValueUnlocked that 
+		// WriteLock/WriteTryLock subtracted.
+		inline
+		void RWSpinLock::WriteUnlock()
+		{
+			mValue.Add(kValueUnlocked);
+		}
+
+
+		// Returns the address of mValue, for diagnostic reads only.
+		inline
+		void* RWSpinLock::GetPlatformData() 
+		{
+			return &mValue;
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockFactory
+		///////////////////////////////////////////////////////////////////////
+
+		// Creates a RWSpinLock using the allocator returned by GetAllocator if there
+		// is one, else global operator new. Returns NULL if the allocator fails to 
+		// provide memory. Free the result with DestroyRWSpinLock.
+		inline
+		RWSpinLock* RWSpinLockFactory::CreateRWSpinLock() 
+		{
+			Allocator* pAllocator = GetAllocator();
+
+			if(pAllocator)
+			{
+				// Placement new must not be handed a null pointer, so check the allocation first.
+				void* const pMemory = pAllocator->Alloc(sizeof(RWSpinLock));
+				return pMemory ? new(pMemory) RWSpinLock : NULL;
+			}
+
+			return new RWSpinLock;
+		}
+
+
+		// Destroys a RWSpinLock created by CreateRWSpinLock, using the allocator 
+		// returned by GetAllocator if there is one, else global operator delete.
+		// A NULL pRWSpinLock is ignored on both paths, as it is with operator delete.
+		inline
+		void RWSpinLockFactory::DestroyRWSpinLock(RWSpinLock* pRWSpinLock)
+		{
+			if(!pRWSpinLock)
+				return;
+
+			Allocator* pAllocator = GetAllocator();
+
+			if(pAllocator)
+			{
+				pRWSpinLock->~RWSpinLock();
+				pAllocator->Free(pRWSpinLock);
+			}
+			else
+				delete pRWSpinLock;
+		}
+
+
+		// Returns the number of bytes of memory that ConstructRWSpinLock requires.
+		inline
+		size_t RWSpinLockFactory::GetRWSpinLockSize()
+		{
+			return sizeof(RWSpinLock);
+		}
+
+
+		// Constructs a RWSpinLock in the caller-supplied memory and returns it. 
+		// pMemory must provide at least GetRWSpinLockSize bytes.
+		inline
+		RWSpinLock* RWSpinLockFactory::ConstructRWSpinLock(void* pMemory)
+		{
+			return new(pMemory) RWSpinLock;
+		}
+
+
+		// Runs the destructor of a RWSpinLock without freeing its memory. 
+		// Counterpart of ConstructRWSpinLock.
+		inline
+		void RWSpinLockFactory::DestructRWSpinLock(RWSpinLock* pRWSpinLock)
+		{
+			pRWSpinLock->~RWSpinLock();
+		}
+
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoRWSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		// Takes the requested kind of lock on spinLock. Any lock type other than 
+		// kLockTypeRead is treated as a write lock.
+		inline
+		AutoRWSpinLock::AutoRWSpinLock(RWSpinLock& spinLock, LockType lockType) 
+			: mSpinLock(spinLock), 
+			  mLockType(lockType)
+		{ 
+			if(mLockType != kLockTypeRead)
+				mSpinLock.WriteLock();
+			else
+				mSpinLock.ReadLock();
+		}
+
+
+		// Releases the kind of lock that the constructor took.
+		inline
+		AutoRWSpinLock::~AutoRWSpinLock()
+		{ 
+			if(mLockType != kLockTypeRead)
+				mSpinLock.WriteUnlock();
+			else
+				mSpinLock.ReadUnlock();
+		}
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_RWSPINLOCK_H
+
+
+
+
+
+
@@ -0,0 +1,452 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock which supports
+// multiple simultaneous readers but a single writer, where writers get
+// priority over readers.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWSPINLOCKW_H
+#define EATHREAD_EATHREAD_RWSPINLOCKW_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_atomic.h>
+#include <new>
+
+
+#ifdef _MSC_VER
+	#pragma warning(push)
+	#pragma warning(disable: 4100) // (Compiler claims pRWSpinLockW is unreferenced)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class RWSpinLockW
+		///
+		/// This class differs from RWSpinLock in that it gives writers priority.
+		/// In exchange for that feature, this version doesn't allow recursive
+		/// read locks and it becomes inefficient due to excessive spinning if
+		/// there are very many simultaneous readers.
+		///
+		/// A RWSpinLockW is like a SpinLock except you can have multiple
+		/// readers but a single exclusive writer. This is very beneficial for
+		/// situations whereby there are many consumers of some data but only
+		/// one producer of the data. Unlike many thread-level read/write lock
+		/// implementations, this spin lock, like many others, follows the most
+		/// lean approach and does not do arbitration or fairness. The result is
+		/// that if you have many readers who are constantly locking the read
+		/// lock, write lock attempts may not be able to succeed. So you need to
+		/// be careful in how you use this.
+		///
+		/// Note the usage of GetValueRaw in the source code for this class.
+		/// Use of GetValueRaw instead of GetValue is due to a tradeoff that
+		/// has been chosen. GetValueRaw does not come with a memory read barrier
+		/// and thus the read value may be out of date. This is OK because it's
+		/// only used as a rule of thumb to help decide what synchronization
+		/// primitive to use next. This results in significantly faster execution
+		/// because only one memory synchronization primitive is typically
+		/// executed instead of two. The problem with GetValueRaw, however,
+		/// is that in cases where there is very high locking activity from
+		/// many threads simultaneously, GetValueRaw usage could result in
+		/// a "bad guess" as to what to do next and can also result in a lot
+		/// of spinning, even infinite spinning in the most pathological case.
+		/// However, on the platforms that we target this situation is unlikely
+		/// to the point of being virtually impossible in practice.
+		/// And if it was possible then we recommend the user use a different
+		/// mechanism.
+		/// NOTE(review): the original text recommended "the regular EAThread
+		/// RWSpinLockW" (this very class); presumably RWSpinLock was meant -- confirm.
+		///
+		class RWSpinLockW
+		{
+		public:
+			/// Constructs the lock in the unlocked state (state word is zero).
+			RWSpinLockW();
+
+			// This function cannot be called while the current thread
+			// already has a write lock, else this function will hang.
+			// Nor can this function be called if the current thread
+			// already has a read lock, as it can result in a hang.
+			void ReadLock();
+
+			// Makes a single attempt to take a read lock and returns whether
+			// it succeeded. The attempt fails whenever a writer holds the lock
+			// or is waiting for it.
+			bool ReadTryLock();
+
+			// If this function returns true, then IsWriteLocked must at that moment
+			// be false.
+			bool IsReadLocked() const;
+
+			void ReadUnlock();
+
+			// This function cannot be called while the current thread
+			// already has a read or write lock, else this function will hang.
+			void WriteLock();
+
+			// If this function is called while the current thread already
+			// has a read or write lock, it will always return false.
+			bool WriteTryLock();
+
+			// If this function returns true, then IsReadLocked must at that moment
+			// be false.
+			bool IsWriteLocked() const;
+
+			void WriteUnlock();
+
+			/// Returns the platform-specific data handle for debugging uses or
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData();
+
+		protected:
+			// Bit layout of the 32 bit state word (mValue):
+			//    bit  31     : set while a writer holds the lock.
+			//    bits 16..30 : number of writers waiting for the lock.
+			//    bits  0..15 : number of readers holding the lock.
+			enum Value
+			{
+				kWriteLockBit       = 0x80000000,
+				kWriteWaitingInc    = 0x00010000,
+				kReadLockInc        = 0x00000001,
+				kWriteWaitingMask   = 0x7FFF0000,
+				kReadLockMask       = 0x0000FFFF,
+				kLockAllMask        = kWriteLockBit | kReadLockMask,
+				kWriteAllMask       = kWriteLockBit | kWriteWaitingMask,
+			};
+
+			AtomicInt32 mValue;
+		};
+
+
+
+		/// RWSpinLockWFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWSpinLockW.
+		/// A primary use of this would be to allow the RWSpinLockW implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		/// Note that the inline implementations of Create/Destroy in this header use
+		/// the EA::Thread allocator (gpAllocator) when one is set and fall back to
+		/// global operator new/delete only when it is not.
+		///
+		class EATHREADLIB_API RWSpinLockWFactory
+		{
+		public:
+			static RWSpinLockW* CreateRWSpinLockW();                            // Allocates and constructs a RWSpinLockW.
+			static void         DestroyRWSpinLockW(RWSpinLockW* pRWSpinLockW);  // Destructs and frees a RWSpinLockW made by CreateRWSpinLockW.
+			static size_t       GetRWSpinLockWSize();                           // Returns sizeof(RWSpinLockW).
+			static RWSpinLockW* ConstructRWSpinLockW(void* pMemory);            // Placement-constructs a RWSpinLockW in pMemory.
+			static void         DestructRWSpinLockW(RWSpinLockW* pRWSpinLockW); // Runs the destructor only; does not free memory.
+		};
+
+
+
+		/// class AutoRWSpinLockW
+		///
+		/// Scoped lock helper: the constructor takes a read or write lock on the
+		/// given RWSpinLockW and the destructor releases it.
+		///
+		/// Example usage:
+		///     void Function() {
+		///         AutoRWSpinLockW autoLock(gSpinLockW, AutoRWSpinLockW::kLockTypeRead); // gSpinLockW is some RWSpinLockW.
+		///         // Do something
+		///     }
+		///
+		class AutoRWSpinLockW
+		{
+		public:
+			/// Selects which kind of lock the constructor takes.
+			enum LockType
+			{
+				kLockTypeRead, 
+				kLockTypeWrite
+			};
+
+			AutoRWSpinLockW(RWSpinLockW& SpinLockW, LockType lockType);
+		   ~AutoRWSpinLockW();
+
+		protected:
+			RWSpinLockW& mSpinLockW;    // The lock being held. Must outlive this object, as only a reference is stored.
+			LockType     mLockType;     // Remembered so the destructor calls the matching unlock function.
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoRWSpinLockW(const AutoRWSpinLockW&);
+			const AutoRWSpinLockW& operator=(const AutoRWSpinLockW&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockW
+		///////////////////////////////////////////////////////////////////////
+
+		/// RWSpinLockW
+		/// Initializes the state word to zero: no writer, no waiting writers, no readers.
+		inline
+		RWSpinLockW::RWSpinLockW()
+			: mValue(0)
+		{
+		}
+
+
+		/// ReadLock
+		/// Blocks (by spinning) until a read lock has been taken.
+		/// Proceeds in three stages:
+		///    1) A single fast attempt, made only if no writer holds or awaits the lock.
+		///    2) A spin until neither a writer nor any reader holds the lock.
+		///    3) A retry loop that takes the read lock as soon as no writer holds it,
+		///       without regard for waiting writers.
+		inline
+		void RWSpinLockW::ReadLock()
+		{
+			int32_t currVal = mValue.GetValueRaw(); // See the class note above about GetValueRaw usage.
+
+			// If there is no writer nor waiting writers, attempt a read lock.
+			if( (currVal & kWriteAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal + kReadLockInc, currVal ) )
+					return;
+			}
+
+			// Spin until there is no writer nor any read lockers (the mask tested
+			// below is kLockAllMask, which does not include the waiting writer count).
+			// By waiting until there are no read or write lockers, we tend to avoid the case
+			// whereby readers starve out writers. The downside is that a lot of read
+			// activity can cause read parallelism to be reduced and read threads waiting
+			// for each other. 
+			do
+			{
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue();    // or EAReadBarrier(); mValue.GetValueRaw();
+			}while (currVal & kLockAllMask); // or kWriteAllMask
+
+			// At this point, we ignore waiting writers and take the lock if we 
+			// can. Any waiting writers that have shown up right as we execute this 
+			// code aren't given any priority over us, unlike above where they are.
+			for( ;; )
+			{
+				// This code has a small problem in that a large number of simultaneous
+				// frequently locking/unlocking readers can cause this code to spin
+				// a lot (in theory, indefinitely). However, in practice our use cases
+				// and target hardware shouldn't cause this to happen.
+				if( (currVal & kWriteLockBit) == 0 )                                             
+				{
+					if( mValue.SetValueConditional( currVal + kReadLockInc, currVal ) )
+						return;
+				}
+
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue(); // or EAReadBarrier(); mValue.GetValueRaw();
+			}
+		}
+
+
+		/// ReadTryLock
+		/// Makes one non-blocking attempt to take a read lock.
+		/// Returns true if the lock was taken, false if a writer holds or awaits
+		/// the lock or if the state changed underneath the attempt.
+		inline
+		bool RWSpinLockW::ReadTryLock()
+		{
+			int32_t observed = mValue.GetValueRaw();
+
+			// A writer that holds the lock or is queued for it rules out the attempt.
+			if( (observed & kWriteAllMask) != 0 )
+				return false;
+
+			// One conditional update only; no retry on contention.
+			if( mValue.SetValueConditional( observed + kReadLockInc, observed ) )
+				return true;
+
+			return false;
+		}
+
+
+		/// IsReadLocked
+		/// Returns true if the reader count held in the low 16 bits of the state word is non-zero.
+		inline
+		bool RWSpinLockW::IsReadLocked() const
+		{
+			// This return value has only diagnostic meaning. It cannot be used for thread synchronization purposes.
+			return ((mValue.GetValueRaw() & kReadLockMask) != 0);
+		}
+
+
+		/// ReadUnlock
+		/// Releases one read lock by atomically decrementing the reader count.
+		/// Must be paired with a prior successful ReadLock/ReadTryLock.
+		inline
+		void RWSpinLockW::ReadUnlock()
+		{
+			EAT_ASSERT(IsReadLocked());  // This can't tell us if the current thread was one of the lockers. But it's better than nothing as a debug test.
+			mValue.Add( -kReadLockInc );
+		}
+
+
+		/// WriteLock
+		/// Blocks (by spinning) until the write lock has been taken.
+		/// A single fast attempt is made first. If it fails, the thread registers
+		/// itself in the waiting writer count and then spins until it can swap in
+		/// the write lock bit while removing its own waiting count.
+		inline
+		void RWSpinLockW::WriteLock()
+		{
+			int32_t currVal = mValue.GetValueRaw();
+
+			// If there is no writer and there are no readers, attempt a write lock.
+			// Note that the mask tested is kLockAllMask, so other waiting writers
+			// do not prevent this fast attempt.
+			if( (currVal & kLockAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal | kWriteLockBit, currVal ) )
+					return;
+			}
+
+			// Post a waiting write. This will make new readers spin until all existing
+			// readers have released their lock, so that we get an even chance.
+			mValue.Add( kWriteWaitingInc );
+
+			// Spin until we get the lock.
+			// On the first pass currVal still holds the value read before the Add above;
+			// the conditional set can only succeed if mValue matches it exactly, and
+			// otherwise the value is simply re-read at the bottom of the loop.
+			for( ;; )
+			{
+				if( (currVal & kLockAllMask) == 0 )                                             
+				{
+					// Set the write lock bit and remove our waiting count in one step.
+					if( mValue.SetValueConditional( (currVal | kWriteLockBit) - kWriteWaitingInc, currVal ) )
+						return;
+				}
+
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue(); // or EAReadBarrier(); mValue.GetValueRaw();
+			}
+		}
+
+
+		/// WriteTryLock
+		/// Makes one non-blocking attempt to take the write lock.
+		/// Returns true if the lock was taken, false if a writer or any reader
+		/// holds the lock or if the state changed underneath the attempt.
+		inline
+		bool RWSpinLockW::WriteTryLock()
+		{
+			int32_t observed = mValue.GetValueRaw();
+
+			// An existing writer or any reader rules out the attempt. Waiting
+			// writers are not part of kLockAllMask and so are not considered.
+			if( (observed & kLockAllMask) != 0 )
+				return false;
+
+			// One conditional update only; no retry on contention.
+			if( mValue.SetValueConditional( observed | kWriteLockBit, observed ) )
+				return true;
+
+			return false;
+		}
+
+
+		/// IsWriteLocked
+		/// Returns true if the write lock bit of the state word is set.
+		inline
+		bool RWSpinLockW::IsWriteLocked() const
+		{
+			// This return value has only diagnostic meaning. It cannot be used for thread synchronization purposes.
+			return ( (mValue.GetValueRaw() & kWriteLockBit) != 0 );
+		}
+
+
+		/// WriteUnlock
+		/// Releases the write lock by atomically subtracting kWriteLockBit from the
+		/// state word; other fields of the word are left for their owners to update.
+		/// Must be paired with a prior successful WriteLock/WriteTryLock.
+		inline
+		void RWSpinLockW::WriteUnlock()
+		{
+			EAT_ASSERT(IsWriteLocked());
+			mValue.Add( -kWriteLockBit );
+		}
+
+
+		/// GetPlatformData
+		/// Returns the address of the lock's atomic state word (mValue) as an
+		/// untyped pointer.
+		inline
+		void* RWSpinLockW::GetPlatformData()
+		{
+			return &mValue;
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockWFactory
+		///////////////////////////////////////////////////////////////////////
+
+		/// CreateRWSpinLockW
+		/// Allocates and constructs a new RWSpinLockW.
+		/// If gpAllocator is set, storage comes from it and the object is
+		/// placement-constructed there; otherwise global operator new is used.
+		/// Returns a null pointer if the allocator fails to provide storage.
+		inline
+		RWSpinLockW* RWSpinLockWFactory::CreateRWSpinLockW() 
+		{
+			if(gpAllocator)
+			{
+				// Placement new on a null pointer is undefined behaviour, so the
+				// allocation result must be checked before constructing into it.
+				void* const pMemory = gpAllocator->Alloc(sizeof(RWSpinLockW));
+
+				if(pMemory)
+					return new(pMemory) RWSpinLockW;
+
+				return 0;
+			}
+
+			return new RWSpinLockW;
+		}
+
+		/// DestroyRWSpinLockW
+		/// Destroys and frees a lock obtained from CreateRWSpinLockW.
+		/// If gpAllocator is set, the destructor is run explicitly and the storage
+		/// is handed back to that allocator; otherwise the object is released with
+		/// delete. A null pointer is ignored on both paths.
+		inline
+		void RWSpinLockWFactory::DestroyRWSpinLockW(RWSpinLockW* pRWSpinLock)
+		{
+			if(gpAllocator)
+			{
+				// 'delete' tolerates a null pointer; give the allocator path the
+				// same behaviour rather than invoking a destructor through null.
+				if(pRWSpinLock)
+				{
+					pRWSpinLock->~RWSpinLockW();
+					gpAllocator->Free(pRWSpinLock);
+				}
+			}
+			else
+				delete pRWSpinLock;
+		}
+
+		/// GetRWSpinLockWSize
+		/// Returns sizeof(RWSpinLockW), i.e. the number of bytes a caller must
+		/// provide to ConstructRWSpinLockW.
+		inline
+		size_t RWSpinLockWFactory::GetRWSpinLockWSize()
+		{
+			return sizeof(RWSpinLockW);
+		}
+
+		/// ConstructRWSpinLockW
+		/// Placement-constructs a RWSpinLockW in caller-supplied memory and returns
+		/// a pointer to it. No allocation is performed here; pMemory is used as is,
+		/// so it must be non-null and large enough (see GetRWSpinLockWSize).
+		inline
+		RWSpinLockW* RWSpinLockWFactory::ConstructRWSpinLockW(void* pMemory)
+		{
+			return new(pMemory) RWSpinLockW;
+		}
+
+		/// DestructRWSpinLockW
+		/// Runs the RWSpinLockW destructor in place without freeing the storage.
+		/// Counterpart of ConstructRWSpinLockW; the caller keeps ownership of the memory.
+		inline
+		void RWSpinLockWFactory::DestructRWSpinLockW(RWSpinLockW* pRWSpinLock)
+		{
+			pRWSpinLock->~RWSpinLockW();
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoRWSpinLockW
+		///////////////////////////////////////////////////////////////////////
+
+		/// AutoRWSpinLockW
+		/// Stores the lock reference and requested lock type, then acquires the
+		/// lock: a read lock for kLockTypeRead, a write lock for anything else.
+		inline
+		AutoRWSpinLockW::AutoRWSpinLockW(RWSpinLockW& spinLock, LockType lockType) 
+			: mSpinLockW(spinLock), mLockType(lockType)
+		{
+			if(mLockType != kLockTypeRead)
+			{
+				mSpinLockW.WriteLock();
+				return;
+			}
+
+			mSpinLockW.ReadLock();
+		}
+
+
+		/// ~AutoRWSpinLockW
+		/// Releases the lock taken by the constructor, using the unlock call that
+		/// matches the stored lock type.
+		inline
+		AutoRWSpinLockW::~AutoRWSpinLockW()
+		{
+			if(mLockType != kLockTypeRead)
+			{
+				mSpinLockW.WriteUnlock();
+				return;
+			}
+
+			mSpinLockW.ReadUnlock();
+		}
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // Header include guard
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,339 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a semaphore thread synchronization class.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SEMAPHORE_H
+#define EATHREAD_EATHREAD_SEMAPHORE_H
+
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+//
+// Defined as 0 or 1. Defined as 1 if the OS provides no native semaphore support.
+//
+#ifndef EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+	#define EATHREAD_USE_SYNTHESIZED_SEMAPHORE 0
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_FAST_MS_SEMAPHORE_ENABLED
+//
+// Defined as 0 or 1.
+// Enables the usage of a faster intra-process semaphore on Microsoft platforms.
+// By faster we mean that it is typically 10x or more faster.
+// Has the downside that it is not interchangeable with the SEMAPHORE built-in
+// type and its behaviour won't be strictly identical.
+// Even if this option is enabled, you can still get the built-in behaviour
+// of Microsoft semaphores by specifying the semaphore as inter-process.
+//
+#ifndef EATHREAD_FAST_MS_SEMAPHORE_ENABLED
+	#define EATHREAD_FAST_MS_SEMAPHORE_ENABLED 1
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EASemaphoreData
+///
+/// This is used internally by class Semaphore.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if !EA_THREADS_AVAILABLE
+	// Variant used when threads are not available (EA_THREADS_AVAILABLE is 0).
+	// Holds only counters; no OS object is involved.
+	struct EASemaphoreData
+	{
+		volatile int mnCount;       // Presumably the current available count -- confirm against the implementation.
+		int mnMaxCount;             // Presumably the upper bound for mnCount -- confirm against the implementation.
+
+		EASemaphoreData();
+	};
+
+#elif EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+	#include <eathread/eathread_condition.h>
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_atomic.h>
+
+	// Variant used when EATHREAD_USE_SYNTHESIZED_SEMAPHORE is enabled: the
+	// semaphore is built from an EAThread Condition, Mutex and atomic counter
+	// instead of a native OS semaphore.
+	struct EASemaphoreData
+	{
+		EA::Thread::Condition   mCV;
+		EA::Thread::Mutex       mMutex;
+		EA::Thread::AtomicInt32 mnCount;
+		int                     mnMaxCount;
+		bool                    mbValid;     // NOTE(review): presumably records whether initialization succeeded -- confirm.
+
+		EASemaphoreData();
+	};
+
+#elif defined(__APPLE__)
+
+	#include <mach/semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	// Variant used on Apple platforms (__APPLE__): wraps a Mach semaphore_t
+	// and keeps the count in a separate atomic.
+	struct EASemaphoreData
+	{
+		semaphore_t mSemaphore;
+		EA::Thread::AtomicInt32 mnCount;
+		int  mnMaxCount;
+		bool mbIntraProcess;
+		
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_SONY)
+	#include <kernel/semaphore.h>
+	#include <eathread/eathread_atomic.h>
+	#include <eathread/internal/timings.h>
+	// Variant used on Sony platforms (EA_PLATFORM_SONY): wraps a SceKernelSema
+	// and keeps the count in a separate atomic.
+	struct EASemaphoreData
+	{
+		SceKernelSema mSemaphore;
+
+		int  mnMaxCount;
+		EA::Thread::AtomicInt32 mnCount;
+
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	#if defined(EA_PLATFORM_WINDOWS)
+		#ifdef CreateSemaphore
+			#undef CreateSemaphore // Windows #defines CreateSemaphore to CreateSemaphoreA or CreateSemaphoreW.
+		#endif
+	#endif
+
+	// Variant used on Unix-like platforms or whenever POSIX threads are
+	// available: wraps a POSIX sem_t and keeps the count in a separate atomic.
+	struct EASemaphoreData
+	{
+		sem_t mSemaphore;
+		EA::Thread::AtomicInt32 mnCount;
+		int  mnMaxCount;
+		bool mbIntraProcess;
+
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+	#ifdef CreateSemaphore
+		#undef CreateSemaphore // Windows #defines CreateSemaphore to CreateSemaphoreA or CreateSemaphoreW.
+	#endif
+
+	// Variant used on Microsoft platforms when POSIX threads are not available.
+	struct EATHREADLIB_API EASemaphoreData
+	{
+		void*   mhSemaphore;    // We use void* instead of HANDLE in order to avoid #including windows.h. HANDLE is typedef'd to (void*) on all Windows-like platforms.
+		int32_t mnCount;        // Number of available posts. Under the fast semaphore pathway, a negative value means there are waiters.
+		int32_t mnCancelCount;  // Used by fast semaphore logic. Is the deferred cancel count.
+		int32_t mnMaxCount;     // Presumably the maximum value allowed for mnCount -- confirm against the implementation.
+		bool    mbIntraProcess; // Used under Windows, which can have multiple processes. Always true for XBox.
+
+		EASemaphoreData();
+		void UpdateCancelCount(int32_t n);  // Defined out of line; see the implementation for how n is applied to mnCancelCount.
+	};
+
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// SemaphoreParameters
+		/// Specifies semaphore settings.
+		/// Passed to the Semaphore constructor or to Semaphore::Init.
+		struct EATHREADLIB_API SemaphoreParameters
+		{
+			int  mInitialCount;  /// Initial available count
+			int  mMaxCount;      /// Max possible count. Defaults to INT_MAX.
+			bool mbIntraProcess; /// True if the semaphore is intra-process, else inter-process.
+			char mName[16];      /// Semaphore name, applicable only to platforms that recognize named synchronization objects.
+
+			/// By default the initial count is 0, the semaphore is intra-process, and no name is given.
+			/// mMaxCount has no constructor argument; it is set by the out-of-line constructor.
+			SemaphoreParameters(int initialCount = 0, bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class Semaphore
+		/// A semaphore is an object which has an associated count which is >= 0 and
+		/// a value > 0 means that a thread can 'grab' the semaphore and decrement its
+		/// value by one. A value of 0 means that threads must wait until another thread
+		/// 'un-grabs' the semaphore. Thus a semaphore is like a car rental agency which
+		/// has a limited number of cars for rent and if they are out of cars, you have 
+		/// to wait until one of the renters returns their car.
+		class EATHREADLIB_API Semaphore
+		{
+		public:
+			/// Result
+			/// Negative values returned by Wait and Post to signal failure.
+			/// Non-negative return values from those functions are semaphore counts.
+			enum Result{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// Semaphore
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Semaphore(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Semaphore(NULL, false).
+			Semaphore(const SemaphoreParameters* pSemaphoreParameters = NULL, bool bDefaultParameters = true);
+
+			/// Semaphore
+			/// This is a constructor which initializes the Semaphore to a specific count 
+			/// and initializes the other Semaphore parameters to default values. See the
+			/// SemaphoreParameters struct for info on these default values.
+			Semaphore(int initialCount);
+
+			/// ~Semaphore
+			/// Destroys an existing semaphore. The semaphore must not be locked 
+			/// by any thread, otherwise the resulting behaviour is undefined.
+			~Semaphore();
+
+			/// Init
+			/// Initializes the semaphore with given parameters.
+			bool Init(const SemaphoreParameters* pSemaphoreParameters);
+
+			/// Wait
+			/// Locks the semaphore (reducing its count by one) or gives up trying to 
+			/// lock it after a given timeout has expired. If the semaphore count is > 0
+			/// then the count will be reduced by one. If the semaphore count is 0, the
+			/// call will block until another thread unlocks it or the timeout expires.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			/// Return value:
+			///     kResultError      The semaphore could not be obtained due to error.
+			///     kResultTimeout    The semaphore could not be obtained due to timeout.
+			///     >= 0              The new count for the semaphore.
+			///
+			/// It's possible that two threads waiting on the same semaphore will return 
+			/// with a result of zero. Thus you cannot rely on the semaphore's return value
+			/// to ascertain which was the last thread to return from the Wait. 
+			int Wait(const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Post
+			/// Increments the signalled value of the semaphore by the count. 
+			/// Returns the available count after the operation has completed. 
+			/// Returns kResultError upon error. A Wait is often eventually 
+			/// followed by a corresponding Post.
+			/// For the case of count being greater than 1, not all platforms
+			/// act the same. If count results in exceeding the max count then
+			/// kResultError is returned. Some platforms return kResultError 
+			/// before any of count is applied, while others return 
+			/// kResultError after some of count has been applied.
+			int Post(int count = 1);
+
+			/// GetCount
+			/// Returns current number of available locks associated with the semaphore.
+			/// This is useful for debugging and for quick polling checks of the 
+			/// status of the semaphore. This value changes over time as multiple
+			/// threads wait and post to the semaphore. This value cannot be trusted
+			/// to exactly represent the count upon its return if multiple threads are 
+			/// using this Semaphore at the time.
+			int GetCount() const;
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the internal EASemaphoreData.
+			void* GetPlatformData()
+				{ return &mSemaphoreData; }
+
+		protected:
+			EASemaphoreData mSemaphoreData;   /// Platform-specific state; its layout is chosen by the preprocessor in this header.
+
+		private:
+			// Objects of this class are not copyable.
+			// These are private and do nothing: the copy constructor leaves mSemaphoreData
+			// default-constructed and the assignment operator leaves it untouched.
+			Semaphore(const Semaphore&){}
+			Semaphore& operator=(const Semaphore&){ return *this; }
+		};
+
+
+		/// SemaphoreFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Semaphore.
+		/// A primary use of this would be to allow the Semaphore implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		/// Create/Destroy and Construct/Destruct are matched pairs: an object made by
+		/// one function of a pair must be released by the other function of that pair.
+		class EATHREADLIB_API SemaphoreFactory
+		{
+		public:
+			static Semaphore* CreateSemaphore();                        // Internally implemented as: return new Semaphore;
+			static void       DestroySemaphore(Semaphore* pSemaphore);  // Internally implemented as: delete pSemaphore;
+
+			static size_t     GetSemaphoreSize();                       // Internally implemented as: return sizeof(Semaphore);
+			static Semaphore* ConstructSemaphore(void* pMemory);        // Internally implemented as: return new(pMemory) Semaphore;
+			static void       DestructSemaphore(Semaphore* pSemaphore); // Internally implemented as: pSemaphore->~Semaphore();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoSemaphore
+		/// An AutoSemaphore grabs the Semaphore in its constructor and posts 
+		/// the Semaphore once in its destructor (when it goes out of scope).
+		///
+		/// The constructor waits with the default (no) timeout and discards the
+		/// result of Wait, so a Wait error is not reported to the caller and the
+		/// destructor posts regardless.
+		class EATHREADLIB_API AutoSemaphore
+		{
+		public:
+			AutoSemaphore(Semaphore& semaphore) 
+				: mSemaphore(semaphore)
+				{ mSemaphore.Wait(); }
+
+			~AutoSemaphore()
+				{ mSemaphore.Post(1); }
+
+		protected:
+			Semaphore& mSemaphore;  /// The semaphore being held. Must outlive this object, as only a reference is stored.
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoSemaphore(const AutoSemaphore&);
+			const AutoSemaphore& operator=(const AutoSemaphore&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+#endif // EATHREAD_EATHREAD_SEMAPHORE_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,319 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock.
+//
+// A spin lock is the lightest form of mutex available. The Lock operation is
+// simply a loop that waits to set a shared variable. SpinLocks are not 
+// recursive (i.e. they can only be locked once by a thread) and are 
+// intra-process only. You have to be careful using spin locks because if you 
+// have a high priority thread that calls Lock while a lower priority thread
+// has the same lock, then on many systems the higher priority thread will 
+// use up all the CPU time waiting for the lock and the lower priority thread
+// will not get the CPU time needed to free the lock.
+//
+// From Usenet:
+//    A spinlock is a machine-specific "optimized" form of mutex
+//    ("MUTual EXclusion" device). However, you should never use
+//    a spinlock unless you know that you have multiple threads
+//    and that you're running on a multiprocessor. Otherwise, at
+//    best you're wasting a lot of time. A spinlock is great for
+//    "highly parallel" algorithms like matrix decompositions,
+//    where the application (or runtime) "knows" (or at least goes
+//    to lengths to ensure) that the threads participating are all
+//    running at the same time. Unless you know that, (and, if your
+//    code doesn't create threads, you CAN'T know that), don't even
+//    think of using a spinlock."
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SPINLOCK_H
+#define EATHREAD_EATHREAD_SPINLOCK_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <new> // include new for placement new operator
+
+#if defined(EA_PROCESSOR_X86)
+	// The reference x86 code works fine, as there is little that assembly
+	// code can do to improve it by much, assuming that the code is compiled
+	// in an optimized way. With VC7 on the PC platform, compiling with 
+	// optimization set to 'minimize size' and most other optimizations 
+	// enabled yielded code that was similar to Intel reference asm code.
+	// However, when the compiler was set to minimize size and enable inlining,
+	// it created an implementation of the Lock function that was less optimal.
+	// #include <eathread/x86/eathread_spinlock_x86.h>
+#elif defined(EA_PROCESSOR_IA64)
+	// The reference code below is probably fine.
+	// #include <eathread/ia64/eathread_spinlock_ia64.h>
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+// The above header files would define EA_THREAD_SPINLOCK_IMPLEMENTED.
+#if !defined(EA_THREAD_SPINLOCK_IMPLEMENTED)
+
+	// We provide an implementation that works for all systems but is less optimal.
+	#include <eathread/eathread_sync.h>
+	#include <eathread/eathread_atomic.h>
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// class SpinLock
+			///
+			/// Spinlocks are high-performance locks designed for special circumstances.
+			/// As such, they are not 'recursive' -- you cannot lock a spinlock twice.
+			/// Spinlocks have no explicit awareness of threading, but they are explicitly
+			/// thread-safe. 
+			///
+			/// You do not want to use spin locks as a *general* replacement for mutexes or
+			/// critical sections, even if you know your mutex use won't be recursive.
+			/// The reason for this is due to thread scheduling and thread priority issues.
+			/// A spinlock is not a kernel- or threading-kernel-level object and thus while
+			/// this gives it a certain amount of speed, it also means that if you have a 
+			/// low priority thread with a spinlock locked and a high priority thread
+			/// waiting for the spinlock, the program will hang, possibly indefinitely,
+			/// because the thread scheduler is giving all its time to the high priority 
+			/// thread which happens to be stuck. 
+			/// 
+			/// On the other hand, when judiciously used, a spin lock can yield significantly
+			/// higher performance than general mutexes, especially on platforms where mutex
+			/// locking is particularly expensive or on multiprocessing systems.
+			///
+			class SpinLock
+			{
+			protected: // Declared at the top because otherwise some compilers fail to compile inline functions below.
+				AtomicInt32 mAI;  /// A value of 0 means unlocked, while 1 means locked.
+
+			public:
+				SpinLock();             /// Constructs the lock in the unlocked state.
+
+				void Lock();            /// Spins until the lock has been acquired.
+				bool TryLock();         /// Makes one attempt to acquire the lock; returns true on success.
+				bool IsLocked();        /// Returns whether the lock is currently held by anyone. Diagnostic use only.
+				void Unlock();          /// Releases the lock. The lock must currently be held.
+
+				void* GetPlatformData(); /// Returns the address of the internal atomic variable.
+			};
+
+
+			/// SpinLockFactory
+			/// 
+			/// Implements a factory-based creation and destruction mechanism for class Spinlock.
+			/// A primary use of this would be to allow the Spinlock implementation to reside in
+			/// a private library while users of the class interact only with the interface
+			/// header and the factory. The factory provides conventional create/destroy 
+			/// semantics which use global operator new, but also provides manual construction/
+			/// destruction semantics so that the user can provide for memory allocation 
+			/// and deallocation.
+			///
+			/// Note that the inline implementations of Create/Destroy in this header use
+			/// the EA::Thread allocator (gpAllocator) when one is set and fall back to
+			/// global operator new/delete only when it is not.
+			class EATHREADLIB_API SpinLockFactory
+			{
+			public:
+				static SpinLock* CreateSpinLock();                      // Allocates and constructs a SpinLock.
+				static void      DestroySpinLock(SpinLock* pSpinLock);  // Destructs and frees a SpinLock made by CreateSpinLock.
+
+				static size_t    GetSpinLockSize();                     // Returns sizeof(SpinLock).
+				static SpinLock* ConstructSpinLock(void* pMemory);      // Placement-constructs a SpinLock in pMemory.
+
+				static void DestructSpinLock(SpinLock* pSpinLock);      // Runs the destructor only; does not free memory.
+			};
+
+		} // namespace Thread
+
+	} // namespace EA
+
+
+#endif // EA_THREAD_SPINLOCK_IMPLEMENTED
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoSpinLock
+		/// An AutoSpinLock locks the SpinLock in its constructor and 
+		/// unlocks the SpinLock in its destructor (when it goes out of scope).
+		/// Only a reference to the SpinLock is stored, so the SpinLock must
+		/// outlive the AutoSpinLock.
+		class AutoSpinLock
+		{
+		public:
+			AutoSpinLock(SpinLock& spinLock);
+		   ~AutoSpinLock();
+
+		protected:
+			SpinLock& mSpinLock;    /// The lock held for the lifetime of this object.
+
+		protected:
+			// Prevent copying by default, as copying is dangerous.
+			AutoSpinLock(const AutoSpinLock&);
+			const AutoSpinLock& operator=(const AutoSpinLock&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+
+		///////////////////////////////////////////////////////////////////////
+		// SpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		/// SpinLock
+		/// Initializes the atomic state to 0, which means unlocked.
+		inline
+		SpinLock::SpinLock() 
+		  : mAI(0)
+		{
+		}
+
+		/// Lock
+		/// Spins until the lock has been acquired by this caller.
+		/// Each round makes one conditional 0 -> 1 update and, if that fails,
+		/// polls with plain reads until the lock looks free before trying again.
+		inline
+		void SpinLock::Lock()
+		{
+			for(;;)
+			{
+				// Success path first: the conditional set flips 0 (unlocked) to 1 (locked).
+				if(mAI.SetValueConditional(1, 0))
+					return;
+
+				// Polling with a read before retrying is cheaper than hammering the
+				// conditional set, which is likely to be significantly more expensive.
+				// This is a common practice and is recommended by Intel, etc.
+				while(mAI.GetValue() != 0)
+				{
+				#ifdef EA_THREAD_COOPERATIVE
+					ThreadSleep();
+				#else
+					EAProcessorPause();
+				#endif
+				}
+			}
+		}
+
+		/// TryLock
+		/// Makes a single attempt to change the state from 0 (unlocked) to 1 (locked)
+		/// and returns whether that attempt succeeded. Never spins.
+		inline
+		bool SpinLock::TryLock()
+		{
+			return mAI.SetValueConditional(1, 0);
+		}
+
+		/// IsLocked
+		/// Returns true if the state is currently non-zero (locked by some thread).
+		/// Uses GetValueRaw, so the answer is a snapshot suitable for diagnostics
+		/// only; it says nothing about which thread holds the lock.
+		inline
+		bool SpinLock::IsLocked()
+		{
+			return mAI.GetValueRaw() != 0;
+		}
+
+		/// Unlock
+		/// Releases the lock by setting the state back to 0.
+		/// The lock must currently be held; this is checked only by assertion.
+		inline
+		void SpinLock::Unlock()
+		{
+			EAT_ASSERT(IsLocked());
+			mAI.SetValue(0);
+		}
+
+		/// GetPlatformData
+		/// Returns the address of the lock's atomic state variable (mAI) as an
+		/// untyped pointer.
+		inline
+		void* SpinLock::GetPlatformData()
+		{
+			return &mAI;
+		}
+
+
+		///////////////////////////////////////////////////////////////////////
+		// SpinLockFactory
+		///////////////////////////////////////////////////////////////////////
+
+		/// CreateSpinLock
+		/// Allocates and constructs a new SpinLock.
+		/// If gpAllocator is set, storage comes from it and the object is
+		/// placement-constructed there; otherwise global operator new is used.
+		/// Returns a null pointer if the allocator fails to provide storage.
+		inline
+		SpinLock* SpinLockFactory::CreateSpinLock()
+		{
+			if(gpAllocator)
+			{
+				// Placement new on a null pointer is undefined behaviour, so the
+				// allocation result must be checked before constructing into it.
+				void* const pMemory = gpAllocator->Alloc(sizeof(SpinLock));
+
+				if(pMemory)
+					return new(pMemory) SpinLock;
+
+				return 0;
+			}
+
+			return new SpinLock;
+		}
+
+		/// DestroySpinLock
+		/// Destroys and frees a lock obtained from CreateSpinLock.
+		/// If gpAllocator is set, the destructor is run explicitly and the storage
+		/// is handed back to that allocator; otherwise the object is released with
+		/// delete. A null pointer is ignored on both paths.
+		inline
+		void SpinLockFactory::DestroySpinLock(SpinLock* pSpinLock)
+		{
+			if(gpAllocator)
+			{
+				// 'delete' tolerates a null pointer; give the allocator path the
+				// same behaviour rather than invoking a destructor through null.
+				if(pSpinLock)
+				{
+					pSpinLock->~SpinLock();
+					gpAllocator->Free(pSpinLock);
+				}
+			}
+			else
+				delete pSpinLock;
+		}
+
+		/// GetSpinLockSize
+		/// Returns sizeof(SpinLock), i.e. the number of bytes a caller must
+		/// provide to ConstructSpinLock.
+		inline
+		size_t SpinLockFactory::GetSpinLockSize()
+		{
+			return sizeof(SpinLock);
+		}
+
+		/// ConstructSpinLock
+		/// Placement-constructs a SpinLock in caller-supplied memory and returns
+		/// a pointer to it. No allocation is performed here; pMemory is used as is,
+		/// so it must be non-null and large enough (see GetSpinLockSize).
+		inline
+		SpinLock* SpinLockFactory::ConstructSpinLock(void* pMemory)
+		{
+			return new(pMemory) SpinLock;
+		}
+
+		/// DestructSpinLock
+		/// Runs the SpinLock destructor in place without freeing the storage.
+		/// Counterpart of ConstructSpinLock; the caller keeps ownership of the memory.
+		EA_DISABLE_VC_WARNING(4100) // Compiler mistakenly claims pSpinLock is unreferenced
+		inline
+		void SpinLockFactory::DestructSpinLock(SpinLock* pSpinLock)
+		{
+			pSpinLock->~SpinLock();
+		}
+		EA_RESTORE_VC_WARNING()
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		/// AutoSpinLock
+		/// Stores a reference to the given SpinLock and locks it, spinning until
+		/// the lock has been acquired.
+		inline
+		AutoSpinLock::AutoSpinLock(SpinLock& spinLock) 
+		  : mSpinLock(spinLock)
+		{
+			mSpinLock.Lock();
+		}
+
+		/// ~AutoSpinLock
+		/// Unlocks the SpinLock that was locked by the constructor.
+		inline
+		AutoSpinLock::~AutoSpinLock()
+		{
+			mSpinLock.Unlock();
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+#endif // EATHREAD_EATHREAD_SPINLOCK_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,362 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines thread-local storage and related concepts in a platform-independent
+// and thread-safe manner.
+//
+// As of this writing (10/2003), documentation concerning thread-local 
+// storage implementations under GCC, pthreads, and MSVC/Windows can be found at:
+//    http://gcc.gnu.org/onlinedocs/gcc-3.3.2/gcc/Thread-Local.html#Thread-Local
+//    http://java.icmc.sc.usp.br/library/books/ibm_pthreads/users-33.htm#324811
+//    http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vccore/html/_core_Thread_Local_Storage_.28.TLS.29.asp
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_STORAGE_H
+#define EATHREAD_EATHREAD_STORAGE_H
+
+
+#include <eathread/internal/config.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <stddef.h>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// EA_THREAD_LOCAL
+		/// 
+		/// Documentation (partially culled from online information):
+		/// Thread Local Storage (a.k.a. TLS and Thread Specific Storage) is a 
+		/// mechanism by which each thread in a multithreaded process allocates 
+		/// storage for thread-specific data. In standard multithreaded programs, 
+		/// data is shared among all threads of a given process, whereas thread 
+		/// local storage is the mechanism for allocating per-thread data.
+		///
+		/// The EA_THREAD_LOCAL specifier may be used alone, with the extern or 
+		/// static specifiers, but with no other storage class specifier. 
+		/// When used with extern or static, EA_THREAD_LOCAL must appear 
+		/// immediately after the other storage class specifier.
+		///
+		/// The EA_THREAD_LOCAL specifier may be applied to any global, file-scoped 
+		/// static, function-scoped static, or static data member of a class. 
+		/// It may not be applied to block-scoped automatic or non-static data member.
+		///
+		/// When the address-of operator is applied to a thread-local variable, 
+		/// it is evaluated at run-time and returns the address of the current 
+		/// thread's instance of that variable. An address so obtained may be used 
+		/// by any thread. When a thread terminates, any pointers to thread-local
+		/// variables in that thread become invalid.
+		///
+		/// No static initialization may refer to the address of a thread-local variable.
+		/// In C++, if an initializer is present for a thread-local variable, 
+		/// it must be a constant-expression, as defined in 5.19.2 of the ANSI/ISO C++ standard. 
+		/// 
+		/// Windows has special considerations for using thread local storage in a DLL.  
+		/// 
+		/// Example usage:
+		///    #if defined(EA_THREAD_LOCAL)
+		///        EA_THREAD_LOCAL int n = 0;                       // OK
+		///        extern EA_THREAD_LOCAL struct Data s;            // OK
+		///        static EA_THREAD_LOCAL char* p;                  // OK
+		///        EA_THREAD_LOCAL int i = sizeof(i);               // OK.
+		///        EA_THREAD_LOCAL std::string s("hello");          // Bad -- Can't be used for initialized objects.
+		///        EA_THREAD_LOCAL int Function();                  // Bad -- Can't be used as return value.
+		///        void Function(){ EA_THREAD_LOCAL int i = 0; }    // Bad -- Can't be used in function.
+		///        void Function(EA_THREAD_LOCAL int i){ }          // Bad -- can't be used as argument.
+		///        extern int i; EA_THREAD_LOCAL int i;             // Bad -- Declarations differ.
+		///        int EA_THREAD_LOCAL i;                           // Bad -- Can't be used as a type modifier.
+		///        EA_THREAD_LOCAL int i = i;                       // Bad -- Can't reference self before initialization.
+		///    #else
+		///        Need to use EA::Thread::ThreadLocalStorage.
+		///    #endif
+
+		// Selects the definition of EA_THREAD_LOCAL. The branches are tested in 
+		// order and the first match wins. Depending on the branch, the macro ends up 
+		// as a real storage specifier, as an empty expansion, or not defined at all.
+		#if !EA_THREADS_AVAILABLE
+			// No threads: expands to nothing, so the variable is an ordinary variable.
+			#define EA_THREAD_LOCAL
+
+		// Disabled until we have at least one C++11 compiler that supports this which can be tested.
+		//#elif (EABASE_VERSION_N >= 20040) && !defined(EA_COMPILER_NO_THREAD_LOCAL)
+		//    #define EA_THREAD_LOCAL thread_local
+
+		#elif EA_USE_CPP11_CONCURRENCY
+			#if defined(EA_COMPILER_MSVC11_0) // VC11 doesn't support C++11 thread_local storage class yet
+				#define EA_THREAD_LOCAL __declspec(thread)
+			#else
+				#define EA_THREAD_LOCAL thread_local
+			#endif
+
+		#elif defined(__APPLE__)
+			// http://clang.llvm.org/docs/LanguageExtensions.html
+			#if __has_feature(cxx_thread_local)
+				#define EA_THREAD_LOCAL thread_local
+			#else
+				// NOTE: defined but empty here. Variables declared with it are NOT 
+				// per-thread, yet a "#if defined(EA_THREAD_LOCAL)" test still succeeds.
+				#define EA_THREAD_LOCAL 
+			#endif
+		#elif (defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))) && (defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)) // Any of the Unix variants, including Mac OSX.
+			// While GNUC v3.3 is the first version that supports thread local storage
+			// declarators, not all versions of GNUC for all platforms support it, 
+			// as it requires support from other tools and libraries beyond the compiler.
+			#if defined(__CYGWIN__) // Cygwin's branch of the GCC toolchain does not currently support TLS.
+				// Not supported. EA_THREAD_LOCAL is intentionally left undefined.
+			#else
+				#define EA_THREAD_LOCAL __thread
+			#endif
+
+		#elif defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_BORLAND) || (defined(EA_PLATFORM_WINDOWS) &&  defined(EA_COMPILER_INTEL))
+			// This appears to be supported by VC++, Borland C++.
+			// And it is supported by all compilers for the Windows platform.
+			#define EA_THREAD_LOCAL __declspec(thread)
+
+		#elif defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING)
+			#define EA_THREAD_LOCAL __thread
+
+		#else
+			// Else don't define it as anything. This will result in a compilation 
+			// error reporting the problem. We cannot simply #define away the 
+			// EA_THREAD_LOCAL term, as doing so would defeat the purpose of the 
+			// specifier. Dynamic thread local storage is a more flexible and
+			// portable solution to the problem.
+			// #define EA_THREAD_LOCAL
+		#endif
+		/////////////////////////////////////////////////////////////////////////
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAThreadLocalStorageData
+///
+/// This is used internally by class ThreadLocalStorage.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+///
+#if defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+
+	// Storage used by ThreadLocalStorage on EA_PLATFORM_SONY: a ScePthreadKey 
+	// plus the result code of the call that created it.
+	struct EAThreadLocalStorageData{
+		ScePthreadKey mKey;     // This is usually a pointer.
+		int           mResult;  // Result of call to scePthreadKeyCreate, so we can know if mKey is valid.
+	};
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && !defined(CS_UNDEFINED_STRING)
+	// In this case we will be using pthread_key_create, pthread_key_delete, pthread_getspecific, pthread_setspecific.
+	#include <pthread.h>
+
+	// Storage used by ThreadLocalStorage on pthreads platforms: a pthread_key_t 
+	// plus the result code of the call that created it.
+	struct EAThreadLocalStorageData{
+		pthread_key_t mKey;     // This is usually a pointer.
+		int           mResult;  // Result of call to pthread_key_create, so we can know if mKey is valid.
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_WINDOWS_PHONE) && !(defined(EA_PLATFORM_WINDOWS) && !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)) 
+	// In this case we will be using TlsAlloc, TlsFree, TlsGetValue, TlsSetValue.
+	// NOTE(review): presumably this holds the index returned by TlsAlloc -- confirm in the implementation.
+	typedef uint32_t EAThreadLocalStorageData;
+
+#elif (!EA_THREADS_AVAILABLE || defined(EA_PLATFORM_CONSOLE)) && !defined(CS_UNDEFINED_STRING)
+	#include <eathread/eathread.h>
+
+	// Fixed-capacity storage used by ThreadLocalStorage when threads are 
+	// unavailable or on console platforms. Holds an inline array of 
+	// (thread id, pointer) pairs; no heap memory is declared here.
+	struct EAThreadLocalStorageData
+	{
+		// Associates one thread id with the pointer stored for that thread.
+		struct ThreadToDataPair
+		{
+			EA::Thread::ThreadUniqueId mThreadID;
+			const void* mpData;
+		};
+		// EA_TLS_MAX_COUNT may be pre-defined by the user to change the array capacity.
+		#ifndef EA_TLS_MAX_COUNT
+			#define EA_TLS_MAX_COUNT 16 // This is the max number of threads that might want to use the given thread-local-storage item.
+		#endif
+		ThreadToDataPair* GetTLSEntry(bool bCreateIfNotFound);
+		ThreadToDataPair  mDataArray[EA_TLS_MAX_COUNT];
+		int               mDataArrayCount; // NOTE(review): presumably the number of mDataArray entries in use -- confirm in the implementation.
+	};
+
+#else // STL version which uses less memory but uses heap memory.
+
+	// If you use this version, then you want to make sure your STL is using new/delete
+	// by default and then make sure you are globally mapping new/delete to your 
+	// custom allocation system. STLPort, for example, tends to want to use its
+	// own internal allocator which is non-optimal for serious uses.
+
+	EA_DISABLE_VC_WARNING(4574 4350)
+	#include <map> // Note that this dependency on STL map is only present if you use this pathway, which is disabled by default.
+	EA_RESTORE_VC_WARNING()
+
+	#include <eathread/eathread.h>
+	#include <eathread/eathread_futex.h>
+
+	// Map-based storage used by ThreadLocalStorage when no other variant applies.
+	// Owns a heap-allocated map from thread id to stored pointer; the map pointer 
+	// starts out NULL and the map is deleted by the destructor.
+	struct EAThreadLocalStorageData
+	{
+		EAThreadLocalStorageData() : mThreadToDataMap(NULL) {}
+		~EAThreadLocalStorageData() { delete mThreadToDataMap; mThreadToDataMap = NULL; }
+		void** GetTLSEntry(bool bCreateIfNotFound);
+		// We allocate this map only when needed
+		// This prevents too early allocations before our allocator initialization
+		std::map<EA::Thread::ThreadUniqueId, const void*> *mThreadToDataMap;
+		EA::Thread::Futex mFutex; // NOTE(review): presumably guards access to the map -- confirm in the implementation.
+	private:
+		// Disable copy and assignment. Declared but intentionally not defined.
+		// The assignment operator returns a reference, as is conventional; the
+		// by-value form would itself have required the disabled copy constructor.
+		EAThreadLocalStorageData(const EAThreadLocalStorageData&);
+		EAThreadLocalStorageData& operator=(const EAThreadLocalStorageData&);
+	};
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// class ThreadLocalStorage
+		///
+		/// This is a class that lets you store a pointer to data uniquely for 
+		/// each thread. It thus allows access to a pointer as if it were local
+		/// but each thread gets its own copy.
+		///
+		/// The implementation behind this class maps to the PThreads API under
+		/// Unix-like systems, maps to the Windows TLS API under Windows, and 
+		/// maps to a custom implementation otherwise. The PThreads API has a 
+		/// mechanism whereby you can set a callback to execute when a thread
+		/// exits; the system will call the callback once for each pointer 
+		/// that was stored in all thread local storage objects. Due to the 
+		/// general weaknesses of the PThread mechanism and due to our interest
+		/// in being as lean as possible, we don't support automatic callbacks
+		/// such as with PThreads. The same effect can be achieved manually 
+		/// when needed.
+		///
+		/// Example usage:
+		///     ThreadLocalStorage tls;
+		///     void* pValue;
+		///     bool bResult;
+		///     
+		///     pValue  = tls.GetValue();              // Return value will be NULL.
+		///     bResult = tls.SetValue(NULL);          // This is fine and bResult should be true.
+		///     pValue  = tls.GetValue();              // Return value will be NULL.
+		///     bResult = tls.SetValue(pSomeObject);   // Set thread-specific value to pSomeObject.
+		///     bResult = tls.SetValue(pOtherObject);  // Set thread-specific value to pOtherObject.
+		///     pValue  = tls.GetValue();              // Return value will be pOtherObject.
+		///     bResult = tls.SetValue(NULL);          // This is fine and bResult should be true.
+		///
+		class EATHREADLIB_API ThreadLocalStorage
+		{
+		public:
+			ThreadLocalStorage();
+		   ~ThreadLocalStorage();
+
+			/// GetValue
+			/// Returns the pointer previously stored via SetValue by the calling 
+			/// thread, or returns NULL if there is no stored value or if the user 
+			/// stored NULL.
+			void* GetValue();
+
+			/// SetValue
+			/// Stores a pointer, returns true if the storage was possible. In general,
+			/// the only reason that false would ever be returned is if there wasn't 
+			/// sufficient memory remaining for the operation. When a thread exits, 
+			/// it should call SetValue(NULL), as there is currently no mechanism to 
+			/// automatically detect thread exits on some platforms and thus there is
+			/// no way to automatically clear these values.
+			bool SetValue(const void* pData);
+
+			/// GetPlatformData
+			/// Returns the platform-specific thread local storage handle for debugging
+			/// uses or other cases whereby special (and non-portable) uses are required.
+			/// The returned pointer is the address of the EAThreadLocalStorageData member.
+			void* GetPlatformData()
+				{ return &mTLSData; }
+
+		protected:
+			EAThreadLocalStorageData mTLSData;
+
+		private:
+			// Disable copy and assignment. Declared but intentionally not defined.
+			// The assignment operator returns a reference, as is conventional; the
+			// by-value form would itself have required the disabled copy constructor.
+			ThreadLocalStorage(const ThreadLocalStorage&);
+			ThreadLocalStorage& operator=(const ThreadLocalStorage&);
+		};
+		/////////////////////////////////////////////////////////////////////////
+
+
+
+		/// ThreadLocalStorageFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class ThreadLocalStorage.
+		/// A primary use of this would be to allow the ThreadLocalStorage implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ThreadLocalStorageFactory
+		{
+		public:
+			// Conventional create/destroy pair: the factory obtains and releases the memory.
+			static ThreadLocalStorage* CreateThreadLocalStorage();                           // Internally implemented as: return new ThreadLocalStorage;
+			static void                DestroyThreadLocalStorage(ThreadLocalStorage* pTLS);  // Internally implemented as: delete pTLS;
+
+			// Manual construct/destruct pair: the caller provides and releases the memory,
+			// which must be at least GetThreadLocalStorageSize() bytes.
+			static size_t              GetThreadLocalStorageSize();                          // Internally implemented as: return sizeof(ThreadLocalStorage);
+			static ThreadLocalStorage* ConstructThreadLocalStorage(void* pMemory);           // Internally implemented as: return new(pMemory) ThreadLocalStorage;
+			static void                DestructThreadLocalStorage(ThreadLocalStorage* pTLS); // Internally implemented as: pTLS->~ThreadLocalStorage();
+		};
+
+
+
+		// ThreadLocalPointer
+		// This is a class that adds pointer type awareness to ThreadLocalStorage.
+		// The interface is designed to look like the standard auto_ptr class.
+		//
+		// The following is disabled until we provide a way to enumerate and delete
+		// the pointers when the object goes out of scope or delete the thread-specific 
+		// pointer when the thread ends. Both are required before this class fully acts
+		// as one would expect.
+		//
+		//template <typename T>
+		//class ThreadLocalPointer
+		//{
+		//public:
+		//    T* get()        const { return  static_cast<T*>(mTLS.GetValue()); }
+		//    T* operator->() const { return  static_cast<T*>(mTLS.GetValue()); }
+		//    T& operator*()  const { return *static_cast<T*>(mTLS.GetValue()); }
+		//    void reset(T* pNew = 0){
+		//        T* const pTemp = get();
+		//        if(pNew != pTemp){
+		//            delete pTemp;
+		//            mTLS.SetValue(pNew);
+		//        }
+		//    }
+		//
+		//protected:
+		//    ThreadLocalStorage mTLS;
+		//
+		//private:
+		//    ThreadLocalPointer(const ThreadLocalPointer&);
+		//    const ThreadLocalPointer& operator=(const ThreadLocalPointer&);
+		//};
+		/////////////////////////////////////////////////////////////////////////
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // #ifdef EATHREAD_EATHREAD_STORAGE_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,270 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Overview (partially taken from Usenet)
+// On all modern hardware, a store instruction does not necessarily result
+// in an immediate write to main memory, or even to the (processor specific)
+// cache. A store instruction simply places a write request in a request
+// queue, and continues. (Future reads in the same processor will check if
+// there is a write to the same address in this queue, and fetch it, rather
+// than reading from memory. Reads from another processor, however, can't
+// see this queue.) Generally, the ordering of requests in this queue is
+// not guaranteed, although some hardware offers stricter guarantees.
+// Thus, you must do something to ensure that the writes actually occur.
+// This is called a write barrier, and generally takes the form of a special
+// instruction.
+// 
+// And of course, just because you have written the data to main memory
+// doesn't mean that some other processor, executing a different thread,
+// doesn't have a stale copy in its cache, and use that for a read. Before
+// reading the variables, you need to ensure that the processor has the
+// most recent copy in its cache. This is called a read barrier, and
+// again, takes the form of a special hardware instruction. A number of
+// architectures (e.g. Intel x86-32) still guarantee read consistency -- 
+// all of the processors "listen" on the main memory bus, and if there is 
+// a write, automatically purge the corresponding data from their cache. 
+// But not all.
+//
+// Note that if you are writing data within an operating system-level 
+// locked mutex, the lock and unlock of the mutex will synchronize memory
+// for you, thus eliminating the need for you to execute read and/or write
+// barriers. However, mutex locking and its associated thread stalling is 
+// a potentially inefficient operation when in some cases you could simply 
+// write the memory from one thread and read it from another without 
+// using mutexes around the data access. Some systems let you write memory 
+// from one thread and read it from another (without you using mutexes)
+// without using memory barriers, but others (notably SMP) will not let you 
+// get away with this, even if you put a mutex around the write. In these
+// cases you need read/write barriers.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SYNC_H
+#define EATHREAD_EATHREAD_SYNC_H
+
+
+// Note
+// These functions are not placed in a C++ namespace but instead are standalone.
+// The reason for this is that these are usually implemented as #defines of 
+// C or asm code or implemented as compiler intrinsics. We however document
+// these functions here as if they are simply functions. The actual platform-
+// specific declarations are in the appropriate platform-specific directory.
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing.
+#elif defined(EA_PROCESSOR_X86)
+	#include <eathread/x86/eathread_sync_x86.h>
+#elif defined(EA_PROCESSOR_X86_64)
+	#include <eathread/x86-64/eathread_sync_x86-64.h>
+#elif defined(EA_PROCESSOR_IA64)
+	#include <eathread/ia64/eathread_sync_ia64.h>
+#elif defined(EA_PLATFORM_APPLE)
+	#include <eathread/apple/eathread_sync_apple.h>
+#elif defined(EA_PROCESSOR_ARM) 
+	#include <eathread/arm/eathread_sync_arm.h>
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+// EA_THREAD_DO_SPIN
+//     
+// Provides a macro which maps to whatever processor idle functionality the given platform requires.
+// 
+// Example usage:
+//     EA_THREAD_DO_SPIN();
+// 
+// The #ifndef guard lets the user supply their own EA_THREAD_DO_SPIN definition beforehand.
+#ifndef EA_THREAD_DO_SPIN
+	#ifdef EA_THREAD_COOPERATIVE  
+		 // NOTE(review): presumably a cooperative scheduler needs the spinning thread to give up the processor -- confirm.
+		 #define EA_THREAD_DO_SPIN() ThreadSleep()               
+	#else
+		 #define EA_THREAD_DO_SPIN() EAProcessorPause() // We deliberately don't check EA_TARGET_SMP here (and sleep when it is not defined), because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+	#endif
+#endif
+
+
+
+// The above header files would define EA_THREAD_SYNC_IMPLEMENTED.
+#if !defined(EA_THREAD_SYNC_IMPLEMENTED)
+   // Perhaps it should be considered too serious of an error to allow compilation 
+   // to continue. If so, then we should enable the #error below.
+   // #error EA_THREAD_SYNC_IMPLEMENTED not defined. 
+
+
+   /// EAProcessorPause
+   ///
+   /// \Declaration
+   ///    void EAProcessorPause();
+   ///
+   /// \Description
+   ///    This statement causes the processor to efficiently (as much as possible)
+   ///    execute a no-op (a.k.a nop or noop). These are particularly useful in 
+   ///    spin-wait loops. Without a proper pause, some processors suffer severe
+   ///    performance penalties while executing spin-wait loops such as those in 
+   ///    simple spin locks. Many processors have specialized pause instructions 
+   ///    (e.g. Intel x86 P4 'pause' or 'asm rep nop') that can be taken advantage 
+   ///    of here.
+   ///
+   /// \Example
+   ///    while (!flag) {
+   ///       EAProcessorPause();
+   ///    }
+   // Fallback used when no platform header defined EA_THREAD_SYNC_IMPLEMENTED:
+   // expands to nothing, so no pause instruction is emitted.
+   #define EAProcessorPause()
+
+
+
+   /// EAReadBarrier
+   ///
+   /// \Declaration
+   ///    void EAReadBarrier();
+   ///
+   /// \Description
+   ///    A read barrier ensures that neither software nor hardware perform a memory 
+   ///    read prior to the read barrier and that recent writes to main memory are 
+   ///    immediately seen (and not using stale cached data) by the processor executing
+   ///    the read barrier. This generally does not mean a (performance draining) 
+   ///    invalidation of the entire cache but does possibly mean invalidating any cache 
+   ///    that refers to main memory which has changed. Thus, there is a performance 
+   ///    cost but considering the use of this operation, this is the most efficient 
+   ///    way of achieving the effect.
+   ///
+   /// \Example
+   ///    The following function will operate fine on some multiprocessing systems but 
+   ///    hang (possibly indefinitely) on other multiprocessing systems unless the 
+   ///    EAReadBarrier call is present.
+   ///
+   ///    void ThreadFunction() {
+   ///      extern volatile int gFlag;
+   ///      while(gFlag == 0){ // Wait for separate thread to write to gFlag.
+   ///         EAProcessorPause(); 
+   ///         EAReadBarrier();
+   ///         // Do memory sharing operations with other threads here.
+   ///      }
+   ///    }
+   // Fallback used when no platform header defined EA_THREAD_SYNC_IMPLEMENTED:
+   // expands to nothing, so it provides NO read barrier.
+   #define EAReadBarrier()
+
+
+
+
+
+   /// EAWriteBarrier
+   ///
+   /// \Declaration
+   ///    void EAWriteBarrier();
+   ///
+   /// \Description
+   ///    A write barrier ensures that neither software nor hardware delay a memory 
+   ///    write operation past the barrier. If you want your memory write committed
+   ///    to main memory immediately, this statement will have that effect. As such,
+   ///    this is something like a flush of the current processor's write cache.
+   ///    Note that flushing memory from a processor's cache to main memory like this
+   ///    doesn't cause a second processor to immediately see the changed values in 
+   ///    main memory, as the second processor has a read cache between it and main 
+   ///    memory. Thus, a second processor would need to execute a read barrier if it
+   ///    wants to see the updates immediately.
+   // Fallback used when no platform header defined EA_THREAD_SYNC_IMPLEMENTED:
+   // expands to nothing, so it provides NO write barrier.
+   #define EAWriteBarrier()
+
+
+
+
+
+   /// EAReadWriteBarrier
+   ///
+   /// \Declaration
+   ///    void EAReadWriteBarrier();
+   ///
+   /// \Description
+   ///    A read/write barrier has the same effect as both a read barrier and a write
+   ///    barrier at once. A read barrier ensures that neither software nor hardware 
+   ///    perform a memory read prior to the read barrier, while a write barrier 
+   ///    ensures that neither software nor hardware delay a memory write operation 
+   ///    past the barrier. A ReadWriteBarrier specifically acts like a WriteBarrier
+   ///    followed by a ReadBarrier, despite the name ReadWriteBarrier being the 
+   ///    other way around.
+   ///
+   ///    EAReadWriteBarrier synchronizes both reads and writes to system memory 
+   ///    between processors and their caches on multiprocessor systems, particularly 
+   ///    SMP systems. This can be useful to ensure the state of global variables at 
+   ///    a particular point in your code for multithreaded applications. Higher level
+   ///    thread synchronization level primitives such as mutexes achieve the same 
+   ///    effect (while providing the additional functionality of synchronizing code
+   ///    execution) but at a significantly higher cost. 
+   ///
+   ///    A two-processor SMP system has two processors, each with its own instruction
+   ///    and data caches. If the first processor writes to a memory location and the 
+   ///    second processor needs to read from that location, the first processor's 
+   ///    write may still be in its cache and not committed to main memory, and the 
+   ///    second processor may thus not see the newly written value. The value
+   ///    will eventually get written from the first cache to main memory, but if you 
+   ///    need to ensure that it is written at a particular time, you would use a 
+   ///    ReadWrite barrier. 
+   ///
+   ///    This function is similar to the Linux kernel mb() function and to the 
+   ///    Windows kernel KeMemoryBarrier function.
+   // Fallback used when no platform header defined EA_THREAD_SYNC_IMPLEMENTED:
+   // expands to nothing, so it provides NO read/write barrier.
+   #define EAReadWriteBarrier()
+
+
+
+
+
+   /// EACompilerMemoryBarrier
+   ///
+   /// \Declaration
+   ///    void EACompilerMemoryBarrier();
+   ///
+   /// \Description
+   ///    Provides a barrier for compiler optimization. The compiler will not make
+   ///    assumptions about locations across an EACompilerMemoryBarrier statement.
+   ///    For example, if a compiler has memory values temporarily cached in 
+   ///    registers but you need them to be written to memory, you can execute the
+   ///    EACompilerMemoryBarrier statement. This is somewhat similar in concept to 
+   ///    the C volatile keyword except that it applies to all memory the compiler
+   ///    is currently working with and applies its effect only where you specify
+   ///    and not for every usage as with the volatile keyword. 
+   ///
+   ///    Under GCC, this statement is equivalent to the GCC `asm volatile("":::"memory")` 
+   ///    statement. Under VC++, this is equivalent to a _ReadWriteBarrier statement  
+   ///    (not to be confused with EAReadWriteBarrier above) and equivalent to the Windows
+   ///    kernel function KeMemoryBarrierWithoutFence. This is also known as barrier()
+   ///    under Linux. 
+   ///    
+   ///    EACompilerMemoryBarrier is a compiler-level statement and not a 
+   ///    processor-level statement. For processor-level memory barriers, 
+   ///    use EAReadBarrier, etc.
+   /// 
+   /// \Example
+   ///    Without the compiler memory barrier below, an optimizing compiler might
+   ///    never assign 0 to gValue because gValue is reassigned to 1 later and 
+   ///    because gValue is not declared volatile.
+   ///
+   ///    void ThreadFunction() {
+   ///       extern int gValue; // Note that gValue is intentionally not declared volatile, 
+   ///       gValue = 0;
+   ///       EACompilerMemoryBarrier();
+   ///       gValue = 1;
+   ///    }
+   // Fallback used when no platform header defined EA_THREAD_SYNC_IMPLEMENTED:
+   // expands to nothing, so it provides NO compiler barrier.
+   #define EACompilerMemoryBarrier()
+
+
+#endif // EA_THREAD_SYNC_IMPLEMENTED
+
+
+#endif // #ifdef EATHREAD_EATHREAD_SYNC_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,802 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_EATHREAD_THREAD_H
+#define EATHREAD_EATHREAD_THREAD_H
+
+#include <eathread/eathread.h>
+#include <eathread/eathread_semaphore.h>
+#include <eathread/eathread_atomic.h>
+EA_DISABLE_ALL_VC_WARNINGS()
+#include <stddef.h>
+#include <stdlib.h>
+#include <new>
+#include <type_traits>
+#include <utility>
+EA_RESTORE_ALL_VC_WARNINGS()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'AtomicInt32' needs to have a
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////
+/// ThreadData
+///
+/// This is used internally by class Thread.
+/// To consider: Move this declaration into a platform-specific 
+/// header file.
+/////////////////////////////////////////////////////////////////////////
+
+#if !EA_THREADS_AVAILABLE
+
+	// Placeholder used when EA_THREADS_AVAILABLE is false: there is no
+	// per-thread state to track, so the struct is intentionally empty.
+	struct EAThreadDynamicData
+	{
+	};
+
+	// Platform data held by class Thread (threads-unavailable variant).
+	// Holds only a pointer to the (empty) EAThreadDynamicData.
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif EA_USE_CPP11_CONCURRENCY
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+
+	EA_DISABLE_VC_WARNING(4062 4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4350)
+	#if EA_PLATFORM_WINDOWS
+		#include <ctxtcall.h> // workaround for compile errors in winrt.  see http://connect.microsoft.com/VisualStudio/feedback/details/730564/ppl-in-winrt-projects-fail-to-compile
+	#endif
+	#include <future>
+	#include <mutex>
+
+	// Per-thread state for the C++11 (std::thread / std::future) implementation.
+	// Instances are shared through AddRef/Release, which operate on mnRefCount,
+	// and are never copied (copy construction and assignment are deleted).
+	struct EAThreadDynamicData
+	{
+		// Signature of the internal trampoline that runs the user function on the new thread.
+		typedef void (*ThreadFunc)(EAThreadDynamicData* tdd, void* userFunc, void* userContext, void* userWrapperFunc);
+
+		// Constructs data identified by a unique thread id and name.
+		EAThreadDynamicData(EA::Thread::ThreadUniqueId uniqueThreadId, const char* pThreadName);
+		// Constructs data from the user function, its context, an optional user wrapper and the trampoline.
+		EAThreadDynamicData(void* userFunc, void* userContext, void* userWrapperFunc, ThreadFunc threadFunc);
+		~EAThreadDynamicData();
+
+		void AddRef();
+		void Release();
+
+		EA::Thread::AtomicInt32 mnRefCount;
+		EA::Thread::AtomicInt32 mStatus;
+		intptr_t mReturnValue;
+		char mName[EATHREAD_NAME_SIZE];
+		void* mpStackBase; 
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask; 
+		
+		EA::Thread::ThreadUniqueId mUniqueThreadId;
+
+		// Bundles the std:: objects of the thread behind a pointer (mpComp).
+		// Declaration order matters: members are initialized in declaration order,
+		// so mReturnPromise must precede the futures that are obtained from it.
+		struct EAThreadComposite
+		{
+			EAThreadComposite()
+			: mReturnPromise()
+			, mReturnFuture(mReturnPromise.get_future())
+			, mGetStatusFuture(mReturnFuture)
+			{
+			}
+
+			std::promise<intptr_t> mReturnPromise;
+			std::shared_future<intptr_t> mReturnFuture;     // Shares the state of mReturnPromise.
+			std::shared_future<intptr_t> mGetStatusFuture;  // Second handle to the same shared state as mReturnFuture.
+			std::thread mThread;
+		} *mpComp;
+
+	private:
+		// Disable copy and assignment. The assignment operator is declared with the
+		// conventional reference return type and both operations are explicitly deleted,
+		// so misuse is diagnosed at compile time rather than at link time.
+		EAThreadDynamicData(const EAThreadDynamicData&) = delete;
+		EAThreadDynamicData& operator=(const EAThreadDynamicData&) = delete;
+	};
+
+	// Platform data held by class Thread (C++11 variant): a single pointer
+	// to the EAThreadDynamicData of the thread.
+	struct EAThreadData 
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+	EA_RESTORE_VC_WARNING()
+
+// TODO:  collapse the defines.
+#elif defined(EA_PLATFORM_SONY)
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+	#include <kernel.h>
+	#include <scebase.h>
+
+	// Internal queue wrapper which is used to allow for a higher resolution sleep than what is provided by Sony's sleep functions
+	// as despite the names, sceKernelSleep, sceKernelUSleep and sceKernelNanosleep are all 1 ms resolution whereas this timer is 100 microseconds
+	struct EAThreadTimerQueue
+	{
+		// Tries to create the kernel event queue and records in mbEnabled whether that succeeded.
+		EAThreadTimerQueue()
+		{
+			const int createResult = sceKernelCreateEqueue(&mTimerEventQueue, "EAThread Timer Queue");
+
+			// Creation fails when too many kernel objects are open. That is an accepted outcome:
+			// the ThreadSleep function implements a fallback, so failure is recorded, not asserted.
+			//
+			// EAT_ASSERT_FORMATTED(mbEnabled, "Failed to initialize the EAThread Timer Queue (0x%x)", createResult);
+			mbEnabled = (createResult == SCE_OK);
+		}
+
+		// Deletes the event queue, but only if the constructor managed to create it.
+		// NOTE(review): the struct is implicitly copyable, and a copy would delete the same
+		// queue a second time -- confirm that instances are never copied.
+		~EAThreadTimerQueue()
+		{
+			if(mbEnabled)
+			{
+				sceKernelDeleteEqueue(mTimerEventQueue);
+				mbEnabled = false;
+			}
+		}
+
+		SceKernelEqueue mTimerEventQueue;
+		EA::Thread::AtomicUint32 mCurrentId = 0;
+		bool mbEnabled = false;
+	};
+
+	// Per-thread state for the Sony implementation.
+	// Shared through AddRef/Release, which operate on mnRefCount.
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+
+		void  AddRef();
+		void  Release();
+
+		EA::Thread::ThreadId			mThreadId;
+		EA::Thread::SysThreadId			mSysThreadId;
+		pid_t							mThreadPid;                     // For Linux this is the thread ID from gettid(). Otherwise it's the getpid() value.
+		volatile int					mnStatus;
+		intptr_t						mnReturnValue;
+		void*							mpStartContext[2];
+		void*							mpBeginThreadUserWrapper;       // User-specified BeginThread function wrapper or class wrapper
+		void*							mpStackBase; 
+		EA::Thread::AtomicInt32			mnRefCount;
+		char							mName[EATHREAD_NAME_SIZE];
+		int								mStartupProcessor;              // The thread affinity for the thread to set itself to after it starts. We need to do this because we currently have no way to set the affinity of another thread until after it has started.
+		EA::Thread::Mutex				mRunMutex;                      // Locked while the thread is running. The reason for this mutex is that it allows timeouts to be specified in the WaitForEnd function.
+		EA::Thread::Semaphore			mStartedSemaphore;              // Signaled when the thread starts. This allows us to know in a thread-safe way when the thread has actually started executing.
+		EA::Thread::ThreadAffinityMask  mnThreadAffinityMask; 
+		EAThreadTimerQueue				mThreadTimerQueue;				// This queue allows for high resolution timer events to be submitted per thread, allowing for better sleep resolution than Sony's provided sleep functions.
+	};
+
+
+	// Platform data held by class Thread (Sony variant): a single pointer
+	// to the EAThreadDynamicData of the thread.
+	struct EAThreadData{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+
+	// Per-thread state for the Unix / POSIX threads implementation.
+	// Shared through AddRef/Release, which operate on mnRefCount.
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+
+		void  AddRef();
+		void  Release();
+
+		EA::Thread::ThreadId    mThreadId;
+		EA::Thread::SysThreadId mSysThreadId;
+		pid_t                   mThreadPid;                     // For Linux this is the thread ID from gettid(). Otherwise it's the getpid() value.
+		volatile int            mnStatus;
+		intptr_t                mnReturnValue;
+		void*                   mpStartContext[2];
+		void*                   mpBeginThreadUserWrapper;       // User-specified BeginThread function wrapper or class wrapper
+		void*                   mpStackBase; 
+		EA::Thread::AtomicInt32 mnRefCount;
+		char                    mName[EATHREAD_NAME_SIZE];
+		int                     mStartupProcessor;              // DEPRECATED:  The thread affinity for the thread to set itself to after it starts. We need to do this because we currently have no way to set the affinity of another thread until after it has started.
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask; // Replaces the deprecated mStartupProcessor; unlike it, the mask is not limited to being specified only at thread startup time.
+		EA::Thread::Mutex       mRunMutex;                      // Locked while the thread is running. The reason for this mutex is that it allows timeouts to be specified in the WaitForEnd function.
+		EA::Thread::Semaphore   mStartedSemaphore;              // Signaled when the thread starts. This allows us to know in a thread-safe way when the thread has actually started executing.
+	};
+
+
+	// Platform data held by class Thread (Unix / POSIX variant): a single
+	// pointer to the EAThreadDynamicData of the thread.
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+
+	// Per-thread state for the Microsoft (non-POSIX) implementation.
+	// Shared through AddRef/Release, which operate on mnRefCount.
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+		void    AddRef();
+		void    Release();
+
+		EA::Thread::ThreadId                mhThread;
+		unsigned int                        mnThreadId;                     // EA::Thread::SysThreadId
+		int                                 mnStatus;
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask;
+		intptr_t                            mnReturnValue;
+		void*                               mpStartContext[3];              // Note: three entries here, whereas the Sony and Unix variants above use two.
+		void*                               mpBeginThreadUserWrapper;     // User-specified BeginThread function wrapper or class wrapper
+		void*                               mpStackBase; 
+		EA::Thread::AtomicInt32             mnRefCount;
+		char                                mName[EATHREAD_NAME_SIZE];
+	};
+
+
+	// Platform data held by class Thread (Microsoft variant): a single
+	// pointer to the EAThreadDynamicData of the thread.
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#endif
+
+namespace EA
+{
+namespace Thread
+{
+
+/// ThreadEnumData
+/// One entry of the array filled in by EnumerateThreads. It carries a pointer
+/// to the dynamic data of an enumerated thread; the usage example for
+/// EnumerateThreads calls Release() on each entry once it is done with it.
+struct EATHREADLIB_API ThreadEnumData
+{
+	ThreadEnumData();
+	~ThreadEnumData();
+
+	EAThreadDynamicData* mpThreadDynamicData;
+	void Release();
+};
+
+} 
+}
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// FindThreadDynamicData
+		/// Utility functionality, not needed for most uses.
+		/// Looks up the EAThreadDynamicData associated with a thread, identified either
+		/// by its ThreadId or by its SysThreadId.
+		/// NOTE(review): presumably returns NULL when no matching thread is known -- confirm
+		/// against the implementation.
+		EATHREADLIB_API EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId);
+		EATHREADLIB_API EAThreadDynamicData* FindThreadDynamicData(SysThreadId threadId);
+		
+		/// EnumerateThreads
+		/// Enumerates known threads. For some platforms the returned thread list is limited
+		/// to the main thread and threads created by EAThread.
+		/// Returns the required count to enumerate all threads.
+		/// Fills in thread data up till the supplied capacity.
+		/// Because the returned count may be larger than the supplied capacity, callers
+		/// must not index the array beyond the capacity they passed in.
+		///
+		/// Example usage:
+		///     ThreadEnumData enumData[32];
+		///     size_t count = EA::Thread::EnumerateThreads(enumData, EAArrayCount(enumData));
+		///
+		///     for(size_t i = 0; (i < count) && (i < EAArrayCount(enumData)); i++)
+		///     {
+		///         printf("Thread id: %s\n", EAThreadIdToString(enumData[i].mpThreadDynamicData->mThreadId));
+		///         enumData[i].Release();
+		///     }
+		size_t EATHREADLIB_API EnumerateThreads(ThreadEnumData* pDataArray, size_t dataArrayCapacity);
+
+		/// RunnableFunction
+		/// Defines the prototype of a standalone thread function.
+		/// \param pContext  The user context pointer that was passed to Thread::Begin.
+		/// The return value is of type intptr_t, which is a standard integral 
+		/// data type that is large enough to hold an int or void*.
+		typedef intptr_t (*RunnableFunction)(void* pContext);
+
+		/// IRunnable
+		/// Defines a class whose Run function executes in a separate thread.
+		/// An implementation of this interface can be run using a Thread class instance.
+		struct EATHREADLIB_API IRunnable
+		{
+			 /// Virtual so that implementations can be destroyed through an IRunnable pointer.
+			 virtual ~IRunnable() { }
+
+			 /// \brief Task run entry point
+			 /// The thread terminates when this method returns. 
+			 /// The return value is of type intptr_t, which is a standard integral 
+			 /// data type that is large enough to hold an int or void*.
+			 /// Note that default arguments of virtual functions are resolved from the static
+			 /// type of the call expression, so overrides should keep the same default (NULL).
+			 virtual intptr_t Run(void* pContext = NULL) = 0;
+		};
+
+		/// RunnableFunctionUserWrapper
+		/// Defines the prototype of a user callback function that is called when a
+		/// RunnableFunction-based thread is started.
+		/// \param defaultRunnableFunction: default function Thread::Begin() normally would
+		///          call, user must call this function with passed in pContext.
+		/// \param pContext: thread start context void* passed in from thread Thread::Begin() 
+		///
+		/// Here's an example:
+		/// \code
+		/// intptr_t ThreadFunction(void*)
+		/// {
+		///      printf("Throw NULL pointer Exception.\n");
+		///      char* pTest = NULL;
+		///      *pTest = 1;
+		///      return 0;
+		/// }
+		/// 
+		/// intptr_t MyThreadBeginWrapper(RunnableFunction defaultRunnableFunction, void* pContext)
+		/// {
+		///      // Declared outside the try block so that it can be returned below.
+		///      intptr_t retValue = 0;
+		///
+		///      // Do pre-start thread function stuff
+		///      try {
+		///            // You must call defaultRunnableFunction to execute the thread function;
+		///            // if you don't, the thread function will never get executed.
+		///            retValue = defaultRunnableFunction(pContext);
+		///      }
+		///      catch(...) {
+		///            printf("Exception detected.\n");
+		///      }
+		///     
+		///      // do post-start thread function stuff
+		///      return retValue;
+		/// }
+		/// \endcode
+		///
+		/// In the code that begins the thread:
+		/// \code
+		/// ...
+		/// threadId = thread.Begin(ThreadFunction, NULL, NULL, MyThreadBeginWrapper);
+		/// ...
+		/// \endcode
+		typedef intptr_t (*RunnableFunctionUserWrapper)(RunnableFunction defaultRunnableFunction, void* pContext);
+
+
+		/// RunnableClassUserWrapper
+		/// Defines the prototype of a user callback function that is called when an
+		/// IRunnable-based thread is started.
+		/// \param defaultRunnableClass: the IRunnable object Thread::Begin() normally would
+		///          run, user must call its Run function with passed in pContext.
+		/// \param pContext: thread start context void* passed in from thread Thread::Begin() 
+		/// 
+		/// Here's an example:
+		/// \code
+		/// class MyThreadClass : public IRunnable
+		/// {
+		/// public:
+		///      virtual intptr_t Run(void* pContext = NULL)
+		///      {
+		///            printf("Throw NULL pointer Exception.\n");
+		///            char* pTest = NULL;
+		///            *pTest = 1;
+		///            return 0;
+		///      }
+		/// };
+		/// 
+		/// intptr_t MyThreadBeginWrapper(IRunnable* defaultRunnableClass, void* pContext)
+		/// {
+		///      // Declared outside the try block so that it can be returned below.
+		///      intptr_t retValue = 0;
+		///
+		///      // do pre-start thread function stuff
+		///
+		///      // a good example is try catch block
+		///      try
+		///      {
+		///            // You must call defaultRunnableClass->Run to execute the thread function;
+		///            // if you don't, the thread function will never get executed.
+		///            retValue = defaultRunnableClass->Run(pContext);
+		///      }
+		///      catch(...)
+		///      {
+		///            printf("Exception detected.\n");
+		///      }
+		///     
+		///      // do post-start thread function stuff
+		///      return retValue;
+		/// }
+		/// \endcode
+		///
+		/// In the code that begins the thread:
+		///
+		/// \code 
+		/// ...
+		/// MyThreadClass* pMyThreadClass = new MyThreadClass();
+		/// threadId = thread.Begin(pMyThreadClass, NULL, NULL, MyThreadBeginWrapper);
+		/// ...
+		/// \endcode
+		typedef intptr_t (*RunnableClassUserWrapper)(IRunnable* defaultRunnableClass, void* pContext);
+
+		 
+		/// ThreadParameters
+		/// Used for specifying thread starting parameters. Note that we do not 
+		/// include a 'start paused' parameter. The reason for this is that such 
+		/// a thing is not portable and other mechanisms can achieve the same 
+		/// effect. Thread pause/resume in general is considered bad practice.
+		///
+		/// A pointer to an instance is passed to Thread::Begin; passing NULL there
+		/// selects default settings. The defaults described per member below are 
+		/// established by the ThreadParameters constructor.
+		struct EATHREADLIB_API ThreadParameters
+		{
+			void*       mpStack;                                       /// Pointer to stack memory. This would be the low address of the memory. A NULL value means to create a default stack. Default is NULL. Note that some platforms (such as Windows) don't support a user-supplied stack.
+			size_t      mnStackSize;                                   /// Size of the stack memory. Default is variable, depending on the platform.
+			int         mnPriority;                                    /// Value in the range of [kThreadPriorityMin, kThreadPriorityMax]. Default is kThreadPriorityDefault.
+			int         mnProcessor;                                   /// 0-based index of which processor to run the thread on. A value of -1 means to use default. Default is -1. See SetThreadProcessor for caveats regarding this value.
+			const char* mpName;                                        /// A name to give to the thread. Useful for identifying threads in a descriptive way.
+			EA::Thread::ThreadAffinityMask mnAffinityMask;             /// A bitmask representing the cores that the thread is allowed to run on.  NOTE:  This affinity mask is only applied when mnProcessor is set to kProcessorAny.
+			bool        mbDisablePriorityBoost;                        /// Whether the system should override the default behavior of boosting the thread priority as they come out of a wait state (currently only supported on Windows).
+
+			ThreadParameters();
+		};
+
+
+
+		/// Thread
+		/// 
+		/// Note that we do not provide thread suspend and resume functions.
+		/// The reason for this is that such things are inherently unsafe as 
+		/// you usually cannot know where the thread is executing when the 
+		/// suspension occurs. The safe alternative is to use signal or 
+		/// semaphore primitives to achieve the same thing in a safe way.
+		///
+		/// For performance reasons, the thread creation functions of this 
+		/// class are themselves not thread-safe. Thus if you want to call
+		/// the Begin functions for an instance of this class from multiple
+		/// threads, you will need to synchronize access to the begin 
+		/// functions yourself.
+		class EATHREADLIB_API Thread
+		{
+		public:
+			enum Status
+			{
+				kStatusNone,    /// The thread has neither started nor ended.
+				kStatusRunning, /// The thread has started but not ended.
+				kStatusEnded    /// The thread has both started and ended.
+			};
+
+			/// Thread
+			/// \brief Thread constructor.
+			Thread();
+
+			/// Thread
+			/// \brief Thread copy constructor.
+			Thread(const Thread& t);
+
+			/// Thread
+			/// \brief Thread destructor. The destructor does not take any 
+			/// action on the thread associated with it. Any threads created
+			/// by this class will continue to run and exit normally after 
+			/// this destructor has executed.
+		   ~Thread();
+
+			/// operator=
+			/// \brief Thread assignment operator.
+			Thread& operator=(const Thread& t);
+
+			/// \brief Return global RunnableFunctionUserWrapper set by user.
+			/// \return function pointer to the RunnableFunctionUserWrapper function the
+			/// user set; NULL if nothing is set.
+			/// \sa RunnableFunctionUserWrapper
+			static RunnableFunctionUserWrapper GetGlobalRunnableFunctionUserWrapper();
+
+			/// \brief Set global RunnableFunctionUserWrapper.  This can only be
+			/// set once in the application life time.
+			/// \param pUserWrapper user specified wrapper function pointer.
+			/// \sa RunnableFunctionUserWrapper
+			static void SetGlobalRunnableFunctionUserWrapper(RunnableFunctionUserWrapper pUserWrapper);
+
+			/// \brief Return global RunnableClassUserWrapper set by user.
+			/// \return function pointer to the RunnableClassUserWrapper function the
+			/// user set; NULL if nothing is set.
+			/// \sa RunnableClassUserWrapper
+			static RunnableClassUserWrapper GetGlobalRunnableClassUserWrapper();
+
+			/// \brief Set global RunnableClassUserWrapper.  This can only be
+			/// set once in the application life time.
+			/// \param pUserWrapper user specified wrapper function pointer.
+			/// \sa RunnableClassUserWrapper
+			static void SetGlobalRunnableClassUserWrapper(RunnableClassUserWrapper pUserWrapper);
+
+			/// Begin
+			/// \brief Starts a thread via a RunnableFunction.
+			/// Returns the thread id of the newly running thread.
+			/// The pContext argument is passed to the RunnableFunction and serves
+			/// to allow the caller to pass information to the thread. 
+			/// The pThreadParameters argument allows the caller to specify additional
+			/// information about how to start the thread. If this parameter is NULL, 
+			/// then default settings will be chosen.
+			/// The Begin function itself is not thread-safe. While this Thread class
+			/// can be used to Begin multiple threads, the Begin function itself cannot
+			/// safely be executed by multiple threads at a time. This is by design and
+			/// allows for a simpler more efficient library.
+			/// User can have their own RunnableFunction wrapper by specifying one in
+			/// pUserWrapper.  When pUserWrapper is used, pUserWrapper will get called
+			/// first, then pUserWrapper function can do whatever is desired before the
+			/// just-created thread's entry point is called.
+			/// If pUserWrapper is not supplied, the global wrapper returned by 
+			/// GetGlobalRunnableFunctionUserWrapper is used.
+			/// \sa RunnableFunctionUserWrapper
+			ThreadId Begin(RunnableFunction pFunction, void* pContext = NULL, const ThreadParameters* pThreadParameters = NULL, RunnableFunctionUserWrapper pUserWrapper = GetGlobalRunnableFunctionUserWrapper());
+
+			/// Begin
+			/// Starts a thread via an object of the IRunnable interface.
+			/// Returns the thread id of the newly running thread.
+			/// The pContext argument is passed to the IRunnable's Run function and serves
+			/// to allow the caller to pass information to the thread. 
+			/// The pThreadParameters argument allows the caller to specify additional
+			/// information about how to start the thread. If this parameter is NULL, 
+			/// then default settings will be chosen.
+			/// The Begin function itself is not thread-safe. While this Thread class
+			/// can be used to Begin multiple threads, the Begin function itself cannot
+			/// safely be executed by multiple threads at a time. This is by design and
+			/// allows for a simpler more efficient library.
+			/// User can have their own RunnableClass wrapper by specifying one in pUserWrapper.
+			/// When pUserWrapper is used, pUserWrapper will get called first, then
+			/// pUserWrapper function can do whatever is desired before the just-created
+			/// thread's entry point is called.
+			/// If pUserWrapper is not supplied, the global wrapper returned by 
+			/// GetGlobalRunnableClassUserWrapper is used.
+			/// \sa RunnableClassUserWrapper
+			ThreadId Begin(IRunnable* pRunnable, void* pContext = NULL, const ThreadParameters* pThreadParameters = NULL, RunnableClassUserWrapper pUserWrapper = GetGlobalRunnableClassUserWrapper());
+
+			/// WaitForEnd
+			/// Waits for the thread associated with an object of this class
+			/// to end. Returns one of enum Status to indicate the status upon
+			/// return of this call.
+			/// This function is similar to the Posix pthread_join function and
+			/// the Windows WaitForSingleObject function.
+			/// If input pThreadReturnValue is non-NULL, it will be filled in with
+			/// the return value of the thread.
+			/// This function must be called only by a single thread at a time.
+			/// The resulting behaviour is undefined if multiple threads call this function.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			Status WaitForEnd(const ThreadTime& timeoutAbsolute = kTimeoutNone, intptr_t* pThreadReturnValue = NULL);
+
+			/// GetStatus
+			/// Returns one of enum Status. Note that in the most general sense
+			/// the running status may change if the thread quit right after 
+			/// this call was made. But this function is useful if you know that
+			/// a function was running and you want to poll for its status while
+			/// waiting for it to exit.
+			/// If input pThreadReturnValue is non-NULL, it will be filled in with
+			/// the return value of the thread if the Status is kStatusEnded.
+			/// If the Status is not kStatusEnded, pThreadReturnValue will be ignored.
+			Status GetStatus(intptr_t* pThreadReturnValue = NULL) const;
+
+			/// GetId
+			/// Gets the Id of the thread associated with an object of this class.
+			/// This Id is unique throughout the system. This function returns a 
+			/// value that under Posix threads would be synonymous with pthread_t
+			/// and under Windows would be synonymous with a thread HANDLE (and not 
+			/// a Windows thread id).
+			ThreadId GetId() const;
+
+			/// GetPriority
+			/// Gets the priority of the thread. Return kThreadPriorityUnknown if 
+			/// the thread associated with this class isn't running. If a thread 
+			/// wants to get its own priority, it can use this class member or it 
+			/// can simply use the global GetThreadPriority function and not need 
+			/// an instance of this class. If you want to manipulate the thread 
+			/// priority via the native platform interface, you can use GetId to 
+			/// get the platform-specific identifier and use that value with native APIs.
+			///
+			/// This function can return any int except for kThreadPriorityUnknown, as the 
+			/// current thread's priority will always be knowable. A return value of kThreadPriorityDefault
+			/// means that this thread is of normal (a.k.a. default) priority.
+			/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+			/// for more information about thread priority values and behaviour.
+			///
+			/// NOTE(review): the two paragraphs above disagree about whether 
+			/// kThreadPriorityUnknown can be returned; the second one reads as if it 
+			/// describes querying the current thread only -- confirm against the implementation.
+			int GetPriority() const;
+
+			/// SetPriority
+			/// Sets the priority of the thread. Returns false if the thread associated
+			/// with this class isn't running. If a thread wants to set its own priority,
+			/// it can use this class member or it can simply use the global SetThreadPriority
+			/// function and not need an instance of this class. If you want to manipulate  
+			/// the thread priority via the native platform interface, you can use GetId to 
+			/// get the platform-specific identifier and use that value with native APIs.
+			///
+			/// Accepts any integer priority value except kThreadPriorityUnknown.
+			/// On some platforms, this function will automatically convert any invalid 
+			/// priority for that particular platform to a valid one.  A normal (a.k.a. default) thread 
+			/// priority is identified by kThreadPriorityDefault.
+			///
+			/// You can set the priority of a Thread object only if it has already begun.
+			/// You can also set the priority with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design is so in order to simplify the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future.
+			bool SetPriority(int priority);
+
+			/// SetProcessor
+			/// Sets the processor the given thread should run on. Valid values 
+			/// are kThreadProcessorDefault, kThreadProcessorAny, or a processor
+			/// index in the range of [0, processor count). If the input value
+			/// is >= the processor count, it will be reduced to be a modulo of
+			/// the processor count. Any other invalid value will cause the processor
+			/// to be set to zero.
+			/// NOTE(review): other comments in this header refer to kProcessorAny rather 
+			/// than kThreadProcessorAny -- confirm which constant names are current.
+			/// 
+			/// For some platforms you can set the processor of a Thread object only if it 
+			/// has already begun.
+			///
+			/// You can also set the processor with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design is so in order to simplify the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future. This is the most reliable way to set the thread
+			/// processor, as it works on all platforms. 
+			void SetProcessor(int nProcessor);
+
+			/// Wake
+			/// Wakes up a sleeping thread if it is sleeping. This necessarily can only
+			/// be called from a thread other than the sleeping thread. You must be careful
+			/// to not rely on this function as a synchronization primitive. For example,
+			/// in the general case you cannot be sure that after calling Wake that the 
+			/// thread will be awake, as it is possible that right after you called Wake
+			/// the thread immediately went back to sleep before you could do anything.
+			/// Nevertheless, this function is useful in waking up a thread from a 
+			/// (potentially long) sleep so that it can examine data, lock a synchronization
+			/// primitive, or simply exit. 
+			///
+			/// Note that this class has no member Sleep function. The reason is that a 
+			/// thread can only put itself to sleep and cannot put other threads to sleep.
+			/// The thread should use the static Sleep function to put itself to sleep.
+			void Wake();
+
+			/// GetName
+			/// Returns the name of the thread assigned by the SetName function.
+			/// If the thread was not named by the SetName function, then the name is empty ("").
+			const char* GetName() const;
+			
+			/// SetName
+			/// Sets a descriptive name for the thread. On some platforms this name is passed
+			/// on to the debugging tools so they can see this name. The name length, including
+			/// a terminating 0 char, is limited to EATHREAD_NAME_SIZE characters. Any characters
+			/// beyond that are ignored.
+			/// 
+			/// You can set the name of a Thread object only if it has already begun.
+			/// You can also set the name with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design is so in order to simplify the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future.
+			///
+			/// Some platforms (e.g. Linux) have the restriction that this function works properly only
+			/// when called by the same thread that you want to name. Given this situation,
+			/// the most portable way to use this SetName function is to either always call
+			/// it from the thread to be named or to use the ThreadParameters to give the 
+			/// thread a name before it is started and let the started thread name itself.
+			void SetName(const char* pName);
+
+			/// SetAffinityMask
+			/// Sets an affinity mask for the thread.  On some platforms, this OS feature is
+			/// not supported.  In this situation, you are at the mercy of the OS thread scheduler.
+			/// 
+			/// Example(s), with the rightmost bit standing for processor 0:
+			/// "00000100" -> thread is pinned to processor 2
+			/// "01010100" -> thread is pinned to processor 2, 4, and 6.
+			void SetAffinityMask(ThreadAffinityMask mnAffinityMask);
+
+			/// GetAffinityMask
+			/// Returns the affinity mask for this specific thread.
+			ThreadAffinityMask GetAffinityMask();
+
+			/// SetDefaultProcessor
+			/// Sets the default processor to create threads with. To specify the processor
+			/// for a running thread, use SetProcessor() or specify the processor in the
+			/// thread creation ThreadParameters.  
+			/// 
+			/// If nProcessor is set to kProcessorAny, EAThread will automatically determine  
+			/// which processor to launch threads to.
+			///
+			/// Please refer to SetProcessor for valid values for the nProcessor argument.
+			/// The value is stored in the static atomic sDefaultProcessor.
+			static void SetDefaultProcessor(int nProcessor) 
+			  { sDefaultProcessor = nProcessor; }
+
+
+			/// GetDefaultProcessor
+			/// Gets the default processor to create threads with, i.e. the value most
+			/// recently stored by SetDefaultProcessor.
+			static int GetDefaultProcessor()
+				{ return sDefaultProcessor; }
+
+
+			/// SetDefaultProcessorMask
+			/// Sets which processors created threads should be explicitly run on. 
+			/// The default value is 0xffffffffffffffff.
+			/// Each bit refers to the associated processor. A mask of 0xffffffffffffffff
+			/// means to allow running on any processor, and on desktop platforms such
+			/// as Windows it means that the OS decides what processor to use on its own.
+			/// Not all platforms support this functionality, even if multiple processors are present.
+			static void SetDefaultProcessorMask(uint64_t mask)
+				{ sDefaultProcessorMask.SetValue(mask); }
+
+
+			/// GetDefaultProcessorMask
+			/// Returns the mask set by SetDefaultProcessorMask.
+			static uint64_t GetDefaultProcessorMask()
+				{ return sDefaultProcessorMask.GetValue(); }
+
+
+			/// GetPlatformData
+			/// Returns platform-specific data for this thread for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The value returned is a pointer to this object's EAThreadData member,
+			/// typed as void*; cast it to EAThreadData* to use it.
+			void* GetPlatformData()
+				{ return &mThreadData; }
+
+		protected:
+			static RunnableFunctionUserWrapper sGlobalRunnableFunctionUserWrapper;  /// Returned by GetGlobalRunnableFunctionUserWrapper.
+			static RunnableClassUserWrapper    sGlobalRunnableClassUserWrapper;     /// Returned by GetGlobalRunnableClassUserWrapper.
+			static EA::Thread::AtomicInt32     sDefaultProcessor;                   /// See SetDefaultProcessor / GetDefaultProcessor.
+			static EA::Thread::AtomicUint64    sDefaultProcessorMask;               /// See SetDefaultProcessorMask / GetDefaultProcessorMask.
+			EAThreadData                       mThreadData;                         /// Platform-specific data; exposed via GetPlatformData.
+		};
+
+
+		/// ThreadFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Thread.
+		/// A primary use of this would be to allow the Thread implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		/// The functions pair up: a Thread obtained from CreateThread is disposed of with
+		/// DestroyThread, while a Thread built by ConstructThread in caller-supplied memory
+		/// (of at least GetThreadSize() bytes) is torn down with DestructThread, after which
+		/// the caller releases the memory.
+		class EATHREADLIB_API ThreadFactory
+		{
+		public:
+			static Thread* CreateThread();                  // Internally implemented as: return new Thread;
+			static void    DestroyThread(Thread* pThread);  // Internally implemented as: delete pThread;
+
+			static size_t  GetThreadSize();                 // Internally implemented as: return sizeof(Thread);
+			static Thread* ConstructThread(void* pMemory);  // Internally implemented as: return new(pMemory) Thread;
+			static void    DestructThread(Thread* pThread); // Internally implemented as: pThread->~Thread();
+		};
+
+
+		/// MakeThread
+		///
+		/// Simplifies creating threads from lambdas and other callable objects.
+		///
+		/// A decayed copy of f is constructed in memory obtained from the allocator returned
+		/// by EA::Thread::GetAllocator() (or from malloc when that returns NULL). The new thread
+		/// invokes that copy with no arguments, then destroys it and releases its memory.
+		/// Any value returned by the callable is discarded; the thread function returns 0.
+		///
+		/// \param f       Callable object that is invoked as f() on the new thread.
+		/// \param params  Thread start parameters; their address is passed to Thread::Begin.
+		/// \return        The Thread object that was used to begin the thread. If the memory for
+		///                the callable could not be allocated, Begin is not called and the
+		///                returned Thread is left in its default-constructed state.
+		///
+		/// NOTE(review): if Thread::Begin fails to start the thread, the entry function never
+		/// runs and the callable's storage is not released -- confirm how Begin reports failure.
+		/// NOTE(review): the storage is not explicitly aligned; callables with an extended
+		/// alignment requirement depend on what the allocator happens to return.
+		template <typename F>
+		auto MakeThread(F&& f, const EA::Thread::ThreadParameters& params = EA::Thread::ThreadParameters())
+		{
+			typedef std::decay_t<F> decayed_f_t;
+
+			// Allocates raw storage for the callable, preferring the EAThread allocator.
+			auto get_memory = []() -> void*
+			{
+				const auto sz = sizeof(decayed_f_t);
+				auto* pAllocator = EA::Thread::GetAllocator();
+
+				if(pAllocator)
+					return pAllocator->Alloc(sz);
+				else
+					return malloc(sz);
+			};
+
+			// Thread entry point. It is captureless so that it converts to a RunnableFunction.
+			auto thread_entry = [](void* pMemory) -> intptr_t
+			{
+				// Releases storage the same way get_memory obtained it.
+				auto free_memory = [](void* p)
+				{
+					auto* pAllocator = EA::Thread::GetAllocator();
+					if(pAllocator)
+						pAllocator->Free(p);
+					else
+						free(p);
+				};
+
+				// pMemory is the void* that was produced by the placement new below.
+				auto* pF = static_cast<decayed_f_t*>(pMemory);
+				(*pF)();
+				pF->~decayed_f_t();
+				free_memory(pF);
+				return 0;
+			};
+
+			EA::Thread::Thread thread;
+
+			// Constructing an object with placement new at a null address is undefined
+			// behaviour, so only begin the thread if the allocation succeeded.
+			void* const pMemory = get_memory();
+			if(pMemory)
+				thread.Begin(thread_entry, new(pMemory) decayed_f_t(std::forward<F>(f)), &params);  // deleted in the thread entry function
+
+			return thread;
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_THREAD_H
+
+
+
+
+
+
@@ -0,0 +1,190 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+#define EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// Integer wrapper around a volatile value of type T.
+		/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+		/// and declaration specifications per platform.
+		///
+		/// This primary template defines construction, copying, plain reads and the operator
+		/// forwarding. SetValue, SetValueConditional, Increment, Decrement and Add are only
+		/// declared here; their bodies are not part of this class definition.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			/// Initializes the value by calling SetValue(n).
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			/// Copy constructor. Initializes mValue from x.GetValue().
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue()) {}
+
+			/// Copy assignment. Writes x.GetValue() directly to mValue rather than going through SetValue.
+			AtomicInt& operator=(const ThisType& x)
+				{ mValue = x.GetValue(); return *this; }
+
+			/// Returns mValue. In this primary template it is a direct read of the volatile member.
+			ValueType GetValue() const
+				{ return mValue; }
+
+			/// Returns mValue via a direct read of the volatile member.
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			// Declared only; no definition is provided inside this class.
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			// Each operator forwards to one of the named functions above. The postfix forms adjust
+			// the result of Increment()/Decrement() by one, which yields the prior value provided
+			// those functions return the updated value.
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			volatile ValueType mValue; // The wrapped value; left uninitialized by the default constructor.
+		};
+
+
+		// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+		//
+		// Every explicit specialization below forwards to a GCC __sync builtin:
+		//   GetValue                - atomically adds 0 and returns the result, i.e. the read is done
+		//                             as a read-modify-write. The const_cast strips const and volatile
+		//                             from &mValue because GetValue is a const member function.
+		//   SetValue                - __sync_synchronize() followed by __sync_lock_test_and_set;
+		//                             returns whatever that builtin returns.
+		//   SetValueConditional     - compare-and-swap; true when the value the builtin reports
+		//                             equals 'condition'.
+		//   Increment/Decrement/Add - __sync_add_and_fetch / __sync_sub_and_fetch; return the
+		//                             builtin's result.
+		// The same set is provided for int32_t, uint32_t, int64_t and uint64_t.
+		#if defined(EA_COMPILER_CLANG) || defined(__APPLE__) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) || defined(EA_COMPILER_RVCT) // GCC 4.3 or later. Depends on the GCC implementation.
+
+			// ---- 32-bit specializations (int32_t / uint32_t) ----
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			// ---- 64-bit specializations (int64_t / uint64_t) ----
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,73 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+#define EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+
+
+#include <EABase/eabase.h>
+
+
+#define EA_THREAD_SYNC_IMPLEMENTED
+
+
+// EAProcessorPause
+// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for
+// high performance spinning, as spinning without it incurs a high performance penalty.
+//
+// On x86 and x86-64 the macro emits 'rep ; nop' through inline assembly. On every other processor
+// it expands to nothing, so EAProcessorPause() is then a no-op.
+
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+	#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+#else
+	#define EAProcessorPause()
+#endif
+
+
+
+// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+// The x86 processor memory architecture ensures read and write consistency on both single and
+// multi processing systems. This makes programming simpler but limits maximum system performance.
+// On compilers older than GCC 4.1 we define the barriers to be the same as EACompilerMemoryBarrier
+// in order to limit the compiler from making any assumptions at its level about memory usage.
+// Year 2003+ versions of the Microsoft SDK define a 'MemoryBarrier' statement which has the same
+// effect as EAReadWriteBarrier.
+//
+// These are object-like macros that expand to the name of a function or function-like macro, so
+// they are invoked with parentheses, e.g. EAReadWriteBarrier(). With GCC 4.1 or later all three
+// expand to the __sync_synchronize builtin; otherwise they expand to EACompilerMemoryBarrier,
+// which this header defines further down (macro expansion happens at the point of use).
+
+#if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+	#define EAReadBarrier      __sync_synchronize
+	#define EAWriteBarrier     __sync_synchronize
+	#define EAReadWriteBarrier __sync_synchronize
+#else
+	#define EAReadBarrier      EACompilerMemoryBarrier
+	#define EAWriteBarrier     EACompilerMemoryBarrier
+	#define EAReadWriteBarrier EACompilerMemoryBarrier
+#endif
+
+
+// EACompilerMemoryBarrier
+// Tells the compiler not to reorder or cache memory accesses across this point. It emits no
+// instruction, so it constrains code generation only and is not a hardware barrier.
+//
+// The empty asm statement with a "memory" clobber is GCC syntax that does not depend on the
+// target processor, so it is defined for every processor. Previously the macro expanded to
+// nothing on processors other than ARM, x86 and x86-64, which silently removed the barrier there.
+
+#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_ATOMIC_H
+#define EATHREAD_INTERNAL_ATOMIC_H
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		// Function-pointer types for 64-bit operations on a volatile int64_t:
+		// add a value, read the value, set the value, and set the value conditionally.
+		typedef int64_t(*AtomicAdd64Function)(volatile int64_t *ptr, int64_t value);
+		typedef int64_t(*AtomicGetValue64Function)(volatile int64_t *ptr);
+		typedef int64_t(*AtomicSetValue64Function)(volatile int64_t *ptr, int64_t value);
+		typedef bool(*AtomicSetValueConditional64Function)(volatile int64_t *ptr, int64_t value, int64_t condition);
+
+
+		// Global function pointers of the types above. They are only declared here (extern);
+		// their definitions and the functions they point to live elsewhere.
+		// NOTE(review): this header uses int64_t without including any header itself, so it relies
+		// on the including file having already brought the type into scope -- confirm that is intended.
+		extern AtomicAdd64Function AtomicAdd64;
+		extern AtomicGetValue64Function AtomicGetValue64;
+		extern AtomicSetValue64Function AtomicSetValue64;
+		extern AtomicSetValueConditional64Function AtomicSetValueConditional64;
+	}
+}
+
+#endif
@@ -0,0 +1,638 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_INTERNAL_CONFIG_H
+#define EATHREAD_INTERNAL_CONFIG_H
+
+
+#include <EABase/eabase.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <stddef.h>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_VERSION
+//
+// We more or less follow the conventional EA packaging approach to versioning 
+// here. A primary distinction here is that minor versions are defined as two
+// digit entities (e.g. ".03") instead of minimal digit entities (e.g. ".3"). The logic
+// here is that the value is a counter and not a floating point fraction.
+// Note that the major version doesn't have leading zeros.
+//
+// Example version strings:
+//      "0.91.00"   // Major version 0, minor version 91, patch version 0. 
+//      "1.00.00"   // Major version 1, minor and patch version 0.
+//      "3.10.02"   // Major version 3, minor version 10, patch version 02.
+//     "12.03.01"   // Major version 12, minor version 03, patch version 01.
+//
+// Example usage:
+//     printf("EATHREAD_VERSION version: %s", EATHREAD_VERSION);
+//     printf("EATHREAD_VERSION version: %d.%d.%d", EATHREAD_VERSION_N / 10000 % 100, EATHREAD_VERSION_N / 100 % 100, EATHREAD_VERSION_N % 100);
+//
+#ifndef EATHREAD_VERSION
+	// String and numeric forms of the same version; the numeric form packs the three
+	// two-digit fields as major * 10000 + minor * 100 + patch, so keep the two in sync.
+	#define EATHREAD_VERSION   "1.32.09"
+	#define EATHREAD_VERSION_N  13209
+
+	// Older style version info
+	// Each field is extracted from EATHREAD_VERSION_N as a two-digit decimal group.
+	#define EATHREAD_VERSION_MAJOR (EATHREAD_VERSION_N / 100 / 100 % 100)
+	#define EATHREAD_VERSION_MINOR (EATHREAD_VERSION_N       / 100 % 100)
+	#define EATHREAD_VERSION_PATCH (EATHREAD_VERSION_N             % 100)
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// _GNU_SOURCE
+//
+// Defined or not defined.
+// If this is defined then GlibC extension functionality is enabled when
+// glibc header files are included.
+//
+// NOTE(review): <EABase/eabase.h> and <stddef.h> are included earlier in this header, before
+// this definition. Feature-test macros normally have to be defined ahead of the first system
+// header to take effect -- confirm that defining it here is early enough.
+//
+#if !defined(_GNU_SOURCE)
+	#define _GNU_SOURCE
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_TLS_COUNT
+//
+// Defined as compile-time constant integer > 0.
+//
+#if !defined(EATHREAD_TLS_COUNT)
+	#define EATHREAD_TLS_COUNT 16
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_THREADS_AVAILABLE
+//
+// Defined as 0 or 1
+// Defines if threading is supported on the given platform.
+// If 0 then the EAThread implementation is not capable of creating threads,
+// but other facilities (e.g. mutex) work in a non-thread-aware way.
+//
+#ifndef EA_THREADS_AVAILABLE
+	#define EA_THREADS_AVAILABLE 1
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_USE_CPP11_CONCURRENCY
+//
+// Defined as 0 or 1
+// Defaults to 1 only when EA_PLATFORM_WINDOWS is defined and the build is not in the desktop
+// WinAPI partition; every other configuration defaults to 0. Can be predefined by the user.
+//
+#ifndef EA_USE_CPP11_CONCURRENCY
+	#if defined(EA_PLATFORM_WINDOWS) && !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) 
+		#define EA_USE_CPP11_CONCURRENCY 1
+	#else
+		#define EA_USE_CPP11_CONCURRENCY 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+//
+// Use the common EAThread AtomicInt implementation on all platforms.
+//
+// Defined as 0 or 1
+//
+#ifndef EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+	#define EA_USE_COMMON_ATOMICINT_IMPLEMENTATION 1
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_POSIX_THREADS_AVAILABLE
+//
+// Defined as 0 or 1
+//
+#if !defined(EA_POSIX_THREADS_AVAILABLE)
+	#if defined(__unix__) || defined(__linux__) || defined(__APPLE__)
+		#define EA_POSIX_THREADS_AVAILABLE 1
+	#else
+		// This also covers EA_PLATFORM_SONY: the POSIX threading API is present there but its use
+		// is discouraged by Sony, who want shipping code to use their scePthreads* API.
+		#define EA_POSIX_THREADS_AVAILABLE 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAT_ASSERT_ENABLED
+//
+// Defined as 0 or 1, default is 1 if EA_DEBUG or _DEBUG is defined.
+// If defined as 1, then assertion failures are reported via EA::Thread::AssertionFailure(). 
+// 
+#ifndef EAT_ASSERT_ENABLED
+	#if defined(EA_DEBUG) || defined(_DEBUG)
+		#define EAT_ASSERT_ENABLED 1
+	#else
+		#define EAT_ASSERT_ENABLED 0
+	#endif
+#endif
+
+
+
+// EAT_ASSERT / EAT_ASSERT_MSG / EAT_ASSERT_FORMATTED / EAT_FAIL_MSG
+//
+// When EAT_ASSERT_ENABLED is nonzero, a failed expression is reported through
+// EA::Thread::AssertionFailure (or AssertionFailureV for the formatted variant).
+// Otherwise every macro expands to nothing, so its arguments are not evaluated.
+// VC warning 4127 is disabled around the do/while(0) wrapper of each checking macro.
+//
+#if EAT_ASSERT_ENABLED
+
+	// Reports "<file>(<line>): <expression text>" when the expression is false.
+	#define EAT_ASSERT(expression) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailure(__FILE__ "(" EA_STRINGIFY(__LINE__) "): " #expression); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+
+	// Reports the caller-supplied message when the expression is false.
+	#define EAT_ASSERT_MSG(expression, msg) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailure(msg); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+
+	// Reports a formatted message built from pFormat and the trailing arguments when the expression is false.
+	#define EAT_ASSERT_FORMATTED(expression, pFormat, ...) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailureV(pFormat, __VA_ARGS__); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+
+	// Unconditionally reports the message.
+	#define EAT_FAIL_MSG(msg) (EA::Thread::AssertionFailure(msg))
+
+#else
+
+	#define EAT_ASSERT(expression)
+	#define EAT_ASSERT_MSG(expression, msg)
+	#define EAT_ASSERT_FORMATTED(expression, pFormat, ...)
+	#define EAT_FAIL_MSG(msg)
+
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAT_COMPILETIME_ASSERT   
+//
+// Compile-time assertion for this module.
+// C-like declaration:
+//    void EAT_COMPILETIME_ASSERT(bool bExpression);
+//
+#if !defined(EAT_COMPILETIME_ASSERT)
+	#define EAT_COMPILETIME_ASSERT(expression) static_assert(expression, EA_STRINGIFY(expression))
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_TLSALLOC_DTOR_ENABLED
+//
+// Defined as 0 or 1. Default is 1.
+// Defines if the TLSAlloc class destructor frees the TLS thread handle.
+// This won't make a difference unless you were using EAThread in a DLL and 
+// you were repeatedly loading and unloading DLLs.
+// See eathread_pc.cpp for usage of this and more info about the situation.
+//
+#ifndef EATHREAD_TLSALLOC_DTOR_ENABLED
+	#define EATHREAD_TLSALLOC_DTOR_ENABLED 1
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_LIKELY / EATHREAD_UNLIKELY
+//
+// Defined as a macro which gives a hint to the compiler for branch
+// prediction. GCC gives you the ability to manually give a hint to 
+// the compiler about the result of a comparison, though it's often
+// best to compile shipping code with profiling feedback under both
+// GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there 
+// are times when you feel very sure that a boolean expression will
+// usually evaluate to either true or false and can help the compiler
+// by using an explicit directive...
+//
+// Example usage:
+//     if(EATHREAD_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0.
+//         { ... }
+//
+// Example usage:
+//     if(EATHREAD_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0.
+//         { ... }
+//
+// Each hint has its own guard so that a user who predefines only one of the two still gets the
+// default for the other. With a single shared guard, predefining EATHREAD_LIKELY alone left
+// EATHREAD_UNLIKELY undefined, and predefining EATHREAD_UNLIKELY alone had it redefined here.
+#ifndef EATHREAD_LIKELY
+	#define EATHREAD_LIKELY(x) EA_LIKELY(x)
+#endif
+#ifndef EATHREAD_UNLIKELY
+	#define EATHREAD_UNLIKELY(x) EA_UNLIKELY(x)
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NAMING
+//
+// Defined as 0, 1 (enabled), or 2 (enabled only when debugger is present). 
+// 
+// Allowed values for EATHREAD_NAMING.
+#define EATHREAD_NAMING_DISABLED 0
+#define EATHREAD_NAMING_ENABLED  1
+#define EATHREAD_NAMING_OPTIONAL 2
+
+// Default: thread naming is disabled in shipping builds and enabled everywhere else.
+// The named constants are used for both branches so the default stays tied to the values above.
+#ifndef EATHREAD_NAMING
+	#if defined(EA_SHIP) || defined(EA_FINAL) // These are two de-facto standard EA defines for identifying a shipping build.
+		#define EATHREAD_NAMING EATHREAD_NAMING_DISABLED
+	#else
+		#define EATHREAD_NAMING EATHREAD_NAMING_ENABLED // or EATHREAD_NAMING_OPTIONAL? 
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NAME_SIZE
+//
+// Specifies the max size to support for naming threads.
+// This value can be changed as desired.
+//
+#ifndef EATHREAD_NAME_SIZE
+	#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)
+		#define EATHREAD_NAME_SIZE 64
+	#else
+		#define EATHREAD_NAME_SIZE 32
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_XBDM_ENABLED
+//
+// Defined as 0 or 1, with 1 being the default for debug builds.
+// This controls whether xbdm library usage is enabled on XBox 360. This library
+// allows for runtime debug functionality. But shipping applications are not
+// allowed to use xbdm. 
+//
+#if !defined(EA_XBDM_ENABLED)
+	#if defined(EA_DEBUG)
+		#define EA_XBDM_ENABLED 1
+	#else
+		#define EA_XBDM_ENABLED 0
+	#endif
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DLL
+//
+// Defined as 0 or 1. The default is dependent on the definition of EA_DLL.
+// If EA_DLL is defined, then EATHREAD_DLL is 1, else EATHREAD_DLL is 0.
+// EA_DLL is a define that controls DLL builds within the EAConfig build system. 
+// EATHREAD_DLL controls whether EAThread is built and used as a DLL. 
+// Normally you wouldn't do such a thing, but there are use cases for such
+// a thing, particularly in the case of embedding C++ into C# applications.
+//
+#ifndef EATHREAD_DLL
+	#if defined(EA_DLL)
+		#define EATHREAD_DLL 1
+	#else
+		#define EATHREAD_DLL 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREADLIB_API
+//
+// This is used to label functions as DLL exports under Microsoft platforms.
+// If EA_DLL is defined, then the user is building EAThread as a DLL and EAThread's
+// non-templated functions will be exported. EAThread template functions are not
+// labelled as EATHREADLIB_API (and are thus not exported in a DLL build). This is 
+// because it's not possible (or at least unsafe) to implement inline templated 
+// functions in a DLL.
+//
+// Example usage of EATHREADLIB_API:
+//    EATHREADLIB_API int someVariable = 10;         // Export someVariable in a DLL build.
+//
+//    struct EATHREADLIB_API SomeClass{              // Export SomeClass and its member functions in a DLL build.
+//        EATHREADLIB_LOCAL void PrivateMethod();    // Not exported.
+//    };
+//
+//    EATHREADLIB_API void SomeFunction();           // Export SomeFunction in a DLL build.
+//
+// For GCC, see http://gcc.gnu.org/wiki/Visibility
+//
+#ifndef EATHREADLIB_API // If the build file hasn't already defined this to be dllexport...
+	#if EATHREAD_DLL 
+		// DLL build, and the build file did not predefine EATHREADLIB_API: use the import-side
+		// decoration for Microsoft compilers and Cygwin, and default/hidden visibility for GCC 4+.
+		#if defined(_MSC_VER)
+			#define EATHREADLIB_API      __declspec(dllimport)
+			#define EATHREADLIB_LOCAL
+		#elif defined(__CYGWIN__)
+			#define EATHREADLIB_API      __attribute__((dllimport))
+			#define EATHREADLIB_LOCAL
+		#elif (defined(__GNUC__) && (__GNUC__ >= 4))
+			#define EATHREADLIB_API      __attribute__ ((visibility("default")))
+			#define EATHREADLIB_LOCAL    __attribute__ ((visibility("hidden")))
+		#else
+			// Unrecognized compiler: no decoration.
+			#define EATHREADLIB_API
+			#define EATHREADLIB_LOCAL
+		#endif
+	#else
+		// Not a DLL build: both macros expand to nothing.
+		#define EATHREADLIB_API
+		#define EATHREADLIB_LOCAL
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_ALLOC_PREFIX
+//
+// Defined as a string literal. Defaults to this package's name.
+// Can be overridden by the user by predefining it or by editing this file.
+// This define is used as the default name used by this package for naming
+// memory allocations and memory allocators.
+//
+// All allocations names follow the same naming pattern:
+//     <package>/<module>[/<specific usage>]
+// 
+// Example usage:
+//     void* p = pCoreAllocator->Alloc(37, EATHREAD_ALLOC_PREFIX, 0);
+//
+// Example usage:
+//     gMessageServer.GetMessageQueue().get_allocator().set_name(EATHREAD_ALLOC_PREFIX "MessageSystem/Queue");
+//
+#ifndef EATHREAD_ALLOC_PREFIX
+	#define EATHREAD_ALLOC_PREFIX "EAThread/"
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_USE_STANDARD_NEW
+//
+// Defines whether we use the basic standard operator new or the named
+// extended version of operator new, as per the EASTL package.
+//
+#ifndef EATHREAD_USE_STANDARD_NEW
+	#if EATHREAD_DLL  // A DLL must provide its own implementation of new, so we just use built-in new.
+		#define EATHREAD_USE_STANDARD_NEW 1
+	#else
+		#define EATHREAD_USE_STANDARD_NEW 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NEW
+//
+// This is merely a wrapper for operator new which can be overridden and 
+// which has debug/release forms.
+//
+// Example usage:
+//    SomeClass* pObject = EATHREAD_NEW("SomeClass") SomeClass(1, 2, 3);
+//
+// Note that the single EATHREAD_NEW guard covers EATHREAD_NEW_ALIGNED and EATHREAD_DELETE as well,
+// so a user who overrides EATHREAD_NEW must provide all three.
+#ifndef EATHREAD_NEW
+	#if EATHREAD_USE_STANDARD_NEW
+			// Standard operator new: the name, alignment and offset arguments are discarded.
+			#define EATHREAD_NEW(name)                            new
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new
+			#define EATHREAD_DELETE                               delete
+	#else
+		// Named/extended operator new. Debug builds pass __FILE__ and __LINE__ as the last two
+		// arguments; other builds pass 0 for both.
+		#if defined(EA_DEBUG)
+			#define EATHREAD_NEW(name)                            new(name, 0, 0, __FILE__, __LINE__)
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new(alignment, offset, name, 0, 0, __FILE__, __LINE__)
+			#define EATHREAD_DELETE                               delete
+		#else
+			#define EATHREAD_NEW(name)                            new(name, 0, 0, 0, 0)
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new(alignment, offset, name, 0, 0, 0, 0)
+			#define EATHREAD_DELETE                               delete
+		#endif
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS
+//
+// This symbol is defined if a platform has both native and emulated atomics.
+// Currently the only platform that requires this is iOS as earlier versions 
+// of the operating system (ie: iOS 3) do not provide OS support for 64-bit
+// atomics while later versions (ie: iOS 4/5) do.
+#ifndef EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS
+	#if defined(__APPLE__)
+		#define EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS 1 
+	#else
+		#define EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLIBC_BACKTRACE_AVAILABLE
+//
+// You generally need to be using GCC, GLIBC, and Linux for backtrace to be available.
+// And even then it's available only some of the time.
+//
+#if !defined(EATHREAD_GLIBC_BACKTRACE_AVAILABLE)
+	#if (defined(__clang__) || defined(__GNUC__)) && (defined(EA_PLATFORM_LINUX) || defined(__APPLE__)) && !defined(__CYGWIN__) && !defined(EA_PLATFORM_ANDROID)
+		#define EATHREAD_GLIBC_BACKTRACE_AVAILABLE 1
+	#else
+		#define EATHREAD_GLIBC_BACKTRACE_AVAILABLE 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLIBC_VERSION
+//
+// We provide our own GLIBC numeric version to determine when system library 
+// calls are available.
+// The value is encoded as (major * 1000) + minor. The macro is left undefined
+// when __GLIBC__ is not defined.
+//
+#if defined(__GLIBC__)
+	#define EATHREAD_GLIBC_VERSION ((__GLIBC__ * 1000) + __GLIBC_MINOR__) 
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GETCALLSTACK_SUPPORTED
+//
+// Defined as 0 or 1.
+// Identifies whether runtime callstack unwinding (i.e. GetCallstack()) is 
+// supported for the given platform. In some cases it may be that unwinding 
+// support code is present but it hasn't been tested for reliability and may
+// have bugs preventing it from working properly. In some cases (e.g. x86) 
+// it may be that optimized builds make it difficult to read the callstack 
+// reliably, despite that we flag the platform as supported.
+//
+// The platform list is checked in order and the first match wins. The final #else makes sure the
+// macro is always defined (as documented: 0 or 1), including on platforms that are not listed.
+#if !defined(EATHREAD_GETCALLSTACK_SUPPORTED)
+	#if EATHREAD_GLIBC_BACKTRACE_AVAILABLE          // Typically this means Linux on x86.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_IPHONE)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_ANDROID)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_IPHONE_SIMULATOR)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_WINDOWS_PHONE) && defined(EA_PROCESSOR_ARM)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_MICROSOFT)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_LINUX)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_OSX)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_CYGWIN)               // Support hasn't been verified.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_MINGW)                // Support hasn't been verified.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#else                                           // Unlisted platform: treat as unsupported.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEBUG_DETAIL_ENABLED
+//
+// Defined as 0 or 1. 
+// If true then detailed debug info is displayed. Can be enabled in opt builds.
+//
+#ifndef EATHREAD_DEBUG_DETAIL_ENABLED
+	#define EATHREAD_DEBUG_DETAIL_ENABLED 0
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_MIN_ABSOLUTE_TIME
+//
+// Defined as a time in milliseconds. 
+// Locks and waits allow the user to specify an absolute timeout time. In order
+// to detect that the user accidentally specified a relative time, we define a
+// minimum allowed absolute time which we assert on. This minimum time is one
+// that in practice is impossible to be a future absolute time.
+//
+#ifndef EATHREAD_MIN_ABSOLUTE_TIME
+	#define EATHREAD_MIN_ABSOLUTE_TIME  10000
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+//
+// Defined as 0 or 1. 
+// If true then the platform supports a user specified thread affinity mask.
+//
+// The branches are checked in order: Xbox One and Sony platforms report support before the
+// C++11-concurrency and Android/Apple/Unix checks are considered; anything unlisted defaults to 1.
+#ifndef EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+	#if   defined(EA_PLATFORM_XBOXONE)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#elif defined(EA_USE_CPP11_CONCURRENCY) && EA_USE_CPP11_CONCURRENCY
+		// C++11 does not provide a mechanism to set thread affinities.
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_ANDROID) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_UNIX)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 0
+	#else
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+//
+// Defined as 0 or 1. 
+// 
+//
+#ifndef EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+	#define EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY 0
+#endif
+
+
+	
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_SCEDBG_ENABLED 
+//
+// Defined as 0 or 1. 
+// Informs EAThread if Sony Debug libraries are available for us. 
+//
+#if !defined(EATHREAD_SCEDBG_ENABLED)
+	// Mirror EA_SCEDBG_ENABLED when the build defines it; otherwise default to disabled.
+	#if defined(EA_SCEDBG_ENABLED)
+		#define EATHREAD_SCEDBG_ENABLED EA_SCEDBG_ENABLED
+	#else
+		#define EATHREAD_SCEDBG_ENABLED 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEBUG_BREAK
+//
+// Stops execution at the point of use. Microsoft compilers use the __debugbreak() intrinsic;
+// everywhere else the macro writes through a null volatile int pointer to force a fault.
+// The compiler check uses _MSC_VER (single leading underscore), matching the other Microsoft
+// compiler checks in this codebase; the previous __MSC_VER spelling is not a macro MSVC defines,
+// so the __debugbreak() branch was never selected.
+//
+#ifndef EATHREAD_DEBUG_BREAK
+	#if defined(_MSC_VER)
+		#define EATHREAD_DEBUG_BREAK() __debugbreak()
+	#else
+		#define EATHREAD_DEBUG_BREAK() *(volatile int*)(0) = 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_C11_ATOMICS_AVAILABLE
+//
+// Defined as 0 or 1.
+// Defaults to 1 only when EA_ANDROID_SDK_LEVEL is defined and is at least 21; otherwise 0.
+//
+#ifndef EATHREAD_C11_ATOMICS_AVAILABLE
+	#if (defined(EA_ANDROID_SDK_LEVEL) && (EA_ANDROID_SDK_LEVEL >= 21))  
+		#define EATHREAD_C11_ATOMICS_AVAILABLE 1
+	#else
+		#define EATHREAD_C11_ATOMICS_AVAILABLE 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_ALIGNMENT_CHECK
+//
+
+namespace EA {
+namespace Thread {
+namespace detail {
+	// Used to assert that memory accesses on x86-64 are atomic when "naturally" aligned to the size of registers.
+	// Returns true when the low bits of the address selected by the mask are all zero.
+	//
+	// NOTE(review): the mask is built from sizeof(EA_PLATFORM_WORD_SIZE), i.e. the size of the
+	// *type* of that macro's expression, not the macro's value. If EA_PLATFORM_WORD_SIZE is an
+	// integer constant (presumably 4 or 8 -- it is defined outside this file), this tests
+	// sizeof(int)-byte alignment on every platform rather than word-size alignment. Confirm the
+	// intent against the callers before changing it, since a stricter check can trip new assertions.
+	template <typename T>
+	inline bool IsNaturallyAligned(T* p)
+	{
+		return ((uintptr_t)p & (sizeof(EA_PLATFORM_WORD_SIZE) - 1)) == 0;
+	}
+}}}
+
+// Asserts (through EAT_ASSERT_MSG, so only when assertions are enabled) that the given address
+// passes IsNaturallyAligned.
+#ifndef EATHREAD_ALIGNMENT_CHECK
+	#define EATHREAD_ALIGNMENT_CHECK(address) EAT_ASSERT_MSG(EA::Thread::detail::IsNaturallyAligned(address), "address is not naturally aligned.")	
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_APPLE_GETMODULEINFO_ENABLED 
+//
+// This functionality has been migrated to EACallstack.  We provide a preprocessor switch for backwards compatibility
+// until the code path is removed completely in a future release.
+//
+// Defined as 0 or 1. 
+//
+#ifndef EATHREAD_APPLE_GETMODULEINFO_ENABLED 
+	#define EATHREAD_APPLE_GETMODULEINFO_ENABLED 0
+#endif
+
+
+
+#endif // Header include guard
+
+
+
@@ -0,0 +1,15 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_DLLINFO_H
+#define EATHREAD_DLLINFO_H
+
+
+#include <eathread/internal/config.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#endif
@@ -0,0 +1,143 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// eathread_atomic.h
+//
+// Defines functionality for thread-safe primitive operations.
+// 
+// EAThread atomics do NOT imply the use of read/write barriers.  This is 
+// partly due to historical reasons and partly due to EAThread's internal 
+// code being optimized for not using barriers.
+//
+// In the future, we are considering migrating to an atomics interface that
+// defaults to full read/write barriers while allowing users to opt out of
+// full barrier usage.  The C++11 atomics interface already provides
+// similar functionality.
+//
+// http://en.cppreference.com/w/cpp/atomic/memory_order
+//
+// Created by Rob Parolin
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+#define EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+#include <atomic>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		///
+		/// Implements thread-safe access to an integer and primary operations on that integer.
+		/// AtomicIntegers are commonly used as lightweight flags and signals between threads
+		/// or as the synchronization object for spinlocks. Those familiar with the Win32 API
+		/// will find that AtomicInt32 is essentially a platform independent interface to 
+		/// the Win32 InterlockedXXX family of functions. Those familiar with Linux may 
+		/// find that AtomicInt32 is essentially a platform independent interface to atomic_t 
+		/// functionality.
+		///
+		/// This implementation stores the value in a std::atomic<T> and forwards every
+		/// operation to it without passing an explicit memory order, so std::atomic's
+		/// default ordering applies.
+		///
+		/// Example usage
+		///     AtomicInt32 i = 0;
+		///
+		///     ++i;
+		///     i--;
+		///     i += 7;
+		///     i -= 3;
+		///     i = 2;
+		///     
+		///     int x = i.GetValue();
+		///     i.Increment();
+		///     bool oldValueWas6 = i.SetValueConditional(3, 6);
+		///     i.Add(4);
+		///
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			/// Constructs with an initial value of n. The member is initialized directly
+			/// rather than through SetValue(): an atomic exchange would read the
+			/// not-yet-initialized member, and no other thread can see the object yet.
+			AtomicInt(ValueType n) 
+				: mValue(n) {}
+
+			/// Copy constructor. Atomically reads x and initializes this object from that value.
+			AtomicInt(const ThisType& x) 
+				: mValue(x.GetValue()) {}
+
+			/// Copy assignment. Atomically reads x, then atomically stores that value here.
+			/// The read and the store are two separate atomic operations.
+			AtomicInt& operator=(const ThisType& x)
+				{ SetValue(x.GetValue()); return *this; }
+
+			/// Atomically loads and returns the current value.
+			ValueType GetValue() const 
+				{ return mValue.load(); }
+
+			/// Returns the current value. In this implementation this is the same
+			/// atomic load that GetValue() performs.
+			ValueType GetValueRaw() const
+				{ return mValue.load(); }
+
+			/// Atomically replaces the value with n and returns the previous value.
+			ValueType SetValue(ValueType n)
+				{ return mValue.exchange(n); }
+
+			/// Atomically sets the value to n if it currently equals condition.
+			/// Returns true if the value was replaced, false if it was left unchanged.
+			bool SetValueConditional(ValueType n, ValueType condition)
+				{ return mValue.compare_exchange_strong(condition, n); }
+
+			/// Atomically adds one and returns the new value.
+			ValueType Increment()
+				{ return ++mValue; }
+
+			/// Atomically subtracts one and returns the new value.
+			ValueType Decrement()
+				{ return --mValue; }
+
+			/// Atomically adds n and returns the new value.
+			ValueType Add(ValueType n)
+				{ return mValue.fetch_add(n) + n; }
+
+			// operators
+			// The conversion operator reads through GetValue(). Assignment and compound
+			// assignment return the value written. Pre-increment/decrement return the new
+			// value; post-increment/decrement return the value held before the change.
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          { return mValue.operator=(n); }
+			inline ValueType  operator+=(ValueType n)          { return mValue.operator+=(n); }
+			inline ValueType  operator-=(ValueType n)          { return mValue.operator-=(n); }
+			inline ValueType  operator++()                     { return mValue.operator++(); }
+			inline ValueType  operator++(int)                  { return mValue.operator++(0); }
+			inline ValueType  operator--()                     { return mValue.operator--(); }
+			inline ValueType  operator--(int)                  { return mValue.operator--(0); }
+
+		protected:
+			std::atomic<ValueType> mValue;
+		};
+
+	} // namespace Thread
+} // namespace EA
+
+
+#endif // EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/// Standalone atomic functions
+/// These act the same as the class functions below.
+/// The T return values are the previous value, except for the
+/// AtomicFetchSwap function which returns the swapped out value.
+///
+/// T    AtomicGetValue(volatile T*);
+/// T    AtomicGetValue(const volatile T*);
+/// void AtomicSetValue(volatile T*, T value);
+/// T    AtomicFetchIncrement(volatile T*);
+/// T    AtomicFetchDecrement(volatile T*);
+/// T    AtomicFetchAdd(volatile T*, T value);
+/// T    AtomicFetchSub(volatile T*, T value);
+/// T    AtomicFetchOr(volatile T*, T value);
+/// T    AtomicFetchAnd(volatile T*, T value);
+/// T    AtomicFetchXor(volatile T*, T value);
+/// T    AtomicFetchSwap(volatile T*, T value);
+/// T    AtomicFetchSwapConditional(volatile T*, T value, T condition);
+/// bool AtomicSetValueConditional(volatile T*, T value, T condition);
+
+#if defined(EA_COMPILER_MSVC)
+	#include <eathread/internal/eathread_atomic_standalone_msvc.h>
+#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+	#include <eathread/internal/eathread_atomic_standalone_gcc.h>
+#else
+	#error unsupported platform
+#endif
+
+
@@ -0,0 +1,199 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+namespace Thread
+{
+
+// TODO(rparolin): Consider use of clang builtin __sync_swap.
+// https://clang.llvm.org/docs/LanguageExtensions.html#sync-swap
+
+// TODO(rparolin):  Consider use of C11 atomics
+// https://clang.llvm.org/docs/LanguageExtensions.html#c11-atomic-builtins
+
+namespace detail
+{
+	// Forward declaration only. The definition appears at the end of this header,
+	// after the typed AtomicFetchAdd overloads that its fallback path calls.
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr);
+} // namespace detail
+
+// int
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement is implemented as an add of -1.
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline int AtomicGetValue(volatile int* ptr) { return detail::AtomicGetValue(ptr); }
+inline int AtomicGetValue(const volatile int* ptr) { return AtomicGetValue(const_cast<volatile int*>(ptr)); }
+inline int AtomicSetValue(volatile int* dest, int value) { return __sync_lock_test_and_set(dest, value); }
+inline int AtomicFetchIncrement(volatile int* dest) { return __sync_fetch_and_add(dest, int(1)); }
+inline int AtomicFetchDecrement(volatile int* dest) { return __sync_fetch_and_add(dest, int(-1)); }
+inline int AtomicFetchAdd(volatile int* dest, int value) { return __sync_fetch_and_add(dest, value); }
+inline int AtomicFetchSub(volatile int* dest, int value) { return __sync_fetch_and_sub(dest, value); }
+inline int AtomicFetchOr(volatile int* dest, int value) { return __sync_fetch_and_or(dest, value); }
+inline int AtomicFetchAnd(volatile int* dest, int value) { return __sync_fetch_and_and(dest, value); }
+inline int AtomicFetchXor(volatile int* dest, int value) { return __sync_fetch_and_xor(dest, value); }
+inline int AtomicFetchSwap(volatile int* dest, int value) { return __sync_lock_test_and_set(dest, value); }
+inline int AtomicFetchSwapConditional(volatile int* dest, int value, int condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile int* dest, int value, int condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned int
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement adds (unsigned int)(-1).
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline unsigned int AtomicGetValue(volatile unsigned int* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned int AtomicGetValue(const volatile unsigned int* ptr) { return AtomicGetValue(const_cast<volatile unsigned int*>(ptr)); }
+inline unsigned int AtomicSetValue(volatile unsigned int* dest, unsigned int value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned int AtomicFetchIncrement(volatile unsigned int* dest) { return __sync_fetch_and_add(dest, (unsigned int)(1)); }
+inline unsigned int AtomicFetchDecrement(volatile unsigned int* dest) { return __sync_fetch_and_add(dest, (unsigned int)(-1)); }
+inline unsigned int AtomicFetchAdd(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_add(dest, value); } 
+inline unsigned int AtomicFetchSub(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned int AtomicFetchOr(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned int AtomicFetchAnd(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned int AtomicFetchXor(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned int AtomicFetchSwap(volatile unsigned int* dest, unsigned int value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned int AtomicFetchSwapConditional(volatile unsigned int* dest, unsigned int value, unsigned int condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned int* dest, unsigned int value, unsigned int condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// short
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement is implemented as an add of -1.
+//   - Unlike the other type groups, the two compare-and-swap wrappers reinterpret dest as
+//     volatile unsigned short* and cast condition/value to unsigned short before calling
+//     __sync_val_compare_and_swap / __sync_bool_compare_and_swap.
+inline short AtomicGetValue(volatile short* ptr) { return detail::AtomicGetValue(ptr); }
+inline short AtomicGetValue(const volatile short* ptr) { return AtomicGetValue(const_cast<volatile short*>(ptr)); }
+inline short AtomicSetValue(volatile short* dest, short value) { return __sync_lock_test_and_set(dest, value); }
+inline short AtomicFetchIncrement(volatile short* dest) { return __sync_fetch_and_add(dest, short(1)); }
+inline short AtomicFetchDecrement(volatile short* dest) { return __sync_fetch_and_add(dest, short(-1)); }
+inline short AtomicFetchAdd(volatile short* dest, short value) { return __sync_fetch_and_add(dest, value); }
+inline short AtomicFetchSub(volatile short* dest, short value) { return __sync_fetch_and_sub(dest, value); }
+inline short AtomicFetchOr(volatile short* dest, short value) { return __sync_fetch_and_or(dest, value); }
+inline short AtomicFetchAnd(volatile short* dest, short value) { return __sync_fetch_and_and(dest, value); }
+inline short AtomicFetchXor(volatile short* dest, short value) { return __sync_fetch_and_xor(dest, value); }
+inline short AtomicFetchSwap(volatile short* dest, short value) { return __sync_lock_test_and_set(dest, value); }
+inline short AtomicFetchSwapConditional(volatile short* dest, short value, short condition) { return __sync_val_compare_and_swap(reinterpret_cast<volatile unsigned short*>(dest), static_cast<unsigned short>(condition), static_cast<unsigned short>(value)); }
+inline bool AtomicSetValueConditional(volatile short* dest, short value, short condition) { return __sync_bool_compare_and_swap(reinterpret_cast<volatile unsigned short*>(dest), static_cast<unsigned short>(condition), static_cast<unsigned short>(value)); }
+
+// unsigned short
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement adds (unsigned short)(-1).
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline unsigned short AtomicGetValue(volatile unsigned short* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned short AtomicGetValue(const volatile unsigned short* ptr) { return AtomicGetValue(const_cast<volatile unsigned short*>(ptr)); }
+inline unsigned short AtomicSetValue(volatile unsigned short* dest, unsigned short value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned short AtomicFetchIncrement(volatile unsigned short* dest) { return __sync_fetch_and_add(dest, (unsigned short)(1)); }
+inline unsigned short AtomicFetchDecrement(volatile unsigned short* dest) { return __sync_fetch_and_add(dest, (unsigned short)(-1)); }
+inline unsigned short AtomicFetchAdd(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned short AtomicFetchSub(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned short AtomicFetchOr(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned short AtomicFetchAnd(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned short AtomicFetchXor(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned short AtomicFetchSwap(volatile unsigned short* dest, unsigned short value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned short AtomicFetchSwapConditional(volatile unsigned short* dest, unsigned short value, unsigned short condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned short* dest, unsigned short value, unsigned short condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// long
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement is implemented as an add of -1.
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline long AtomicGetValue(volatile long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long AtomicGetValue(const volatile long* ptr) { return AtomicGetValue(const_cast<volatile long*>(ptr)); }
+inline long AtomicSetValue(volatile long* dest, long value) { return __sync_lock_test_and_set(dest, value); }
+inline long AtomicFetchIncrement(volatile long* dest) { return __sync_fetch_and_add(dest, long(1)); }
+inline long AtomicFetchDecrement(volatile long* dest) { return __sync_fetch_and_add(dest, long(-1)); }
+inline long AtomicFetchAdd(volatile long* dest, long value) { return __sync_fetch_and_add(dest, value); }
+inline long AtomicFetchSub(volatile long* dest, long value) { return __sync_fetch_and_sub(dest, value); }
+inline long AtomicFetchOr(volatile long* dest, long value) { return __sync_fetch_and_or(dest, value); }
+inline long AtomicFetchAnd(volatile long* dest, long value) { return __sync_fetch_and_and(dest, value); }
+inline long AtomicFetchXor(volatile long* dest, long value) { return __sync_fetch_and_xor(dest, value); }
+inline long AtomicFetchSwap(volatile long* dest, long value) { return __sync_lock_test_and_set(dest, value); }
+inline long AtomicFetchSwapConditional(volatile long* dest, long value, long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile long* dest, long value, long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned long
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement adds (unsigned long)(-1).
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline unsigned long AtomicGetValue(volatile unsigned long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long AtomicGetValue(const volatile unsigned long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long*>(ptr)); }
+inline unsigned long AtomicSetValue(volatile unsigned long* dest, unsigned long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long AtomicFetchIncrement(volatile unsigned long* dest) { return __sync_fetch_and_add(dest, (unsigned long)(1)); }
+inline unsigned long AtomicFetchDecrement(volatile unsigned long* dest) { return __sync_fetch_and_add(dest, (unsigned long)(-1)); }
+inline unsigned long AtomicFetchAdd(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned long AtomicFetchSub(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned long AtomicFetchOr(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned long AtomicFetchAnd(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned long AtomicFetchXor(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned long AtomicFetchSwap(volatile unsigned long* dest, unsigned long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long AtomicFetchSwapConditional(volatile unsigned long* dest, unsigned long value, unsigned long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned long* dest, unsigned long value, unsigned long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// char32_t
+// Compiled only when EA_CHAR32_NATIVE evaluates to non-zero.
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement adds char32_t(-1).
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+#if EA_CHAR32_NATIVE
+	inline char32_t AtomicGetValue(volatile char32_t* ptr) { return detail::AtomicGetValue(ptr); }
+	inline char32_t AtomicGetValue(const volatile char32_t* ptr) { return AtomicGetValue(const_cast<volatile char32_t*>(ptr)); }
+    inline char32_t AtomicSetValue(volatile char32_t* dest, char32_t value) { return __sync_lock_test_and_set(dest, value); }
+	inline char32_t AtomicFetchIncrement(volatile char32_t* dest) { return __sync_fetch_and_add(dest, char32_t(1)); }
+	inline char32_t AtomicFetchDecrement(volatile char32_t* dest) { return __sync_fetch_and_add(dest, char32_t(-1)); }
+	inline char32_t AtomicFetchAdd(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_add(dest, value); }
+	inline char32_t AtomicFetchSub(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_sub(dest, value); }
+	inline char32_t AtomicFetchOr(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_or(dest, value); }
+	inline char32_t AtomicFetchAnd(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_and(dest, value); }
+	inline char32_t AtomicFetchXor(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_xor(dest, value); }
+	inline char32_t AtomicFetchSwap(volatile char32_t* dest, char32_t value) { return __sync_lock_test_and_set(dest, value); }
+	inline char32_t AtomicFetchSwapConditional(volatile char32_t* dest, char32_t value, char32_t condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+	inline bool AtomicSetValueConditional(volatile char32_t* dest, char32_t value, char32_t condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+#endif
+
+// long long
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement is implemented as an add of -1.
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline long long AtomicGetValue(volatile long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long long AtomicGetValue(const volatile long long* ptr) { return AtomicGetValue(const_cast<volatile long long*>(ptr)); }
+inline long long AtomicSetValue(volatile long long* dest, long long value) { return __sync_lock_test_and_set(dest, value); }
+inline long long AtomicFetchIncrement(volatile long long* dest) { return __sync_fetch_and_add(dest, (long long)(1)); }
+inline long long AtomicFetchDecrement(volatile long long* dest) { return __sync_fetch_and_add(dest, (long long)(-1)); }
+inline long long AtomicFetchAdd(volatile long long* dest, long long value) { return __sync_fetch_and_add(dest, value); }
+inline long long AtomicFetchSub(volatile long long* dest, long long value) { return __sync_fetch_and_sub(dest, value); }
+inline long long AtomicFetchOr(volatile long long* dest, long long value) { return __sync_fetch_and_or(dest, value); }
+inline long long AtomicFetchAnd(volatile long long* dest, long long value) { return __sync_fetch_and_and(dest, value); }
+inline long long AtomicFetchXor(volatile long long* dest, long long value) { return __sync_fetch_and_xor(dest, value); }
+inline long long AtomicFetchSwap(volatile long long* dest, long long value) { return __sync_lock_test_and_set(dest, value); }
+inline long long AtomicFetchSwapConditional(volatile long long* dest, long long value, long long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile long long* dest, long long value, long long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned long long
+// One-line wrappers over the compiler's __sync_* builtins for this type:
+//   - AtomicGetValue forwards to detail::AtomicGetValue; the const overload casts away const first.
+//   - AtomicSetValue and AtomicFetchSwap both return the result of __sync_lock_test_and_set.
+//   - AtomicFetchIncrement/Decrement/Add/Sub/Or/And/Xor return the result of the matching
+//     __sync_fetch_and_* builtin; Decrement adds (unsigned long long)(-1).
+//   - AtomicFetchSwapConditional returns __sync_val_compare_and_swap(dest, condition, value) and
+//     AtomicSetValueConditional returns __sync_bool_compare_and_swap(dest, condition, value).
+inline unsigned long long AtomicGetValue(volatile unsigned long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long long AtomicGetValue(const volatile unsigned long long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long long*>(ptr)); }
+inline unsigned long long AtomicSetValue(volatile unsigned long long* dest, unsigned long long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long long AtomicFetchIncrement(volatile unsigned long long* dest) { return __sync_fetch_and_add(dest, (unsigned long long)(1)); }
+inline unsigned long long AtomicFetchDecrement(volatile unsigned long long* dest) { return __sync_fetch_and_add(dest, (unsigned long long)(-1)); }
+inline unsigned long long AtomicFetchAdd(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned long long AtomicFetchSub(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned long long AtomicFetchOr(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned long long AtomicFetchAnd(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned long long AtomicFetchXor(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned long long AtomicFetchSwap(volatile unsigned long long* dest, unsigned long long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long long AtomicFetchSwapConditional(volatile unsigned long long* dest, unsigned long long value, unsigned long long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned long long* dest, unsigned long long value, unsigned long long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+//	
+// You can not simply define a template for the above atomics due to the explicit 128bit overloads
+// below.  The compiler will prefer those overloads during overload resolution and attempt to convert
+// temporaries as they are more specialized than a template.
+//
+// template<typename T> inline T AtomicGetValue(volatile T* source) { return __sync_fetch_and_add(source, (T)(0)); }
+// template<typename T> inline void AtomicSetValue(volatile T* dest, T value) { __sync_lock_test_and_set(dest, value); }
+// template<typename T> inline T AtomicFetchIncrement(volatile T* dest) { return __sync_fetch_and_add(dest, (T)(1)); }
+// template<typename T> inline T AtomicFetchDecrement(volatile T* dest) { return __sync_fetch_and_add(dest, (T)(-1)); }
+// template<typename T> inline T AtomicFetchAdd(volatile T* dest, T value) { return __sync_fetch_and_add(dest, value); }
+// template<typename T> inline T AtomicFetchOr(volatile T* dest, T value) { return __sync_fetch_and_or(dest, value); }
+// template<typename T> inline T AtomicFetchAnd(volatile T* dest, T value) { return __sync_fetch_and_and(dest, value); }
+// template<typename T> inline T AtomicFetchXor(volatile T* dest, T value) { return __sync_fetch_and_xor(dest, value); }
+// template<typename T> inline T AtomicFetchSwap(volatile T* dest, T value) { return __sync_lock_test_and_set(dest, value); }
+// template<typename T> inline bool AtomicSetValueConditional(volatile T* dest, T value, T condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+//
+
+namespace detail
+{
+	// Shared implementation behind the typed AtomicGetValue overloads declared earlier
+	// in this header; each of them forwards its pointer here.
+	//
+	// When EA_PLATFORM_WORD_SIZE >= 8 and EA_PROCESSOR_X86_64 is defined, the value is
+	// read with a plain load through the volatile pointer. The pointer is first passed
+	// to EATHREAD_ALIGNMENT_CHECK, and the load is bracketed by two
+	// EACompilerMemoryBarrier() calls.
+	// NOTE(review): presumably those barriers only prevent the compiler from moving
+	// other memory accesses across the load — confirm against EACompilerMemoryBarrier.
+	//
+	// On every other target the read is performed as AtomicFetchAdd(ptr, T(0)): an
+	// atomic add of zero whose return value is used as the result.
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr)
+	{
+	#if EA_PLATFORM_WORD_SIZE >= 8 && defined(EA_PROCESSOR_X86_64)
+		EATHREAD_ALIGNMENT_CHECK(ptr);
+		EACompilerMemoryBarrier();
+		T value = *ptr;
+		EACompilerMemoryBarrier();
+		return value;
+	#else
+		return AtomicFetchAdd(ptr, T(0));
+	#endif
+	}
+} // namespace detail
+
+} // namespace Thread
+} // namespace EA
+
@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////
+// InterlockedXXX intrinsics
+//
+#if defined(EA_PLATFORM_MICROSOFT)
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <xatomic.h>
+	EA_RESTORE_ALL_VC_WARNINGS()
+
+	// Prototypes for the compiler interlocked intrinsics used by this header.
+	// The 32-bit variants operate on 'long'; the 64-bit compare-exchange operates on int64_t.
+	extern "C" long           _InterlockedIncrement(long volatile* Addend);
+	extern "C" long           _InterlockedDecrement(long volatile* Addend);
+	extern "C" long           _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp);
+	extern "C" long           _InterlockedExchange(long volatile* Target, long Value);
+	extern "C" long           _InterlockedExchangeAdd(long volatile* Addend, long Value);
+	extern "C" int64_t        _InterlockedCompareExchange64(int64_t volatile* Dest, int64_t Exchange, int64_t Comp);
+
+	// Each function is requested as a compiler intrinsic and then aliased to an
+	// Interlocked*Imp name.
+	#pragma intrinsic (_InterlockedCompareExchange)
+	#define InterlockedCompareExchangeImp _InterlockedCompareExchange
+
+	#pragma intrinsic (_InterlockedExchange)
+	#define InterlockedExchangeImp        _InterlockedExchange 
+
+	#pragma intrinsic (_InterlockedExchangeAdd)
+	#define InterlockedExchangeAddImp     _InterlockedExchangeAdd
+
+	#pragma intrinsic (_InterlockedIncrement)
+	#define InterlockedIncrementImp       _InterlockedIncrement
+
+	#pragma intrinsic (_InterlockedDecrement)
+	#define InterlockedDecrementImp       _InterlockedDecrement
+
+	#pragma intrinsic (_InterlockedCompareExchange64)
+	#define InterlockedCompareExchange64Imp _InterlockedCompareExchange64
+
+	// Stores newValue into *dest if *dest equals condition.
+	// Returns true if the value observed by the compare-exchange matched condition.
+	inline bool InterlockedSetIfEqual(volatile int64_t* dest, int64_t newValue, int64_t condition)
+	{
+		const int64_t observed = InterlockedCompareExchange64Imp(dest, newValue, condition);
+		return observed == condition;
+	}
+
+	// Unsigned overload: forwards to the signed 64-bit compare-exchange by reinterpreting
+	// the operands as int64_t. Returns true if the observed value matched condition.
+	inline bool InterlockedSetIfEqual(volatile uint64_t* dest, uint64_t newValue, uint64_t condition)
+	{
+		const int64_t expected = (int64_t)condition;
+		const int64_t observed = InterlockedCompareExchange64Imp((int64_t volatile*)dest, (int64_t)newValue, expected);
+		return observed == expected;
+	}
+
+	// Fallback: import the Win32 Interlocked* API functions and alias them to the *Imp names.
+	// NOTE(review): InterlockedCompareExchangeImp is #defined unconditionally earlier in this
+	// EA_PLATFORM_MICROSOFT section, so as written this branch is not taken.
+	#ifndef InterlockedCompareExchangeImp // If the above intrinsics aren't used... 
+		extern "C" __declspec(dllimport) long __stdcall InterlockedIncrement(long volatile * pAddend);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedDecrement(long volatile * pAddend);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedExchange(long volatile * pTarget, long value);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedExchangeAdd(long volatile * pAddend, long value);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedCompareExchange(long volatile * pDestination, long value, long compare);
+
+		#define InterlockedCompareExchangeImp InterlockedCompareExchange
+		#define InterlockedExchangeImp        InterlockedExchange
+		#define InterlockedExchangeAddImp     InterlockedExchangeAdd
+		#define InterlockedIncrementImp       InterlockedIncrement
+		#define InterlockedDecrementImp       InterlockedDecrement
+	#endif
+
+	// On 32-bit x86 the 64-bit intrinsic names used below are redirected to *_INLINE variants.
+	// NOTE(review): the *_INLINE functions are not declared in this file; presumably they come
+	// from the platform SDK headers -- confirm.
+	#if defined(EA_PROCESSOR_X86)
+		#define _InterlockedExchange64		_InterlockedExchange64_INLINE
+		#define _InterlockedExchangeAdd64	_InterlockedExchangeAdd64_INLINE
+		#define _InterlockedAnd64			_InterlockedAnd64_INLINE
+		#define _InterlockedOr64			_InterlockedOr64_INLINE
+		#define _InterlockedXor64			_InterlockedXor64_INLINE
+	#endif
+#endif // EA_PLATFORM_MICROSOFT
+
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+namespace detail
+{
+	// Forward declaration of the generic atomic read. The typed AtomicGetValue overloads
+	// that follow call it; the definition appears after them in this file.
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr);
+} // namespace detail
+
+// int
+// Atomic operations on 'int', implemented with the 32-bit _Interlocked* intrinsics (which
+// operate on 'long'). AtomicSetValue and every AtomicFetch* function return the value that
+// was stored at ptr before the operation; AtomicSetValueConditional returns true if the
+// store happened.
+inline int AtomicGetValue(volatile int* ptr) { return detail::AtomicGetValue(ptr); }
+inline int AtomicGetValue(const volatile int* ptr) { return AtomicGetValue(const_cast<volatile int*>(ptr)); }
+inline int AtomicSetValue(volatile int* ptr, int value) { return _InterlockedExchange((long*)ptr, (long)value); }
+// Increment/decrement use _InterlockedExchangeAdd, which returns the previous value directly.
+// Deriving it as "new value - 1" / "new value + 1" is a signed overflow when the counter wraps.
+inline int AtomicFetchIncrement(volatile int* ptr) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, (long)1)); }
+inline int AtomicFetchDecrement(volatile int* ptr) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, (long)-1)); }
+inline int AtomicFetchAdd(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+// The operand is negated in unsigned arithmetic so that the most negative value does not overflow.
+inline int AtomicFetchSub(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, (long)(0u - (unsigned int)value))); }
+inline int AtomicFetchOr(volatile int* ptr, int value) { return static_cast<int>(_InterlockedOr((long*)ptr, (long)value)); }
+inline int AtomicFetchAnd(volatile int* ptr, int value) { return static_cast<int>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline int AtomicFetchXor(volatile int* ptr, int value) { return static_cast<int>(_InterlockedXor((long*)ptr, (long)value)); }
+inline int AtomicFetchSwap(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline int AtomicFetchSwapConditional(volatile int* ptr, int value, int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition); }
+inline bool AtomicSetValueConditional(volatile int* ptr, int value, int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+
+// unsigned int
+// Atomic operations on 'unsigned int', implemented by reinterpreting the storage as 'long'
+// for the 32-bit _Interlocked* intrinsics. AtomicSetValue and every AtomicFetch* function
+// return the value that was stored at ptr before the operation.
+inline unsigned int AtomicGetValue(volatile unsigned int* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned int AtomicGetValue(const volatile unsigned int* ptr) { return AtomicGetValue(const_cast<volatile unsigned int*>(ptr)); }
+inline unsigned int AtomicSetValue(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchIncrement(volatile unsigned int* ptr) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)1)); }
+inline unsigned int AtomicFetchDecrement(volatile unsigned int* ptr) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)-1)); }
+inline unsigned int AtomicFetchAdd(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+// The operand is negated before the conversion to 'long'; negating after it is a signed
+// overflow when value == 0x80000000.
+inline unsigned int AtomicFetchSub(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)(0u - value))); }
+inline unsigned int AtomicFetchOr(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedOr((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchAnd(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchXor(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedXor((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchSwap(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchSwapConditional(volatile unsigned int* ptr, unsigned int value, unsigned int condition) { return (unsigned int)_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition); }
+inline bool AtomicSetValueConditional(volatile unsigned int* ptr, unsigned int value, unsigned int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+
+// short
+// Atomic operations on 'short', forwarded to the 16-bit _Interlocked*16 intrinsics.
+// The pointer and operands already have the intrinsic's types, so they are passed through as-is.
+inline short AtomicGetValue(volatile short* ptr) { return detail::AtomicGetValue(ptr); }
+inline short AtomicGetValue(const volatile short* ptr) { return AtomicGetValue(const_cast<volatile short*>(ptr)); }
+inline short AtomicSetValue(volatile short* ptr, short value) { return _InterlockedExchange16(ptr, value); }
+inline short AtomicFetchIncrement(volatile short* ptr) { return _InterlockedExchangeAdd16(ptr, static_cast<short>(1)); }
+inline short AtomicFetchDecrement(volatile short* ptr) { return _InterlockedExchangeAdd16(ptr, static_cast<short>(-1)); }
+inline short AtomicFetchAdd(volatile short* ptr, short value) { return _InterlockedExchangeAdd16(ptr, value); }
+inline short AtomicFetchSub(volatile short* ptr, short value) { return _InterlockedExchangeAdd16(ptr, static_cast<short>(-value)); }
+inline short AtomicFetchOr(volatile short* ptr, short value) { return _InterlockedOr16(ptr, value); }
+inline short AtomicFetchAnd(volatile short* ptr, short value) { return _InterlockedAnd16(ptr, value); }
+inline short AtomicFetchXor(volatile short* ptr, short value) { return _InterlockedXor16(ptr, value); }
+inline short AtomicFetchSwap(volatile short* ptr, short value) { return _InterlockedExchange16(ptr, value); }
+inline short AtomicFetchSwapConditional(volatile short* ptr, short value, short condition) { return _InterlockedCompareExchange16(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile short* ptr, short value, short condition) { return condition == _InterlockedCompareExchange16(ptr, value, condition); }
+
+// unsigned short
+// Atomic operations on 'unsigned short', implemented by reinterpreting the storage as 'short'
+// for the 16-bit _Interlocked*16 intrinsics and converting the result back.
+inline unsigned short AtomicGetValue(volatile unsigned short* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned short AtomicGetValue(const volatile unsigned short* ptr) { return AtomicGetValue(const_cast<volatile unsigned short*>(ptr)); }
+inline unsigned short AtomicSetValue(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchange16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchIncrement(volatile unsigned short* ptr) { return static_cast<unsigned short>(_InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(1))); }
+inline unsigned short AtomicFetchDecrement(volatile unsigned short* ptr) { return static_cast<unsigned short>(_InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(-1))); }
+inline unsigned short AtomicFetchAdd(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchSub(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(-static_cast<short>(value)))); }
+inline unsigned short AtomicFetchOr(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedOr16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchAnd(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedAnd16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchXor(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedXor16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchSwap(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchange16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value))); }
+inline unsigned short AtomicFetchSwapConditional(volatile unsigned short* ptr, unsigned short value, unsigned short condition) { return static_cast<unsigned short>(_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value), static_cast<short>(condition))); }
+inline bool AtomicSetValueConditional(volatile unsigned short* ptr, unsigned short value, unsigned short condition) { return _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(ptr), static_cast<short>(value), static_cast<short>(condition)) == static_cast<short>(condition); }
+
+// long
+// Atomic operations on 'long', forwarded directly to the 32-bit _Interlocked* intrinsics.
+// AtomicSetValue and every AtomicFetch* function return the value that was stored at ptr
+// before the operation; AtomicSetValueConditional returns true if the store happened.
+inline long AtomicGetValue(volatile long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long AtomicGetValue(const volatile long* ptr) { return AtomicGetValue(const_cast<volatile long*>(ptr)); }
+inline long AtomicSetValue(volatile long* ptr, long value) { return _InterlockedExchange(ptr, value); }
+// Increment/decrement use _InterlockedExchangeAdd, which returns the previous value directly.
+// Deriving it as "new value - 1" / "new value + 1" is a signed overflow when the counter wraps.
+inline long AtomicFetchIncrement(volatile long* ptr) { return _InterlockedExchangeAdd(ptr, 1L); }
+inline long AtomicFetchDecrement(volatile long* ptr) { return _InterlockedExchangeAdd(ptr, -1L); }
+inline long AtomicFetchAdd(volatile long* ptr, long value)  { return _InterlockedExchangeAdd(ptr, value); }
+// The operand is negated in unsigned arithmetic so that the most negative value does not overflow.
+inline long AtomicFetchSub(volatile long* ptr, long value) { return _InterlockedExchangeAdd(ptr, (long)(0ul - (unsigned long)value)); }
+inline long AtomicFetchOr(volatile long* ptr, long value)   { return _InterlockedOr(ptr, value); }
+inline long AtomicFetchAnd(volatile long* ptr, long value)  { return _InterlockedAnd(ptr, value); }
+inline long AtomicFetchXor(volatile long* ptr, long value)  { return _InterlockedXor(ptr, value); }
+inline long AtomicFetchSwap(volatile long* ptr, long value) { return _InterlockedExchange(ptr, value); }
+inline long AtomicFetchSwapConditional(volatile long* ptr, long value, long condition) { return _InterlockedCompareExchange(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile long* ptr, long value, long condition) { return _InterlockedCompareExchange(ptr, value, condition) == condition; }
+
+// unsigned long
+// Atomic operations on 'unsigned long', implemented by reinterpreting the storage as 'long'
+// for the 32-bit _Interlocked* intrinsics. AtomicSetValue and every AtomicFetch* function
+// return the value that was stored at ptr before the operation.
+inline unsigned long AtomicGetValue(volatile unsigned long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long AtomicGetValue(const volatile unsigned long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long*>(ptr)); }
+inline unsigned long AtomicSetValue(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchIncrement(volatile unsigned long* ptr) { return static_cast<unsigned long>(_InterlockedIncrement((long*)ptr)) - 1; }
+inline unsigned long AtomicFetchDecrement(volatile unsigned long* ptr) { return static_cast<unsigned long>(_InterlockedDecrement((long*)ptr)) + 1; }
+inline unsigned long AtomicFetchAdd(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+// The operand is negated before the conversion to 'long'; negating after it is a signed
+// overflow when value has only its top bit set.
+inline unsigned long AtomicFetchSub(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchangeAdd((long*)ptr, (long)(0ul - value))); }
+inline unsigned long AtomicFetchOr(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedOr((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchAnd(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchXor(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedXor((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchSwap(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchSwapConditional(volatile unsigned long* ptr, unsigned long value, unsigned long condition) { return static_cast<unsigned long>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)); }
+inline bool AtomicSetValueConditional(volatile unsigned long* ptr, unsigned long value, unsigned long condition) { return static_cast<unsigned long>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)) == condition; }
+
+// char32_t
+// Atomic operations on 'char32_t', available only when EA_CHAR32_NATIVE is set. They are
+// implemented by reinterpreting the storage as 'long' for the 32-bit _Interlocked* intrinsics.
+// Note that the conditional operations take their 'condition' argument as 'unsigned int'.
+#if EA_CHAR32_NATIVE
+	inline char32_t AtomicGetValue(volatile char32_t* ptr) { return detail::AtomicGetValue(ptr); }
+	inline char32_t AtomicGetValue(const volatile char32_t* ptr) { return AtomicGetValue(const_cast<volatile char32_t*>(ptr)); }
+	inline char32_t AtomicSetValue(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchange((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchIncrement(volatile char32_t* ptr) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)1)); }
+	inline char32_t AtomicFetchDecrement(volatile char32_t* ptr) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)-1)); }
+	inline char32_t AtomicFetchAdd(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+	// The operand is negated in unsigned arithmetic before the conversion to 'long'; negating
+	// after it is a signed overflow when value == 0x80000000.
+	inline char32_t AtomicFetchSub(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)(0u - (unsigned int)value))); }
+	inline char32_t AtomicFetchOr(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedOr((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchAnd(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedAnd((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchXor(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedXor((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchSwap(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchange((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchSwapConditional(volatile char32_t* ptr, char32_t value, unsigned int condition) { return static_cast<char32_t>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)); }
+	inline bool AtomicSetValueConditional(volatile char32_t* ptr, char32_t value, unsigned int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+#endif
+
+
+// long long
+// Atomic operations on 'long long', forwarded to the 64-bit _Interlocked*64 intrinsics.
+// AtomicSetValue and every AtomicFetch* function return the value that was stored at ptr
+// before the operation; AtomicSetValueConditional returns true if the store happened.
+inline long long AtomicGetValue(volatile long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long long AtomicGetValue(const volatile long long* ptr) { return AtomicGetValue(const_cast<volatile long long*>(ptr)); }
+inline long long AtomicSetValue(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchange64(ptr, value)); }
+inline long long AtomicFetchIncrement(volatile long long* ptr) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, (long long)1)); }
+inline long long AtomicFetchDecrement(volatile long long* ptr) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, (long long)-1)); }
+inline long long AtomicFetchAdd(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, value)); }
+// The operand is negated in unsigned arithmetic so that the most negative value does not overflow.
+inline long long AtomicFetchSub(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, (long long)(0ull - (unsigned long long)value))); }
+inline long long AtomicFetchOr(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedOr64(ptr, value)); }
+inline long long AtomicFetchAnd(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedAnd64(ptr, value)); }
+inline long long AtomicFetchXor(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedXor64(ptr, value)); }
+inline long long AtomicFetchSwap(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchange64(ptr, value)); }
+inline long long AtomicFetchSwapConditional(volatile long long* ptr, long long value, long long condition) { return _InterlockedCompareExchange64(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile long long* ptr, long long value, long long condition) { return _InterlockedCompareExchange64(ptr, value, condition) == condition; }
+
+// unsigned long long
+// Atomic operations on 'unsigned long long', implemented by reinterpreting the storage as
+// 'long long' for the 64-bit _Interlocked*64 intrinsics. AtomicSetValue and every
+// AtomicFetch* function return the value that was stored at ptr before the operation.
+inline unsigned long long AtomicGetValue(volatile unsigned long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long long AtomicGetValue(const volatile unsigned long long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long long*>(ptr)); }
+inline unsigned long long AtomicSetValue(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }
+inline unsigned long long AtomicFetchIncrement(volatile unsigned long long* ptr) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)1)); }
+inline unsigned long long AtomicFetchDecrement(volatile unsigned long long* ptr) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)-1)); }
+inline unsigned long long AtomicFetchAdd(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }
+// The operand is negated before the conversion to 'long long'; negating after it is a
+// signed overflow when value has only its top bit set.
+inline unsigned long long AtomicFetchSub(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)(0ull - value))); }
+inline unsigned long long AtomicFetchOr(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedOr64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }
+inline unsigned long long AtomicFetchAnd(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedAnd64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchXor(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedXor64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchSwap(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchange64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchSwapConditional(volatile unsigned long long* ptr, unsigned long long value, unsigned long long condition) { return static_cast<unsigned long long>(_InterlockedCompareExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value, (long long)condition)); }
+inline bool AtomicSetValueConditional(volatile unsigned long long* ptr, unsigned long long value, unsigned long long condition) { return static_cast<unsigned long long>(_InterlockedCompareExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value, (long long)condition)) == condition; }
+
+
+namespace detail
+{
+	/// AtomicGetValue
+	/// Generic atomic read of *ptr, shared by the typed AtomicGetValue overloads above.
+	/// Except on 64-bit x86-64 builds, the value is obtained by atomically adding zero
+	/// via AtomicFetchAdd. On 64-bit x86-64 builds it is read with a plain load that is
+	/// bracketed by compiler memory barriers.
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr)
+	{
+	#if !(EA_PLATFORM_WORD_SIZE >= 8 && defined(EA_PROCESSOR_X86_64))
+		return AtomicFetchAdd(ptr, T(0));
+	#else
+		EATHREAD_ALIGNMENT_CHECK(ptr); // NOTE(review): presumably validates the alignment of ptr -- confirm against the macro definition.
+		EACompilerMemoryBarrier();
+		T loaded = *ptr;
+		EACompilerMemoryBarrier();
+		return loaded;
+	#endif
+	}
+} // namespace detail
+
+
+} // namespace Thread
+} // namespace EA
+
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// NOTE(rparolin):  Provides a unified method of access to EAThread global
+// variables that (when specified by the user) can become DLL safe by adding a
+// dependency on EAStdC EAGlobal implementation.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_GLOBAL_H
+#define EATHREAD_INTERNAL_GLOBAL_H
+
+// EATHREAD_GLOBALVARS                  - expression that accesses the EAThreadGlobalVars instance.
+// EATHREAD_GLOBALVARS_CREATE_INSTANCE  - defines the instance.
+// EATHREAD_GLOBALVARS_EXTERN_INSTANCE  - declares the instance as extern (expands to nothing
+//                                        in the DLL-safe configuration).
+#if EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+	#include <EAStdC/EAGlobal.h>
+
+	// DLL-safe configuration: the instance is reached through EA::StdC::AutoStaticOSGlobalPtr.
+	// NOTE(review): CREATE_INSTANCE ends with ';' here but not in the #else branch below.
+	#define EATHREAD_GLOBALVARS (*EA::StdC::AutoStaticOSGlobalPtr<EA::Thread::EAThreadGlobalVars, 0xdabbad00>().get())
+	#define EATHREAD_GLOBALVARS_CREATE_INSTANCE  EA::StdC::AutoStaticOSGlobalPtr<EA::Thread::EAThreadGlobalVars, 0xdabbad00> gGlobalVarsInstance;
+	#define EATHREAD_GLOBALVARS_EXTERN_INSTANCE  
+
+#else 
+	// Default configuration: a plain global variable named gEAThreadGlobalVars.
+	#define EATHREAD_GLOBALVARS gEAThreadGlobalVars
+	#define EATHREAD_GLOBALVARS_CREATE_INSTANCE EA::Thread::EAThreadGlobalVars gEAThreadGlobalVars
+	#define EATHREAD_GLOBALVARS_EXTERN_INSTANCE extern EA::Thread::EAThreadGlobalVars gEAThreadGlobalVars 
+
+#endif
+
+#endif
@@ -0,0 +1,50 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_INTERNAL_TIMINGS_H
+#define EATHREAD_INTERNAL_TIMINGS_H
+
+namespace EA
+{
+	namespace Thread
+	{
+		
+#if defined(EA_PLATFORM_SONY)
+		// RelativeTimeoutFromAbsoluteTimeout
+		// Converts an absolute ThreadTime deadline into a relative timeout in microseconds.
+		//   kTimeoutNone       -> 0xffffffff
+		//   kTimeoutImmediate  -> 0
+		//   any other deadline -> time remaining until the deadline, or 0 if it has already passed.
+		inline uint32_t RelativeTimeoutFromAbsoluteTimeout(EA::Thread::ThreadTime timeoutAbsolute)
+		{
+			using namespace EA::Thread;
+
+			// Catch callers that pass a relative time where an absolute time is expected.
+			EAT_ASSERT((timeoutAbsolute == kTimeoutImmediate) || (timeoutAbsolute > EATHREAD_MIN_ABSOLUTE_TIME));
+
+			// Zero covers both kTimeoutImmediate and deadlines that are not in the future.
+			uint32_t timeoutRelative = 0;
+
+			if (timeoutAbsolute == kTimeoutNone)
+			{
+				timeoutRelative = 0xffffffff;
+			}
+			else if (!(timeoutAbsolute == kTimeoutImmediate))
+			{
+				ThreadTime timeNow = GetThreadTime();
+
+				if (timeoutAbsolute > timeNow)
+					timeoutRelative = EA_THREADTIME_AS_UINT_MICROSECONDS(timeoutAbsolute - timeNow);
+			}
+
+			// The result must be either the "no timeout" sentinel or a sane value that did not wrap around.
+			EAT_ASSERT((timeoutRelative == 0xffffffff) || (timeoutRelative < 100000000));
+
+			return timeoutRelative;
+		}
+#endif
+
+	}
+}
+
+#endif
@@ -0,0 +1,431 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This is a multithread-safe version of shared_array.
+// For basic documentation, see shared_array.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_SHARED_ARRAY_MT_H
+#define EATHREAD_SHARED_ARRAY_MT_H
+
+#ifndef INCLUDED_eabase_H
+   #include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+   #include <eathread/eathread_futex.h>
+#endif
+#include <stddef.h> // More properly: #include <cstddef> // Definition of std::ptrdiff_t
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+/// namespace EA
+/// The standard Electronic Arts namespace
+namespace EA
+{
+   namespace Thread
+   {
+	  /// class shared_array_mt
+	  /// A shared_array_mt is the same as shared_ptr but for arrays. 
+	  template<class T>
+	  class shared_array_mt
+	  {
+	  private:
+		 /// this_type
+		 /// This is an alias for shared_array_mt<T>, this class.
+		 typedef shared_array_mt<T> this_type;
+
+		 /// reference_count_type
+		 /// An internal reference count type. Must be convertable to int
+		 /// so that the public use_count function can work.
+		 typedef EA::Thread::AtomicInt32 reference_count_type;
+
+		 T*                    mpArray;      /// The owned pointer. Points to an array of T.
+		 reference_count_type* mpRefCount;   /// Reference count for owned pointer.
+		 mutable Futex         mMutex;       /// Mutex guarding access to this class.
+
+	  public:
+		 typedef T element_type;
+		 typedef T value_type;
+
+		 /// shared_array_mt
+		 /// Takes ownership of the array pointer and creates a new shared reference
+		 /// count with an initial value of 1. It is OK if the input pointer is null;
+		 /// a null pointer is still given a reference count of 1.
+		 /// The shared reference count is allocated on the heap via operator new.
+		 /// In builds where exceptions are enabled, if that allocation throws then
+		 /// the passed in array is released with delete[] and the exception is rethrown,
+		 /// so the caller does not leak the array.
+		 ///
+		 /// @param pArray Pointer to an array to take ownership of. It is released with
+		 ///               delete[] by this class, so it is expected to come from new[].
+		 explicit shared_array_mt(T* pArray = 0)
+			: mpArray(pArray), mMutex()
+		 {
+			// We don't lock our mutex in this function, as this is the constructor
+			// and we assume that construction is already done in a thread-safe way
+			// by the owner of this object.
+			#if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+			   // No exception support: the result of the allocation is used as-is.
+			   // NOTE(review): the returned pointer is not checked for null here.
+			   mpRefCount = new reference_count_type(1);
+			#else
+				// NOTE(review): warning 4571 presumably relates to catch(...) semantics under VC++ -- confirm.
+				EA_DISABLE_VC_WARNING(4571)
+				try
+				{
+					mpRefCount = new reference_count_type(1);
+				}
+				catch(...)
+				{
+					// We own the array at this point, so it must be freed before propagating the failure.
+					delete[] mpArray;
+					//mpRefCount = 0; shouldn't be necessary.
+					throw;
+				}
+				EA_RESTORE_VC_WARNING()
+			#endif
+		 }
+
+		 /// shared_array_mt
+		 /// Copy constructor. The new object shares ownership of the array held by
+		 /// sharedArray, and the shared reference count is incremented by one.
+		 /// The source object's mutex is held while its members are read.
+		 shared_array_mt(shared_array_mt const& sharedArray)
+			: mMutex()
+		 {
+			// Our own mutex isn't locked, as nobody else can see this object yet.
+			sharedArray.lock();
+
+			reference_count_type* const pSharedCount = sharedArray.mpRefCount;
+			pSharedCount->Increment(); // Atomic operation
+
+			mpRefCount = pSharedCount;
+			mpArray    = sharedArray.mpArray;
+
+			sharedArray.unlock();
+		 }
+
+		 /// ~shared_array_mt
+		 /// Gives up this object's share of the owned array. The shared reference
+		 /// count is decremented and, if this was the last owner (the count
+		 /// reaches zero), both the array and the reference count are deleted.
+		 ~shared_array_mt()
+		 {
+			lock();
+
+			if(mpRefCount->Decrement() == 0) // Atomic operation
+			{
+			   // We were the last owner.
+			   delete[] mpArray;
+			   delete mpRefCount;
+			}
+
+			unlock();
+		 }
+
+		 /// operator=
+		 /// Makes this object share ownership of the array held by sharedArray.
+		 /// If this object currently holds a different array, its share of that
+		 /// array is released first (decrementing its reference count and deleting
+		 /// it if the count reaches zero). If both objects already refer to the same
+		 /// array pointer, nothing is done.
+		 shared_array_mt& operator=(shared_array_mt const& sharedArray)
+		 {
+			// No locking is done directly here; the copy constructor and swap
+			// do their own locking. The comparison below is deliberately unlocked:
+			// if mpArray can change while this function runs then the caller has
+			// an external race condition which must be resolved at that level.
+			if(mpArray != sharedArray.mpArray)
+			{
+			   // Copy-and-swap: the temporary takes a share of the new array, we
+			   // exchange contents with it, and its destructor then releases
+			   // whatever we held previously.
+			   this_type temp(sharedArray);
+			   temp.swap(*this);
+			}
+
+			return *this;
+		 }
+
+		 // operator=
+		 // We do not define this function in order to maintain compatibility 
+		 // with the currently proposed (2003) C++ standard addition.  Use reset instead.
+		 // shared_array_mt& operator=(T* pValue)
+		 // {
+		 //     reset(pValue);
+		 //     return *this;
+		 // }
+
+		 /// lock
+		 /// @brief Locks this object's mutex for thread-safe access.
+		 /// Every call must be balanced by a matching call to unlock.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class; the mutex member is declared mutable for this reason.
+		 void lock() const
+		 {
+			mMutex.Lock(); 
+		 }
+
+		 /// unlock
+		 /// @brief Unlocks this object's mutex, which was previously locked via lock.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class; the mutex member is declared mutable for this reason.
+		 void unlock() const
+		 {
+			mMutex.Unlock(); 
+		 }
+
+		 /// reset
+		 /// Releases this object's share of the currently owned array and takes
+		 /// ownership of pArray with a fresh reference count of 1. If pArray
+		 /// is the same pointer that is already owned, nothing is done.
+		 /// pArray may be null.
+		 void reset(T* pArray = 0)
+		 {
+			// No locking is done directly here, as swap takes care of that.
+			// Locking around the comparison wouldn't buy anything.
+			if(pArray != mpArray)
+			{
+			   // Build a new owner for pArray, exchange contents with it, and
+			   // let its destructor release what we held previously.
+			   this_type temp(pArray);
+			   temp.swap(*this);
+			}
+		 }
+
+		 /// swap
+		 /// Exchanges the owned pointer and its reference count between two
+		 /// shared_array_mt objects. Swapping an object with itself does nothing.
+		 ///
+		 /// The mutexes of both objects are held while the exchange takes place.
+		 /// They are always acquired in order of ascending object address, so that
+		 /// two threads which concurrently execute a.swap(b) and b.swap(a) cannot
+		 /// deadlock with each thread holding one mutex and waiting for the other.
+		 void swap(shared_array_mt<T>& sharedArray)
+		 {
+			if(this == &sharedArray)
+			   return; // Nothing to exchange.
+
+			// Establish a consistent lock order based on address.
+			this_type* const pFirst  = ((uintptr_t)this < (uintptr_t)&sharedArray) ? this : &sharedArray;
+			this_type* const pSecond = (pFirst == this) ? &sharedArray : this;
+
+			pFirst->lock();
+			pSecond->lock();
+
+			// std::swap(mpArray, sharedArray.mpArray); // Not used so that we can reduce a dependency.
+			T* const pArray     = sharedArray.mpArray;
+			sharedArray.mpArray = mpArray;
+			mpArray             = pArray;
+
+			// std::swap(mpRefCount, sharedArray.mpRefCount); // Not used so that we can reduce a dependency.
+			reference_count_type* const pRefCount = sharedArray.mpRefCount;
+			sharedArray.mpRefCount = mpRefCount;
+			mpRefCount             = pRefCount;
+
+			// Release in the reverse order of acquisition.
+			pSecond->unlock();
+			pFirst->unlock();
+		 }
+
+		 /// operator[]
+		 /// Returns a reference to the element at index i of the owned array.
+		 /// No bounds or null checking is done.
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[6]);
+		 ///   int x = ptr[2];
+		 T& operator[](ptrdiff_t i) const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpArray && (i >= 0));
+			return *(mpArray + i);
+		 }
+
+		 /// operator*
+		 /// Returns the owned pointer dereferenced, which is a reference to the
+		 /// first element of the owned array.
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   int x = *ptr; // Same as ptr[0]
+		 T& operator*() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpArray);
+			return mpArray[0];
+		 }
+
+		 /// operator->
+		 /// Allows access to the owned pointer via operator->(). As the owned pointer
+		 /// refers to an array, member access applies to the first element.
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_array_mt<X> ptr(new X[4]);
+		 ///   ptr->DoSomething(); // Acts on ptr[0]
+		 T* operator->() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpArray);
+			return mpArray;
+		 }
+
+		 /// get
+		 /// Returns the owned pointer, which may be null. Ownership is not
+		 /// transferred. Note that this class does not provide an operator T*()
+		 /// function. This is because such a thing (automatic conversion) is
+		 /// deemed unsafe.
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_array_mt<X> ptr(new X[4]);
+		 ///   X* pX = ptr.get();
+		 ///   pX->DoSomething();
+		 T* get() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			return mpArray;
+		 }
+
+		 /// use_count
+		 /// Returns the current value of the shared reference count, which is
+		 /// the number of shared_array_mt objects that share the owned pointer.
+		 /// The return value is one if the owned pointer is null and unshared.
+		 int use_count() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpRefCount);
+			return static_cast<int>(*mpRefCount);
+		 }
+
+		 /// unique
+		 /// Returns true if this object is the only owner of the owned pointer,
+		 /// i.e. the shared reference count is exactly one.
+		 /// The return value is true if the owned pointer is null and unshared.
+		 bool unique() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpRefCount);
+			const int count = static_cast<int>(*mpRefCount);
+			return (count == 1);
+		 }
+
+		 /// add_ref
+		 /// Manually increments the reference count on the owned pointer.
+		 /// This is currently disabled because it isn't in part of the 
+		 /// proposed C++ language addition.
+		 /// int add_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    ++*mpRefCount; // Atomic operation
+		 ///    unlock();
+		 /// }
+
+		 /// release_ref
+		 /// Manually decrements the reference count on the owned pointer.
+		 /// If the reference count becomes zero, then the owned pointer 
+		 /// is deleted and reset(0) is called. For any given instance of
+		 /// shared_ptr, release_ref can only be called as many times as -- 
+		 /// but no more than -- the number of times add_ref was called
+		 /// for that same shared_ptr. Otherwise, separate instances of 
+		 /// shared_ptr would be left with dangling owned pointer instances.
+		 /// This is currently disabled because it isn't in part of the 
+		 /// proposed C++ language addition.
+		 /// int release_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    if(*mpRefCount > 1){
+		 ///       const int nReturnValue = --*mpRefCount; // Atomic operation
+		 ///       unlock();
+		 ///       return nReturnValue;
+		 ///    }
+		 ///    reset(0);
+		 ///    unlock();
+		 ///    return 0;
+		 /// }
+
+		 /// Implicit operator bool
+		 /// Allows for using a shared_array_mt in a boolean context. The result
+		 /// tests as true if the owned pointer is non-null.
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   if(ptr)
+		 ///      ++*ptr;
+		 ///    
+		 /// Note that we convert to a pointer-to-member type instead of providing
+		 /// operator bool(). The reason for this is that booleans automatically
+		 /// convert up to short, int, float, etc. The result would be that
+		 /// this: if(sharedArray == 1) would yield true (bad).
+		 typedef T* (this_type::*bool_)() const;
+		 operator bool_() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			if(!mpArray)
+			   return 0;
+
+			return &this_type::get;
+		 }
+
+		 /// operator!
+		 /// The opposite of the implicit boolean conversion: returns true if
+		 /// the owned pointer is null. Some compilers require this and some don't.
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   if(!ptr)
+		 ///      EAT_ASSERT(false);
+		 bool operator!() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			return !mpArray;
+		 }
+
+	  }; // class shared_array_mt
+
+
+	  /// get_pointer
+	  /// Returns the pointer owned by the given shared_array_mt, as per
+	  /// shared_array_mt::get(). The returned pointer may be null and
+	  /// ownership is not transferred.
+	  template<class T>
+	  inline T* get_pointer(const shared_array_mt<T>& sharedArray)
+	  {
+		 return sharedArray.get();
+	  }
+
+	  /// swap
+	  /// Exchanges the owned pointer between two shared_array_mt objects by
+	  /// forwarding to the member function shared_array_mt::swap.
+	  /// This non-member version is useful for compatibility of shared_array_mt
+	  /// objects with the C++ Standard Library and other libraries.
+	  template<class T>
+	  inline void swap(shared_array_mt<T>& sharedArray1, shared_array_mt<T>& sharedArray2)
+	  {
+		 sharedArray1.swap(sharedArray2);
+	  }
+
+
+	  /// operator==
+	  /// Compares two shared_array_mt objects for equality. Two shared_array_mt
+	  /// objects are equal when the pointers they own compare equal; the
+	  /// reference counts play no part in the comparison.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_array_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator==(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 // EAT_ASSERT((sharedArray1.get() != sharedArray2.get()) || (sharedArray1.use_count() == sharedArray2.use_count()));
+		 T* const pArray1 = sharedArray1.get();
+		 U* const pArray2 = sharedArray2.get();
+
+		 return (pArray1 == pArray2);
+	  }
+
+
+	  /// operator!=
+	  /// Compares two shared_array_mt objects for inequality. Two shared_array_mt
+	  /// objects are unequal when the pointers they own do not compare equal; the
+	  /// reference counts play no part in the comparison.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_array_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator!=(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 // EAT_ASSERT((sharedArray1.get() != sharedArray2.get()) || (sharedArray1.use_count() == sharedArray2.use_count()));
+		 return !(sharedArray1.get() == sharedArray2.get());
+	  }
+
+
+	  /// operator<
+	  /// Orders two shared_array_mt objects by comparing the pointers they own.
+	  /// Useful when storing shared_array_mt objects in sorted containers.
+	  template<class T, class U>
+	  inline bool operator<(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 // Alternatively use: std::less<T*>(a.get(), b.get());
+		 T* const pArray1 = sharedArray1.get();
+		 U* const pArray2 = sharedArray2.get();
+
+		 return (pArray1 < pArray2);
+	  }
+
+   } // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_SHARED_ARRAY_MT_H
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,472 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This is a multithread-safe version of shared_ptr.
+// For basic documentation, see shared_ptr.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_SHARED_PTR_MT_H
+#define EATHREAD_SHARED_PTR_MT_H
+
+#ifndef INCLUDED_eabase_H
+   #include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+   #include <eathread/eathread_futex.h>
+#endif
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+   #include <eathread/eathread_atomic.h>
+#endif
+// #include <memory> Temporarily disabled while we wait for compilers to modernize. // Declaration of std::auto_ptr.
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+/// namespace EA
+/// The standard Electronic Arts namespace
+namespace EA
+{
+   namespace Thread
+   {
+	  /// class shared_ptr_mt
+	  /// @brief Implements a thread-safe version of shared_ptr.
+	  template<class T>
+	  class shared_ptr_mt
+	  {
+	  private:
+		 /// this_type
+		 /// This is an alias for shared_ptr_mt<T>, this class.
+		 typedef shared_ptr_mt<T> this_type;
+
+		 /// reference_count_type
+		 /// An internal reference count type. Must be convertable to int
+		 /// so that the public use_count function can work.
+		 typedef EA::Thread::AtomicInt32 reference_count_type;
+
+		 T*                    mpValue;      /// The owned pointer.
+		 reference_count_type* mpRefCount;   /// Reference count for owned pointer.
+		 mutable Futex         mMutex;       /// Mutex guarding access to this class.
+
+	  public:
+		 typedef T element_type;
+		 typedef T value_type;
+
+		 /// shared_ptr_mt
+		 /// Takes ownership of the pointer and creates a new shared reference
+		 /// count with an initial value of 1. It is OK if the input pointer is null;
+		 /// a null pointer is still given a reference count of 1.
+		 /// The shared reference count is allocated on the heap via operator new.
+		 /// In builds where exceptions are enabled, if that allocation throws then
+		 /// the passed in pointer is deleted and the exception is rethrown,
+		 /// so the caller does not leak the object.
+		 ///
+		 /// @param pValue Pointer to an object to take ownership of. It is released with
+		 ///               delete by this class, so it is expected to come from new.
+		 explicit shared_ptr_mt(T* pValue = 0)
+			: mpValue(pValue), mMutex()
+		 {
+			// We don't lock our mutex in this function, as this is the constructor
+			// and we assume that construction is already done in a thread-safe way
+			// by the owner of this object.
+			#if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+			   // No exception support: the result of the allocation is used as-is.
+			   // NOTE(review): the returned pointer is not checked for null here.
+			   mpRefCount = new reference_count_type(1);
+			#else
+				// NOTE(review): warning 4571 presumably relates to catch(...) semantics under VC++ -- confirm.
+				EA_DISABLE_VC_WARNING(4571)
+				try
+				{
+					mpRefCount = new reference_count_type(1);
+				}
+				catch(...)
+				{
+					// We own the object at this point, so it must be freed before propagating the failure.
+					delete pValue;
+					//mpRefCount = 0; shouldn't be necessary.
+					throw;
+				}
+				EA_RESTORE_VC_WARNING()
+			#endif
+		 }
+
+		 /// shared_ptr_mt
+		 /// Copy constructor. The new object shares ownership of the pointer held by
+		 /// sharedPtr, and the shared reference count is incremented by one.
+		 /// The source object's mutex is held while its members are read.
+		 shared_ptr_mt(shared_ptr_mt const& sharedPtr)
+			: mMutex()
+		 {
+			// Our own mutex isn't locked, as this is a constructor and nobody
+			// else can see this object yet. Construction is assumed to be done
+			// in a thread-safe way by the owner of this object.
+			sharedPtr.lock();
+
+			reference_count_type* const pSharedCount = sharedPtr.mpRefCount;
+			pSharedCount->Increment(); // Atomic operation
+
+			mpRefCount = pSharedCount;
+			mpValue    = sharedPtr.mpValue;
+
+			sharedPtr.unlock();
+		 }
+
+		 // Temporarily disabled while we wait for compilers to modernize.
+		 // 
+		 // shared_ptr_mt
+		 // Constructs a shared_ptr_mt from a std::auto_ptr. This class  
+		 // transfers ownership of the pointer from the auto_ptr by 
+		 // calling its release function.
+		 // If an exception occurs during the allocation of the shared 
+		 // reference count, the owned pointer is deleted and the exception
+		 // is rethrown.
+		 //explicit shared_ptr_mt(std::auto_ptr<T>& autoPtr)
+		 //   : mMutex()
+		 //{
+		 //   // We don't lock our mutex in this function, as this is the constructor
+		 //   // and we assume that construction is already done in a thread-safe way
+		 //   // by the owner of this object.
+		 //   mpValue = autoPtr.release();
+		 //
+		 //   #if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+		 //      mpRefCount = new reference_count_type(1);
+		 //   #else
+		 //      try
+		 //      {
+		 //         mpRefCount = new reference_count_type(1);
+		 //      }
+		 //      catch(...)
+		 //      {
+		 //         delete mpValue;
+		 //         mpValue = 0;
+		 //         //mpRefCount = 0; shouldn't be necessary.
+		 //         throw;
+		 //      }
+		 //   #endif
+		 //} 
+
+		 /// ~shared_ptr_mt
+		 /// Gives up this object's share of the owned pointer. The shared reference
+		 /// count is decremented and, if this was the last owner (the count
+		 /// reaches zero), both the owned object and the reference count are deleted.
+		 ~shared_ptr_mt()
+		 {
+			lock();
+
+			if(mpRefCount->Decrement() == 0) // Atomic operation
+			{
+				// We were the last owner. The owned pointer is only deleted if it
+				// is non-null, as the user may have created a shared ptr without
+				// passing in a value.
+				if(mpValue != 0)
+					delete mpValue;
+
+				delete mpRefCount;
+			}
+
+			unlock();
+		 }
+
+		 /// operator=
+		 /// Makes this object share ownership of the pointer held by sharedPtr.
+		 /// If this object currently holds a different pointer, its share of that
+		 /// pointer is released first (decrementing its reference count and deleting
+		 /// it if the count reaches zero). If both objects already refer to the same
+		 /// pointer, nothing is done.
+		 shared_ptr_mt& operator=(shared_ptr_mt const& sharedPtr)
+		 {
+			// No locking is done directly here; the copy constructor and swap
+			// do their own locking. The comparison below is deliberately unlocked:
+			// if mpValue can change while this function runs then the caller has
+			// an external race condition which must be resolved at that level.
+			if(mpValue != sharedPtr.mpValue)
+			{
+			   // Copy-and-swap: the temporary takes a share of the new pointer, we
+			   // exchange contents with it, and its destructor then releases
+			   // whatever we held previously.
+			   this_type temp(sharedPtr);
+			   temp.swap(*this);
+			}
+
+			return *this;
+		 }
+
+		 // Temporarily disabled while we wait for compilers to modernize.
+		 // 
+		 // operator=
+		 // Transfers ownership of a std::auto_ptr to this class.
+		 //shared_ptr_mt& operator=(std::auto_ptr<T>& autoPtr)
+		 //{
+		 //   // We don't lock any mutexes here because we let the swap function do that.
+		 //   // EAT_ASSERT(mpValue != autoPtr.get());
+		 //   shared_ptr_mt(autoPtr).swap(*this);
+		 //   return *this;
+		 //}
+
+		 // operator=
+		 // We do not define this function in order to maintain compatibility 
+		 // with the currently proposed (2003) C++ standard addition. Use reset instead.
+		 // shared_ptr_mt& operator=(T* pValue);
+		 // {
+		 //     reset(pValue);
+		 //     return *this;
+		 // }
+
+		 /// lock
+		 /// @brief Locks this object's mutex for thread-safe access.
+		 /// Every call must be balanced by a matching call to unlock.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class; the mutex member is declared mutable for this reason.
+		 void lock() const
+		 {
+			mMutex.Lock(); 
+		 }
+
+		 /// unlock
+		 /// @brief Unlocks this object's mutex, which was previously locked via lock.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class; the mutex member is declared mutable for this reason.
+		 void unlock() const
+		 {
+			mMutex.Unlock(); 
+		 }
+
+		 /// reset
+		 /// Releases this object's share of the currently owned pointer and takes
+		 /// ownership of pValue with a fresh reference count of 1. If pValue
+		 /// is the same pointer that is already owned, nothing is done.
+		 /// pValue may be null.
+		 void reset(T* pValue = 0)
+		 {
+			// No locking is done directly here, as swap takes care of that.
+			// Locking around the comparison wouldn't buy anything.
+			if(pValue != mpValue)
+			{
+			   // Build a new owner for pValue, exchange contents with it, and
+			   // let its destructor release what we held previously.
+			   this_type temp(pValue);
+			   temp.swap(*this);
+			}
+		 }
+
+		 /// swap
+		 /// Exchanges the owned pointer and its reference count between two
+		 /// shared_ptr_mt objects. Swapping an object with itself does nothing.
+		 ///
+		 /// The mutexes of both objects are held while the exchange takes place.
+		 /// They are always acquired in order of ascending object address, so that
+		 /// two threads which concurrently execute a.swap(b) and b.swap(a) cannot
+		 /// deadlock with each thread holding one mutex and waiting for the other.
+		 void swap(shared_ptr_mt<T>& sharedPtr)
+		 {
+			if(this == &sharedPtr)
+			   return; // Nothing to exchange.
+
+			// Establish a consistent lock order based on address.
+			this_type* const pFirst  = ((uintptr_t)this < (uintptr_t)&sharedPtr) ? this : &sharedPtr;
+			this_type* const pSecond = (pFirst == this) ? &sharedPtr : this;
+
+			pFirst->lock();
+			pSecond->lock();
+
+			// std::swap(mpValue, sharedPtr.mpValue); // Not used so that we can reduce a dependency.
+			T* const pValue   = sharedPtr.mpValue;
+			sharedPtr.mpValue = mpValue;
+			mpValue           = pValue;
+
+			// std::swap(mpRefCount, sharedPtr.mpRefCount); // Not used so that we can reduce a dependency.
+			reference_count_type* const pRefCount = sharedPtr.mpRefCount;
+			sharedPtr.mpRefCount = mpRefCount;
+			mpRefCount           = pRefCount;
+
+			// Release in the reverse order of acquisition.
+			pSecond->unlock();
+			pFirst->unlock();
+		 }
+
+		 /// operator*
+		 /// Returns the owned pointer dereferenced. The owned pointer must not
+		 /// be null when this is called; no check is made.
+		 /// Example usage:
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   int x = *ptr;
+		 T& operator*() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpValue);
+			return *mpValue;
+		 }
+
+		 /// operator->
+		 /// Allows access to the owned pointer via operator->()
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_ptr_mt<X> ptr(new X);
+		 ///   ptr->DoSomething();
+		 T* operator->() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpValue);
+			return mpValue;
+		 }
+
+		 /// get
+		 /// Returns the owned pointer, which may be null. Ownership is not
+		 /// transferred. Note that this class does not provide an operator T*()
+		 /// function. This is because such a thing (automatic conversion) is
+		 /// deemed unsafe.
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_ptr_mt<X> ptr(new X);
+		 ///   X* pX = ptr.get();
+		 ///   pX->DoSomething();
+		 T* get() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			return mpValue;
+		 }
+
+		 /// use_count
+		 /// Returns the current value of the shared reference count, which is
+		 /// the number of shared_ptr_mt objects that share the owned pointer.
+		 /// The return value is one if the owned pointer is null and unshared.
+		 int use_count() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpRefCount);
+			return static_cast<int>(*mpRefCount);
+		 }
+
+		 /// unique
+		 /// Returns true if this object is the only owner of the owned pointer,
+		 /// i.e. the shared reference count is exactly one.
+		 /// The return value is true if the owned pointer is null and unshared.
+		 bool unique() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			// No SMP read barrier is used; that is left to the caller.
+			// EAT_ASSERT(mpRefCount);
+			const int count = static_cast<int>(*mpRefCount);
+			return (count == 1);
+		 }
+
+		 /// add_ref
+		 /// Manually increments the reference count on the owned pointer.
+		 /// This is currently disabled because it isn't in part of the 
+		 /// proposed C++ language addition.
+		 /// int add_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    ++*mpRefCount; // Atomic operation
+		 ///    unlock();
+		 /// }
+
+		 /// release_ref
+		 /// Manually decrements the reference count on the owned pointer.
+		 /// If the reference count becomes zero, then the owned pointer 
+		 /// is deleted and reset(0) is called. For any given instance of
+		 /// shared_ptr_mt, release_ref can only be called as many times as -- 
+		 /// but no more than -- the number of times add_ref was called
+		 /// for that same shared_ptr_mt. Otherwise, separate instances of 
+		 /// shared_ptr_mt would be left with dangling owned pointer instances.
+		 /// This is currently disabled because it isn't in part of the 
+		 /// proposed C++ language addition.
+		 /// int release_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    if(*mpRefCount > 1){
+		 ///       const int nReturnValue = --*mpRefCount; // Atomic operation
+		 ///       unlock();
+		 ///       return nReturnValue;
+		 ///    }
+		 ///    reset(0);
+		 ///    unlock();
+		 ///    return 0;
+		 /// }
+
+		 /// Implicit operator bool
+		 /// Allows for using a shared_ptr_mt in a boolean context. The result
+		 /// tests as true if the owned pointer is non-null.
+		 /// Example usage:
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   if(ptr)
+		 ///      ++*ptr;
+		 ///    
+		 /// Note that we convert to a pointer-to-member type instead of providing
+		 /// operator bool(). The reason for this is that booleans automatically
+		 /// convert up to short, int, float, etc. The result would be that
+		 /// this: if(sharedPtr == 1) would yield true (bad).
+		 typedef T* (this_type::*bool_)() const;
+		 operator bool_() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			if(!mpValue)
+			   return 0;
+
+			return &this_type::get;
+		 }
+
+		 /// operator!
+		 /// The opposite of the implicit boolean conversion: returns true if
+		 /// the owned pointer is null. Some compilers require this and some don't.
+		 /// Example usage:
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   if(!ptr)
+		 ///      EAT_ASSERT(false);
+		 bool operator!() const
+		 {
+			// No lock is taken, as this is essentially a read operation.
+			return !mpValue;
+		 }
+
+	  }; // class shared_ptr_mt
+
+
+	  /// get_pointer
+	  /// Returns the pointer owned by the given shared_ptr_mt, as per
+	  /// shared_ptr_mt::get(). The returned pointer may be null and
+	  /// ownership is not transferred.
+	  template<class T>
+	  inline T* get_pointer(const shared_ptr_mt<T>& sharedPtr)
+	  {
+		 return sharedPtr.get();
+	  }
+
+	  /// swap
+	  /// Exchanges the owned pointer between two shared_ptr_mt objects by
+	  /// forwarding to the member function shared_ptr_mt::swap.
+	  /// This non-member version is useful for compatibility of shared_ptr_mt
+	  /// objects with the C++ Standard Library and other libraries.
+	  template<class T>
+	  inline void swap(shared_ptr_mt<T>& sharedPtr1, shared_ptr_mt<T>& sharedPtr2)
+	  {
+		 sharedPtr1.swap(sharedPtr2);
+	  }
+
+
+	  /// operator==
+	  /// Compares two shared_ptr_mt objects for equality. Two shared_ptr_mt
+	  /// objects are equal when the pointers they own compare equal; the
+	  /// reference counts play no part in the comparison.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_ptr_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator==(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 // EAT_ASSERT((sharedPtr1.get() != sharedPtr2.get()) || (sharedPtr1.use_count() == sharedPtr2.use_count()));
+		 T* const pValue1 = sharedPtr1.get();
+		 U* const pValue2 = sharedPtr2.get();
+
+		 return (pValue1 == pValue2);
+	  }
+
+
+	  /// operator!=
+	  /// Compares two shared_ptr_mt objects for inequality. Two shared_ptr_mt
+	  /// objects are unequal when the pointers they own do not compare equal; the
+	  /// reference counts play no part in the comparison.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_ptr_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator!=(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 // EAT_ASSERT((sharedPtr1.get() != sharedPtr2.get()) || (sharedPtr1.use_count() == sharedPtr2.use_count()));
+		 return !(sharedPtr1.get() == sharedPtr2.get());
+	  }
+
+
+	  /// operator<
+	  /// Orders two shared_ptr_mt objects by comparing the pointers they own.
+	  /// Useful when storing shared_ptr_mt objects in sorted containers.
+	  template<class T, class U>
+	  inline bool operator<(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 // Alternatively use: std::less<T*>(a.get(), b.get());
+		 T* const pValue1 = sharedPtr1.get();
+		 U* const pValue2 = sharedPtr2.get();
+
+		 return (pValue1 < pValue2);
+	  }
+
+   } // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_SHARED_PTR_MT_H
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_VERSION_H
+#define EATHREAD_VERSION_H
+
+
+#include <eathread/internal/config.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// Version contains the version of the library when it was built.
+		/// This can be used to verify the correct version has been linked
+		/// into the executable or loaded by the O/S (in the case of a DLL).
+		struct Version
+		{
+			int mMajor; ///< Major version number.
+			int mMinor; ///< Minor version number.
+			int mPatch; ///< Patch version number.
+		};
+
+		/// Get the library version information.
+		///
+		/// Returns a pointer to a read-only Version record. Only the declaration is
+		/// visible in this header; the definition lives in the library itself.
+		EATHREADLIB_API const Version *GetVersion();
+
+		/// Check that the linked/loaded library is the same as the headers 
+		/// are expecting.
+		///
+		/// If the version numbers passed to CheckVersion match those
+		/// built into the library when it was compiled, true is returned. 
+		/// If not, false is returned.
+		///
+		/// majorVersion / minorVersion / patchVersion are the three components the
+		/// caller expects, in the same order as the fields of Version.
+		EATHREADLIB_API bool CheckVersion(int majorVersion, int minorVersion, int patchVersion);
+
+	} // namespace Thread
+
+} // namespace EA
+
+#endif
@@ -0,0 +1,462 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+#define EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+
+#include "EABase/eabase.h"
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+
+#ifdef _MSC_VER
+	#pragma warning(push, 0)
+	#include <math.h>   // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
+	#include <intrin.h>
+	#pragma warning(pop)
+
+	#pragma warning(push)
+	#pragma warning(disable: 4146)  // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+
+#if defined(EA_PROCESSOR_X86_64)
+
+	#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			///
+			/// Non-member 128-bit Atomics implementation 
+			///
+			#if (_MSC_VER >= 1500) // VS2008+
+
+				#define EATHREAD_ATOMIC_128_SUPPORTED 1
+
+				// Algorithm for implementing an arbitrary atomic modification via AtomicCompareAndSwap:
+				//     int128_t oldValue;
+				//
+				//     do {
+				//         oldValue = AtomicGetValue(dest);
+				//         newValue = <modification of oldValue>
+				//     } while(!AtomicCompareAndSwap(dest, oldValue, newValue));
+ 
+				// The following function is a wrapper for the Microsoft _InterlockedCompareExchange128 function.
+				// Early versions of AMD 64-bit hardware do not support 128 bit atomics. To check for hardware support 
+				// for the cmpxchg16b instruction, call the __cpuid intrinsic with InfoType=0x00000001 (standard function 1). 
+				// Bit 13 of CPUInfo[2] (ECX) is 1 if the instruction is supported.
+
+				/// 128-bit compare-and-swap for signed 64-bit pairs (MSVC implementation).
+				/// Note that the function name really is spelled "Conditionall28" (two letter 'l's followed by "28").
+				///
+				/// dest128      - the 128-bit destination, viewed as two consecutive int64_t values.
+				/// value128     - the two halves to store; element [0] is passed as the low half and element [1] as the high half.
+				/// condition128 - the two halves the destination is compared against, in the same [0], [1] order.
+				///
+				/// Returns true when _InterlockedCompareExchange128 reports success (returns 1).
+				/// NOTE(review): Microsoft documents a 16-byte alignment requirement for the destination of
+				/// _InterlockedCompareExchange128 -- confirm that callers guarantee it.
+				inline bool AtomicSetValueConditionall28(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
+				{
+					__int64 conditionCopy[2] = { condition128[0], condition128[1] };                              // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
+					return _InterlockedCompareExchange128(dest128, value128[1], value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
+				}
+
+				/// 128-bit compare-and-swap for unsigned 64-bit pairs (MSVC implementation).
+				/// Behaves like the signed overload: every operand is cast to the signed 64-bit type
+				/// expected by _InterlockedCompareExchange128, element [0] being the low half and
+				/// element [1] the high half. Returns true when the intrinsic reports success (returns 1).
+				inline bool AtomicSetValueConditionall28(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
+				{ 
+					__int64 conditionCopy[2] = { (int64_t) condition128[0],  (int64_t)condition128[1] };                                               // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
+					return _InterlockedCompareExchange128((volatile int64_t*)dest128, (int64_t)value128[1], (int64_t)value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
+				}
+
+			#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+				#if defined(EA_COMPILER_CLANG) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) // GCC 4.3 or later for 128 bit atomics
+
+					#define EATHREAD_ATOMIC_128_SUPPORTED 1
+
+					// GCC on x64 implements all of its __sync functions below via the cmpxchg16b instruction,
+					// usually in the form of a loop.
+					// Use of 128 bit atomics on GCC requires compiling with the -mcx16 compiler argument. 
+					// See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+
+					/// Atomically reads *source. Implemented as an atomic add of zero, which returns the
+					/// (unchanged) resulting value.
+					inline __int128_t AtomicGetValue(volatile __int128_t* source)
+					{
+						return __sync_add_and_fetch(source, __int128_t(0)); // Is there a better way to do an atomic read?
+					}
+
+					/// Atomically stores value into *dest. The previous contents returned by the builtin are discarded.
+					/// NOTE(review): GCC documents __sync_lock_test_and_set as an acquire barrier rather than a full
+					/// barrier; the AtomicInt<T>::SetValue specializations in this file issue __sync_synchronize()
+					/// first -- confirm whether the same is wanted here.
+					inline void AtomicSetValue(volatile __int128_t* dest, __int128_t value)
+					{
+						__sync_lock_test_and_set(dest, value);
+					}
+
+					/// Atomically adds one to *dest and returns the resulting (new) value.
+					inline __int128_t AtomicIncrement(volatile __int128_t* dest)
+					{
+						return __sync_add_and_fetch(dest, __int128_t(1));
+					}
+
+					/// Atomically subtracts one from *dest (by adding -1) and returns the resulting (new) value.
+					inline __int128_t AtomicDecrement(volatile __int128_t* dest)
+					{
+						return __sync_add_and_fetch(dest, __int128_t(-1));
+					}
+
+					/// Atomically adds value to *dest and returns the resulting (new) value.
+					inline __int128_t AtomicAdd(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_add_and_fetch(dest, value);
+					}
+
+					/// Atomically ORs value into *dest and returns the resulting (new) value.
+					inline __int128_t AtomicOr(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_or_and_fetch(dest, value);
+					}
+
+					/// Atomically ANDs value into *dest and returns the resulting (new) value.
+					inline __int128_t AtomicAnd(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_and_and_fetch(dest, value);
+					}
+
+					/// Atomically XORs value into *dest and returns the resulting (new) value.
+					inline __int128_t AtomicXor(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_xor_and_fetch(dest, value);
+					}
+
+					/// Atomically replaces *dest with value and returns the value *dest held beforehand.
+					inline __int128_t AtomicSwap(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_lock_test_and_set(dest, value);
+					}
+
+					/// Atomically sets *dest to value if *dest currently equals condition.
+					/// Returns true if the store took place. Note the argument order: the builtin
+					/// takes (dest, expected, desired) whereas this API takes (dest, desired, expected).
+					inline bool AtomicSetValueConditional(volatile __int128_t* dest, __int128_t value, __int128_t condition)
+					{
+						return __sync_bool_compare_and_swap(dest, condition, value);
+					}
+
+					/// Unsigned overload of the 128-bit compare-and-swap above; same contract.
+					inline bool AtomicSetValueConditional(volatile __uint128_t* dest, __uint128_t value, __uint128_t condition)
+					{
+						return __sync_bool_compare_and_swap(dest, condition, value);
+					}
+
+					// The following 64-bit-based 128 bit atomic is provided for compatibility with the Microsoft version.
+					// GCC supports the native __int128_t data type and thus can support a 128-bit-based 128 bit atomic.
+
+					/// 128-bit compare-and-swap for signed 64-bit pairs (GCC/clang implementation).
+					/// Note that the function name really is spelled "Conditionall28" (two letter 'l's followed by "28").
+					///
+					/// dest128      - the 128-bit destination, viewed as two consecutive int64_t values.
+					/// value128     - the two halves to store; element [0] is the low half and element [1] the high half.
+					/// condition128 - the two halves the destination is compared against, in the same layout.
+					///
+					/// Returns true if the destination matched condition128 and was replaced by value128.
+					inline bool AtomicSetValueConditionall28(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
+					{
+						// Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+
+						// Build the 128-bit operands from their 64-bit halves instead of dereferencing the input
+						// arrays through a casted __int128_t pointer. The inputs are const int64_t arrays that are 
+						// only guaranteed to be 8-byte aligned, so a 16-byte typed load through a cast pointer is
+						// undefined behavior (alignment and aliasing) and also casts away const. x86-64 is 
+						// little-endian, so element [0] is the low half, matching the in-memory layout.
+						const __uint128_t condition = (static_cast<__uint128_t>(static_cast<uint64_t>(condition128[1])) << 64) | static_cast<uint64_t>(condition128[0]);
+						const __uint128_t value     = (static_cast<__uint128_t>(static_cast<uint64_t>(value128[1]))     << 64) | static_cast<uint64_t>(value128[0]);
+
+						// The comparison is bitwise, so operating on the unsigned 128-bit type is equivalent to the signed one.
+						return __sync_bool_compare_and_swap((volatile __uint128_t*)dest128, condition, value);
+					}
+
+					/// 128-bit compare-and-swap for unsigned 64-bit pairs (GCC/clang implementation).
+					/// Note that the function name really is spelled "Conditionall28" (two letter 'l's followed by "28").
+					///
+					/// dest128      - the 128-bit destination, viewed as two consecutive uint64_t values.
+					/// value128     - the two halves to store; element [0] is the low half and element [1] the high half.
+					/// condition128 - the two halves the destination is compared against, in the same layout.
+					///
+					/// Returns true if the destination matched condition128 and was replaced by value128.
+					inline bool AtomicSetValueConditionall28(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
+					{
+						// Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+
+						// Build the 128-bit operands from their 64-bit halves instead of dereferencing the input
+						// arrays through a casted __uint128_t pointer. The inputs are const uint64_t arrays that are 
+						// only guaranteed to be 8-byte aligned, so a 16-byte typed load through a cast pointer is
+						// undefined behavior (alignment and aliasing) and also casts away const. x86-64 is 
+						// little-endian, so element [0] is the low half, matching the in-memory layout.
+						const __uint128_t condition = (static_cast<__uint128_t>(condition128[1]) << 64) | condition128[0];
+						const __uint128_t value     = (static_cast<__uint128_t>(value128[1])     << 64) | value128[0];
+
+						return __sync_bool_compare_and_swap((volatile __uint128_t*)dest128, condition, value);
+					}
+
+				#endif
+
+			#endif
+
+
+
+			/// class AtomicInt
+			/// Wraps an integer of type T (stored in the volatile member mValue) and exposes
+			/// get/set/compare-and-set/increment/decrement/add operations. The non-inline members
+			/// are only declared here; they are provided further down in this header as explicit
+			/// specializations for int32_t, uint32_t, int64_t and uint64_t.
+			/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+			/// and declaration specifications per platform.
+
+			template <class T>
+			class  AtomicInt
+			{
+			public:
+				typedef AtomicInt<T> ThisType;
+				typedef T            ValueType;
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				AtomicInt()
+					{}
+
+				/// Constructs from an integer by storing it through SetValue().
+				AtomicInt(ValueType n) 
+					{ SetValue(n); }
+
+				/// Copy constructor. Reads the source through GetValue() and initializes mValue directly.
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+				/// Copy assignment. Reads the source through GetValue() and writes mValue with a plain
+				/// assignment (it does not go through SetValue()).
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+				/// Returns mValue with a plain read, bypassing the GetValue() specializations.
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+				ValueType GetValue() const;                                     ///< Returns the current value.
+				ValueType SetValue(ValueType n);                                ///< Stores n; the specializations below return the value held before the store.
+				bool      SetValueConditional(ValueType n, ValueType condition); ///< Stores n only if the current value equals condition; returns true if the store happened.
+				ValueType Increment();                                          ///< Adds one and returns the new value.
+				ValueType Decrement();                                          ///< Subtracts one and returns the new value.
+				ValueType Add(ValueType n);                                     ///< Adds n and returns the new value.
+
+				// operators
+				// Each operator forwards to one of the member functions above. The postfix forms
+				// undo the increment/decrement on the returned copy so that they yield the prior value.
+				inline            operator const ValueType() const { return GetValue(); }  // Should this be provided? Is it safe enough? Return value of 'const' attempts to make this safe from misuse.
+				inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+				inline ValueType  operator+=(ValueType n)          { return Add(n);}
+				inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+				inline ValueType  operator++()                     { return Increment();}
+				inline ValueType  operator++(int)                  { return Increment() - 1;}
+				inline ValueType  operator--()                     { return Decrement(); }
+				inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+			protected:
+				volatile ValueType mValue; ///< The wrapped integer.
+			};
+
+
+			#if defined(EA_COMPILER_MSVC)
+				#pragma intrinsic(_InterlockedExchange)
+				#pragma intrinsic(_InterlockedExchangeAdd)
+				#pragma intrinsic(_InterlockedCompareExchange)
+				#pragma intrinsic(_InterlockedIncrement)
+				#pragma intrinsic(_InterlockedDecrement)
+				#pragma intrinsic(_InterlockedExchange64)
+				#pragma intrinsic(_InterlockedExchangeAdd64)
+				#pragma intrinsic(_InterlockedCompareExchange64)
+				#pragma intrinsic(_InterlockedIncrement64)
+				#pragma intrinsic(_InterlockedDecrement64)
+
+				// The following should work under any compiler, including such compilers as GCC under
+				// WINE or some other Win32 emulation. Win32 InterlockedXXX functions must exist on
+				// any system that supports the Windows API, be it 32 or 64 bit Windows.
+
+				// 32 bit versions
+				// Explicit specializations of the AtomicInt members for int32_t and uint32_t, built on the
+				// 'long'-based _Interlocked* intrinsics. &mValue is cast to long* in every call, and the 
+				// intrinsic's result is cast back to ValueType.
+
+				// GetValue: atomic add of zero, which returns the current value unchanged.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				// SetValue: atomic exchange; the returned value is the one held before the store.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				// SetValueConditional: the intrinsic returns the value found in mValue; the exchange
+				// succeeded exactly when that value equals 'condition'.
+				template<> inline
+				bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				template<> inline
+				bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				// Increment / Decrement: the intrinsics return the resulting (new) value.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement((long*)&mValue); }
+
+				// Add: _InterlockedExchangeAdd returns the value before the addition, so n is added
+				// to that result in order to return the new value.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }
+
+
+
+				// 64 bit versions
+				// Explicit specializations of the AtomicInt members for int64_t and uint64_t, built on the
+				// __int64-based _Interlocked*64 intrinsics. &mValue is cast to __int64* in every call, and 
+				// the intrinsic's result is cast back to ValueType.
+
+				// GetValue: atomic add of zero, which returns the current value unchanged.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				// SetValue: atomic exchange; the returned value is the one held before the store.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				// SetValueConditional: the intrinsic returns the value found in mValue; the exchange
+				// succeeded exactly when that value equals 'condition'.
+				template<> inline
+				bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }
+
+				template<> inline
+				bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }
+
+				// Increment / Decrement: the intrinsics return the resulting (new) value.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }
+
+				// Add: _InterlockedExchangeAdd64 returns the value before the addition, so n is added
+				// to that result in order to return the new value.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }
+
+
+			#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+				// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+				#if defined(EA_COMPILER_CLANG) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401)) // GCC 4.1 or later
+
+					// 32 bit versions, implemented with the GCC/clang __sync builtins.
+
+					// GetValue: atomic add of zero, returning the (unchanged) result. The const_cast
+					// strips the const/volatile qualification of &mValue inside this const member.
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					// SetValue: a full barrier followed by an atomic exchange; returns the previous value.
+					// (Presumably the explicit __sync_synchronize() is there because GCC documents
+					// __sync_lock_test_and_set as an acquire barrier only.)
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					// SetValueConditional: the builtin returns the value found in mValue; the swap
+					// succeeded exactly when that value equals 'condition'.
+					template <> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					// Increment / Decrement / Add: the *_and_fetch builtins return the resulting (new) value.
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+
+
+					// 64 bit versions, implemented with the GCC/clang __sync builtins.
+
+					// GetValue: atomic add of zero, returning the (unchanged) result. The const_cast
+					// strips the const/volatile qualification of &mValue inside this const member.
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					// SetValue: a full barrier followed by an atomic exchange; returns the previous value.
+					// (Presumably the explicit __sync_synchronize() is there because GCC documents
+					// __sync_lock_test_and_set as an acquire barrier only.)
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					// SetValueConditional: the builtin returns the value found in mValue; the swap
+					// succeeded exactly when that value equals 'condition'.
+					template <> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					// Increment / Decrement / Add: the *_and_fetch builtins return the resulting (new) value.
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+				#endif // GCC 4.1 or later
+
+			#endif // GCC
+
+		} // namespace Thread
+
+
+	} // namespace EA
+
+
+#endif // EA_PROCESSOR_X86_64
+
+
+#ifdef _MSC_VER
+	 #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,108 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+#define EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include "EABase/eabase.h"
+#endif
+
+
+#if defined(EA_PROCESSOR_X86_64)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#ifdef _MSC_VER
+		#pragma warning(push, 0)
+		#include <math.h>   // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
+		#include <intrin.h>
+		#pragma warning(pop)
+	#endif
+
+	// By default, we define EA_TARGET_SMP to be true. The reason for this is that most 
+	// applications that users of this code are likely to write are going to be executables
+	// which run properly on any system, be it multiprocessing or not.
+	#ifndef EA_TARGET_SMP
+		#define EA_TARGET_SMP 1
+	#endif
+
+	// EAProcessorPause
+	// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+	// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for 
+	// high performance spinning, because spinning without it incurs a significant performance penalty. 
+	// EAProcessorPause() is defined as a function-like macro in every branch below; it is meant
+	// to be placed inside the body of a spin-wait loop.
+
+	#if defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_BORLAND)
+		// Year 2003+ versions of the Microsoft SDK define 'rep nop' as YieldProcessor and/or __yield or _mm_pause. 
+		#pragma intrinsic(_mm_pause)
+		#define EAProcessorPause() _mm_pause() // The __yield() intrinsic currently doesn't work on x86-64.
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		// GCC-style inline assembly; 'rep ; nop' is the encoding of the pause instruction.
+		#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+	#else
+		// In this case we use an Intel-style asm statement. If this doesn't work for your compiler then 
+		// there most likely is some way to make the `rep nop` inline asm statement. 
+		#define EAProcessorPause() __asm { rep nop } // Alternatively: { __asm { _emit 0xf3 }; __asm { _emit 0x90 } }
+	#endif
+
+
+	// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+	// The x86 processor memory architecture ensures read and write consistency on both single and
+	// multi processing systems. This makes programming simpler but limits maximum system performance.
+	// Where no stronger primitive is selected below, we define EAReadBarrier to be the same as 
+	// EACompilerMemoryBarrier in order to limit the compiler from making any assumptions at its level 
+	// about memory usage. Year 2003+ versions of the Microsoft SDK define a 'MemoryBarrier' statement 
+	// which has the same effect as EAReadWriteBarrier.
+	//
+	// Usage: all three are invoked like a function call followed by a semicolon, e.g. `EAReadBarrier();`.
+	// None of the definitions below may therefore end in a semicolon of its own; otherwise constructs 
+	// such as `if(x) EAReadBarrier(); else ...` would fail to compile on that platform only.
+	#if defined(EA_COMPILER_MSVC)
+		#pragma intrinsic(_ReadBarrier)
+		#pragma intrinsic(_WriteBarrier)
+		#pragma intrinsic(_ReadWriteBarrier)
+
+		#define EAReadBarrier()      _ReadBarrier()
+		#define EAWriteBarrier()     _WriteBarrier()
+		#define EAReadWriteBarrier() _ReadWriteBarrier()
+	#elif defined(EA_PLATFORM_PS4)
+		// Hardware fence instructions; the "memory" clobber also makes each one a compiler barrier.
+		#define EAReadBarrier()      __asm__ __volatile__ ("lfence" ::: "memory")
+		#define EAWriteBarrier()     __asm__ __volatile__ ("sfence" ::: "memory")
+		#define EAReadWriteBarrier() __asm__ __volatile__ ("mfence" ::: "memory")
+	#elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		// Object-like macros: `EAReadBarrier()` expands to `__sync_synchronize()`.
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		// Object-like macros: `EAReadBarrier()` expands to `EACompilerMemoryBarrier()`, which is defined further below.
+		#define EAReadBarrier      EACompilerMemoryBarrier // Need to implement this for non-VC++
+		#define EAWriteBarrier     EACompilerMemoryBarrier // Need to implement this for non-VC++
+		#define EAReadWriteBarrier EACompilerMemoryBarrier // Need to implement this for non-VC++
+	#endif
+
+
+	// EACompilerMemoryBarrier
+	// Compiler-level barrier: tells the compiler not to move memory accesses across this point.
+	// The GCC/clang form is an empty asm statement with a "memory" clobber, so it emits no instruction.
+	// NOTE: for compilers matching neither branch the macro expands to nothing, i.e. it provides 
+	// no barrier at all on such compilers.
+	#if defined(EA_COMPILER_MSVC)
+		#define EACompilerMemoryBarrier() _ReadWriteBarrier()
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+	#else
+		#define EACompilerMemoryBarrier() // Possibly `EAT_ASSERT(false)` here?
+	#endif
+
+
+#endif // EA_PROCESSOR_X86_64
+
+
+#endif // EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+
+
+
+
+
+
+
+
@@ -0,0 +1,742 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+#define EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+
+#ifdef _MSC_VER
+	 #pragma warning(push)
+	 #pragma warning(disable: 4146)  // unary minus operator applied to unsigned type, result still unsigned
+	 #pragma warning(disable: 4339)  // use of undefined type detected in CLR meta-data
+#endif
+
+
+// This is required for Windows Phone (ARM) because we are temporarily not using
+// C++11-style atomics and we are depending on the MSVC intrinsics.
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_ARM)
+	#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// class AtomicInt
+			/// Integer wrapper whose modifying operations (SetValue, SetValueConditional, Increment,
+			/// Decrement, Add) are only declared here; their definitions are supplied per platform.
+			/// Actual implementation may vary per platform and may require certain alignments, sizes,
+			/// and declaration specifications.
+			template <class T>
+			class AtomicInt
+			{
+			public:
+				typedef T            ValueType;
+				typedef AtomicInt<T> ThisType;
+
+				/// Default constructor.
+				/// Deliberately leaves mValue in an unspecified state so that an AtomicInt behaves like
+				/// a built-in integer declared without an initializer.
+				/// Caveat: a built-in x can be created as `x` (left alone) or as `x()` (zeroed), and C++
+				/// gives a class no way to tell which of the two forms was used for an instance. It is
+				/// therefore hard to argue that doing nothing is right; zero-initializing would probably
+				/// be safer and would not break users, though it would add a tiny runtime cost.
+				AtomicInt()
+				{
+				}
+
+				/// Constructs from a plain value.
+				/// mValue is zeroed first because SetValue may otherwise read it before it is initialized.
+				AtomicInt(ValueType n)
+					: mValue(0)
+				{
+					SetValue(n);
+				}
+
+				/// Copy constructor: starts out with the value reported by x.GetValue().
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue())
+				{
+				}
+
+				/// Copy assignment: plain store of x.GetValue() into mValue.
+				AtomicInt& operator=(const ThisType& x)
+				{
+					mValue = x.GetValue();
+					return *this;
+				}
+
+				/// Returns the current value. This generic version is a plain read of mValue.
+				ValueType GetValue() const
+				{
+					return mValue;
+				}
+
+				/// Returns mValue with a plain read.
+				ValueType GetValueRaw() const
+				{
+					return mValue;
+				}
+
+				// Declared only; defined per platform and per ValueType.
+				ValueType SetValue(ValueType n);
+				bool      SetValueConditional(ValueType n, ValueType condition);
+				ValueType Increment();
+				ValueType Decrement();
+				ValueType Add(ValueType n);
+
+				// Operators. Each one forwards to the named member function above.
+
+				/// Conversion to the underlying value, via GetValue.
+				inline operator const ValueType() const
+				{
+					return GetValue();
+				}
+
+				/// Assignment from a plain value; yields n itself rather than SetValue's result.
+				inline ValueType operator =(ValueType n)
+				{
+					SetValue(n);
+					return n;
+				}
+
+				inline ValueType operator+=(ValueType n)
+				{
+					return Add(n);
+				}
+
+				inline ValueType operator-=(ValueType n)
+				{
+					return Add(-n);
+				}
+
+				/// Pre-increment: yields what Increment returns.
+				inline ValueType operator++()
+				{
+					return Increment();
+				}
+
+				/// Post-increment: yields Increment's result minus one.
+				inline ValueType operator++(int)
+				{
+					return Increment() - 1;
+				}
+
+				/// Pre-decrement: yields what Decrement returns.
+				inline ValueType operator--()
+				{
+					return Decrement();
+				}
+
+				/// Post-decrement: yields Decrement's result plus one.
+				inline ValueType operator--(int)
+				{
+					return Decrement() + 1;
+				}
+
+			protected:
+				volatile ValueType mValue;
+			};
+
+			#if defined(EA_PLATFORM_MICROSOFT) && defined(_MSC_VER)
+
+				// 32 bit versions
+				// Each specialization hands mValue, viewed as a long, to the matching Interlocked*Imp primitive.
+
+				// SetValue: even though an interlocked exchange shouldn't be needed on x86, the intrinsic
+				// x86 InterlockedExchange is at least as fast as C code we would otherwise put here.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+				{
+					const ValueType exchanged = (ValueType)InterlockedExchangeImp((long*)&mValue, (long)n);
+					return exchanged;
+				}
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+				{
+					const ValueType exchanged = (ValueType)InterlockedExchangeImp((long*)&mValue, (long)n);
+					return exchanged;
+				}
+
+				// SetValueConditional: true when the compare-exchange primitive reports `condition`.
+				template<> inline
+				bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					const ValueType reported = (ValueType)InterlockedCompareExchangeImp((long*)&mValue, (long)n, (long)condition);
+					return (reported == condition);
+				}
+
+				template<> inline
+				bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					const ValueType reported = (ValueType)InterlockedCompareExchangeImp((long*)&mValue, (long)n, (long)condition);
+					return (reported == condition);
+				}
+
+				// Increment / Decrement: the primitive's result is returned as-is.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+				{
+					const ValueType result = (ValueType)InterlockedIncrementImp((long*)&mValue);
+					return result;
+				}
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+				{
+					const ValueType result = (ValueType)InterlockedIncrementImp((long*)&mValue);
+					return result;
+				}
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+				{
+					const ValueType result = (ValueType)InterlockedDecrementImp((long*)&mValue);
+					return result;
+				}
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+				{
+					const ValueType result = (ValueType)InterlockedDecrementImp((long*)&mValue);
+					return result;
+				}
+
+				// Add: n is added to what the exchange-add primitive reports, and that sum is returned.
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+				{
+					const ValueType reported = (ValueType)InterlockedExchangeAddImp((long*)&mValue, (long)n);
+					return (reported + n);
+				}
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+				{
+					const ValueType reported = (ValueType)InterlockedExchangeAddImp((long*)&mValue, (long)n);
+					return (reported + n);
+				}
+
+
+
+				// 64 bit versions
+				// Every operation is a retry loop around InterlockedSetIfEqual: read mValue, compute the
+				// desired value, and try again until the conditional store succeeds.
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+				{
+					// Todo: This function has a problem unless the read of mValue below is atomic.
+					for(;;)
+					{
+						int64_t observed    = mValue;
+						int64_t replacement = observed; // Store back the very value that was read.
+
+						if(InterlockedSetIfEqual(const_cast<int64_t*>(&mValue), replacement, observed))
+							return replacement;
+					}
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+				{
+					// Todo: This function has a problem unless the read of mValue below is atomic.
+					for(;;)
+					{
+						uint64_t observed    = mValue;
+						uint64_t replacement = observed; // Store back the very value that was read.
+
+						if(InterlockedSetIfEqual(const_cast<uint64_t*>(&mValue), replacement, observed))
+							return replacement;
+					}
+				}
+
+				// SetValue returns the value that was in place when the store succeeded.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+				{
+					for(;;)
+					{
+						int64_t previous = mValue;
+
+						if(InterlockedSetIfEqual(&mValue, n, previous))
+							return previous;
+					}
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+				{
+					for(;;)
+					{
+						uint64_t previous = mValue;
+
+						if(InterlockedSetIfEqual(&mValue, n, previous))
+							return previous;
+					}
+				}
+
+				// SetValueConditional is a single attempt; no retry.
+				template<> inline
+				bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					const bool stored = InterlockedSetIfEqual(&mValue, n, condition);
+					return stored;
+				}
+
+				template<> inline
+				bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					const bool stored = InterlockedSetIfEqual(&mValue, n, condition);
+					return stored;
+				}
+
+				// Increment / Decrement / Add return the newly stored value.
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+				{
+					for(;;)
+					{
+						int64_t before = mValue;
+						int64_t after  = before + 1;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+				{
+					for(;;)
+					{
+						uint64_t before = mValue;
+						uint64_t after  = before + 1;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+				{
+					for(;;)
+					{
+						int64_t before = mValue;
+						int64_t after  = before - 1;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+				{
+					for(;;)
+					{
+						uint64_t before = mValue;
+						uint64_t after  = before - 1;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+				{
+					for(;;)
+					{
+						int64_t before = mValue;
+						int64_t after  = before + n;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+				{
+					for(;;)
+					{
+						uint64_t before = mValue;
+						uint64_t after  = before + n;
+
+						if(InterlockedSetIfEqual(&mValue, after, before))
+							return after;
+					}
+				}
+
+
+			#elif defined(EA_COMPILER_GNUC) || defined (EA_COMPILER_CLANG)
+
+				// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+				#if defined (EA_COMPILER_CLANG) || defined(__APPLE__) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) // GCC 4.3 or later
+
+					// 32 bit versions, implemented with the compiler's __sync builtins.
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+					{
+						// Adding zero leaves mValue unchanged while fetching it through an atomic builtin.
+						ValueType* const pValue = const_cast<ValueType*>(&mValue);
+						return __sync_add_and_fetch(pValue, 0);
+					}
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+					{
+						// Adding zero leaves mValue unchanged while fetching it through an atomic builtin.
+						ValueType* const pValue = const_cast<ValueType*>(&mValue);
+						return __sync_add_and_fetch(pValue, 0);
+					}
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+					{
+						// Explicit barrier first, then the exchange; the builtin's result is returned.
+						__sync_synchronize();
+						const ValueType exchanged = __sync_lock_test_and_set(&mValue, n);
+						return exchanged;
+					}
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+					{
+						// Explicit barrier first, then the exchange; the builtin's result is returned.
+						__sync_synchronize();
+						const ValueType exchanged = __sync_lock_test_and_set(&mValue, n);
+						return exchanged;
+					}
+
+					template <> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// Success is detected by the builtin reporting `condition` back.
+						const ValueType observed = __sync_val_compare_and_swap(&mValue, condition, n);
+						return (observed == condition);
+					}
+
+					template <> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// Success is detected by the builtin reporting `condition` back.
+						const ValueType observed = __sync_val_compare_and_swap(&mValue, condition, n);
+						return (observed == condition);
+					}
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{
+						const ValueType updated = __sync_sub_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{
+						const ValueType updated = __sync_sub_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, n);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, n);
+						return updated;
+					}
+
+
+
+					// 64 bit versions, same pattern as the 32 bit ones.
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+					{
+						// Adding zero leaves mValue unchanged while fetching it through an atomic builtin.
+						ValueType* const pValue = const_cast<ValueType*>(&mValue);
+						return __sync_add_and_fetch(pValue, 0);
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+					{
+						// Adding zero leaves mValue unchanged while fetching it through an atomic builtin.
+						ValueType* const pValue = const_cast<ValueType*>(&mValue);
+						return __sync_add_and_fetch(pValue, 0);
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+					{
+						// Explicit barrier first, then the exchange; the builtin's result is returned.
+						__sync_synchronize();
+						const ValueType exchanged = __sync_lock_test_and_set(&mValue, n);
+						return exchanged;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+					{
+						// Explicit barrier first, then the exchange; the builtin's result is returned.
+						__sync_synchronize();
+						const ValueType exchanged = __sync_lock_test_and_set(&mValue, n);
+						return exchanged;
+					}
+
+					template <> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// Success is detected by the builtin reporting `condition` back.
+						const ValueType observed = __sync_val_compare_and_swap(&mValue, condition, n);
+						return (observed == condition);
+					}
+
+					template <> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// Success is detected by the builtin reporting `condition` back.
+						const ValueType observed = __sync_val_compare_and_swap(&mValue, condition, n);
+						return (observed == condition);
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+					{
+						const ValueType updated = __sync_sub_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+					{
+						const ValueType updated = __sync_sub_and_fetch(&mValue, 1);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, n);
+						return updated;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+					{
+						const ValueType updated = __sync_add_and_fetch(&mValue, n);
+						return updated;
+					}
+
+				#else
+
+					// If the above intrinsics aren't used...
+					// Fallback 32 bit exchange and compare-exchange primitives written in GCC inline assembly.
+					// They are only provided when InterlockedCompareExchangeImp has not already been defined
+					// as a macro, and they are published through the two *Imp macros at the end of this block.
+					#ifndef InterlockedCompareExchangeImp
+					namespace
+					{
+						/// InterlockedExchange
+						/// Swaps n with *m and returns the value *m held before the swap.
+						int32_t InterlockedExchange(volatile int32_t* m, int32_t n)
+						{
+							int32_t result;
+
+							// EAX carries n in and the previous *m out; %2 is the register holding the pointer m.
+							__asm__ __volatile__ (
+								"xchgl %%eax, (%2)" // The xchg instruction does an implicit lock instruction.
+								: "=a" (result)     // outputs
+								: "a" (n), "q" (m)  // inputs
+								: "memory"          // clobbered
+								);
+
+							return result;
+						}
+
+						/// InterlockedCompareExchange
+						/// Compares *m with condition and, if they are equal, stores n into *m.
+						/// Returns the content of EAX after the instruction, i.e. the value *m held beforehand:
+						/// cmpxchg leaves EAX untouched on a match and loads it from the destination otherwise.
+						/// Callers detect success by comparing the return value with condition.
+						int32_t InterlockedCompareExchange(volatile int32_t* m, int32_t n, int32_t condition)
+						{
+							int32_t result;
+
+							// %1 is the pointer m (listed as an output and tied to the "1" input), %3 is n.
+							__asm__ __volatile__(
+								"lock; cmpxchgl %3, (%1) \n"        // Test *m against EAX, if same, then *m = n
+								: "=a" (result), "=q" (m)           // outputs
+								: "a" (condition), "q" (n), "1" (m) // inputs
+								: "memory"                          // clobbered
+								);
+
+							return result;
+						}
+
+						// Macros are not scoped by the namespace; these names are what the AtomicInt code uses.
+						#define InterlockedExchangeImp        InterlockedExchange
+						#define InterlockedCompareExchangeImp InterlockedCompareExchange
+					}
+					#endif
+
+					// 32 bit versions
+					// SetValue and SetValueConditional go through the *Imp exchange primitives. The unsigned
+					// variants pass mValue's address as an int32_t pointer.
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+					{
+						const ValueType exchanged = (ValueType)InterlockedExchangeImp(&mValue, n);
+						return exchanged;
+					}
+
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+					{
+						const ValueType exchanged = (ValueType)InterlockedExchangeImp((int32_t*)&mValue, n);
+						return exchanged;
+					}
+
+					template<> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// The store happened if the primitive reports `condition` back.
+						const ValueType reported = (ValueType)InterlockedCompareExchangeImp(&mValue, n, condition);
+						return (reported == condition);
+					}
+
+					template<> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						// The store happened if the primitive reports `condition` back.
+						const ValueType reported = (ValueType)InterlockedCompareExchangeImp((int32_t*)&mValue, n, condition);
+						return (reported == condition);
+					}
+
+					/// Increment (int32_t)
+					/// Adds 1 to mValue with a locked xadd and returns the new value.
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{
+						int32_t result;
+
+						// xadd exchanges its two operands and stores their sum in the destination. %0 enters
+						// holding 1 (the "0" constraint ties it to `result`) and leaves holding the old mValue.
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (1), "m" (mValue)
+											: "memory"
+											);
+						return result + 1; // old value + 1 == the value now stored
+					}
+
+					/// Increment (uint32_t)
+					/// Adds 1 to mValue with a locked xadd and returns the new value.
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{
+						// Unsigned, like the uint32_t Decrement and Add: with a signed temporary, `result + 1`
+						// would be a signed overflow when the counter holds 0x7fffffff.
+						uint32_t result;
+
+						// xadd exchanges its two operands and stores their sum in the destination. %0 enters
+						// holding 1 (the "0" constraint ties it to `result`) and leaves holding the old mValue.
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (1), "m" (mValue)
+											: "memory"
+											);
+						return result + 1; // old value + 1 == the value now stored (wraps modulo 2^32)
+					}
+
+					/// Decrement (int32_t)
+					/// Adds -1 to mValue with a locked xadd and returns the new value.
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{
+						int32_t result;
+
+						// %0 enters holding -1 and leaves holding the old mValue; mValue receives old + (-1).
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (-1), "m" (mValue)
+											: "memory"
+											);
+						return result - 1; // old value - 1 == the value now stored
+					}
+
+					/// Decrement (uint32_t)
+					/// Same as the int32_t version, using an unsigned temporary.
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{
+						uint32_t result;
+
+						// %0 enters holding -1 and leaves holding the old mValue; mValue receives old + (-1).
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (-1), "m" (mValue)
+											: "memory"
+											);
+						return result - 1; // old value - 1 == the value now stored
+					}
+
+					/// Add (int32_t)
+					/// Adds n to mValue with a locked xadd and returns the new value.
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{
+						int32_t result;
+
+						// %0 enters holding n and leaves holding the old mValue; mValue receives old + n.
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (n), "m" (mValue)
+											: "memory"
+											);
+						return result + n; // old value + n == the value now stored
+					}
+
+					/// Add (uint32_t)
+					/// Same as the int32_t version, using an unsigned temporary.
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{
+						uint32_t result;
+
+						// %0 enters holding n and leaves holding the old mValue; mValue receives old + n.
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (n), "m" (mValue)
+											: "memory"
+											);
+						return result + n; // old value + n == the value now stored
+					}
+
+
+
+					// 64 bit versions
+
+					/// InterlockedSetIfEqual (int64_t)
+					/// 64 bit compare-and-store built on `lock cmpxchg8b`. Thread-safe equivalent of:
+					///     if(*dest == condition)
+					///     {
+					///         *dest = newValue;
+					///         return true;
+					///     }
+					///     return false;
+					///
+					/// cmpxchg8b compares EDX:EAX (condition) with the 64 bit destination; on a match it stores
+					/// ECX:EBX (newValue) there, otherwise it loads the destination into EDX:EAX. Either way
+					/// EDX:EAX ends up holding the previous content, which is read back through the "=A" output.
+					/// The "memory" clobber matches the 32 bit primitives in this file: it makes the statement
+					/// a compiler-level barrier so surrounding loads and stores are not moved across it.
+					inline bool
+					InterlockedSetIfEqual(volatile int64_t* dest, int64_t newValue, int64_t condition)
+					{
+						int64_t oldValue;
+
+						__asm __volatile ("lock; cmpxchg8b %1"
+										 : "=A" (oldValue), "=m" (*dest)
+										 : "b" (((int32_t) newValue) & 0xffffffff),
+										   "c" ((int32_t)(newValue >> 32)),
+										   "m" (*dest), "a" (((int32_t) condition) & 0xffffffff),
+										   "d" ((int32_t)(condition >> 32))
+										 : "memory");
+
+						return oldValue == condition;
+					}
+
+					/// InterlockedSetIfEqual (uint64_t)
+					/// Unsigned overload; identical to the int64_t version apart from the operand types.
+					inline bool
+					InterlockedSetIfEqual(volatile uint64_t* dest, uint64_t newValue, uint64_t condition)
+					{
+						uint64_t oldValue;
+
+						__asm __volatile ("lock; cmpxchg8b %1"
+										 : "=A" (oldValue), "=m" (*dest)
+										 : "b" (((uint32_t) newValue) & 0xffffffff),
+										   "c" ((uint32_t)(newValue >> 32)),
+										   "m" (*dest), "a" (((uint32_t) condition) & 0xffffffff),
+										   "d" ((uint32_t)(condition >> 32))
+										 : "memory");
+
+						return oldValue == condition;
+					}
+
+					// The 64 bit AtomicInt operations are retry loops around InterlockedSetIfEqual: read
+					// mValue, compute the desired value, and try again until the conditional store succeeds.
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+					{
+						// Todo: This function has a problem unless the read of mValue below is atomic.
+						for(;;)
+						{
+							int64_t observed    = mValue;
+							int64_t replacement = observed; // Store back the very value that was read.
+
+							if(InterlockedSetIfEqual(const_cast<int64_t*>(&mValue), replacement, observed))
+								return replacement;
+						}
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+					{
+						// Todo: This function has a problem unless the read of mValue below is atomic.
+						for(;;)
+						{
+							uint64_t observed    = mValue;
+							uint64_t replacement = observed; // Store back the very value that was read.
+
+							if(InterlockedSetIfEqual(const_cast<uint64_t*>(&mValue), replacement, observed))
+								return replacement;
+						}
+					}
+
+					// SetValue returns the value that was in place when the store succeeded.
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+					{
+						for(;;)
+						{
+							int64_t previous = mValue;
+
+							if(InterlockedSetIfEqual(&mValue, n, previous))
+								return previous;
+						}
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+					{
+						for(;;)
+						{
+							uint64_t previous = mValue;
+
+							if(InterlockedSetIfEqual(&mValue, n, previous))
+								return previous;
+						}
+					}
+
+					// SetValueConditional is a single attempt; no retry.
+					template<> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						const bool stored = InterlockedSetIfEqual(&mValue, n, condition);
+						return stored;
+					}
+
+					template<> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						const bool stored = InterlockedSetIfEqual(&mValue, n, condition);
+						return stored;
+					}
+
+					// Increment / Decrement / Add return the newly stored value.
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+					{
+						for(;;)
+						{
+							int64_t before = mValue;
+							int64_t after  = before + 1;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+					{
+						for(;;)
+						{
+							uint64_t before = mValue;
+							uint64_t after  = before + 1;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+					{
+						for(;;)
+						{
+							int64_t before = mValue;
+							int64_t after  = before - 1;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+					{
+						for(;;)
+						{
+							uint64_t before = mValue;
+							uint64_t after  = before - 1;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+					{
+						for(;;)
+						{
+							int64_t before = mValue;
+							int64_t after  = before + n;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+					{
+						for(;;)
+						{
+							uint64_t before = mValue;
+							uint64_t after  = before + n;
+
+							if(InterlockedSetIfEqual(&mValue, after, before))
+								return after;
+						}
+					}
+
+				#endif
+
+			#elif defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_BORLAND)
+
+				// This won't compile when ValueType is 64 bits.
+				//
+				// Generic member definitions written as Intel-syntax __asm blocks. Each routine loads `this`
+				// into ecx, operates on the dword at that address (mValue is expected to be at offset zero
+				// of this) and leaves its result in eax. There is no C++ return statement, so the code
+				// presumably relies on the compiler returning eax from such functions -- TODO confirm for
+				// every compiler that reaches this branch.
+
+				/// SetValue: exchanges n with mValue; eax ends up holding the previous mValue.
+				template<class T> inline
+				typename AtomicInt<T>::ValueType AtomicInt<T>::SetValue(ValueType n)
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, n 
+						xchg eax, dword ptr [ecx]      // The xchg instruction does an implicit lock instruction.
+					}
+				}
+
+				/// SetValueConditional: stores n if mValue equals condition; eax is 1 on success, 0 otherwise.
+				template<class T> inline
+				bool AtomicInt<T>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					__asm{
+						mov  ecx, this                       // mValue is expected to be at offset zero of this.
+						mov  edx, n 
+						mov  eax, condition
+						lock cmpxchg dword ptr [ecx], edx    // Compares mValue to condition. If equal, z flag is set and n is copied into mValue.
+						jz    condition_met
+						xor  eax, eax
+						jmp  end
+						condition_met:
+						mov  eax, 1
+						end:
+					}
+				}
+
+				/// Increment: adds 1 to mValue; eax ends up holding the new value.
+				/// (The declarator previously read `bool typename ...`, which names two return types and is ill-formed.)
+				template<class T> inline
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Increment()
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, 1 
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						inc  eax                       // Increment eax because the return value is the new value.
+					}
+				}
+
+				/// Decrement: adds 0xffffffff (-1) to mValue; eax ends up holding the new value.
+				template<class T> inline
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Decrement()
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, 0xffffffff
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						dec  eax                       // Decrement eax because the return value is the new value.
+					}
+				}
+
+				/// Add: adds n to mValue; eax ends up holding the new value.
+				template<class T> inline
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Add(ValueType n)
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, n 
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						add  eax, n                    // old value + n == the value now stored
+					}
+				}
+
+
+			#else
+				// Compiler not currently supported.
+
+			#endif
+
+		} // namespace Thread
+
+	} // namespace EA
+
+
+#endif // EA_PROCESSOR_X86
+
+
+#ifdef _MSC_VER
+	 #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,89 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_EATHREAD_SYNC_X86_H
+#define EATHREAD_X86_EATHREAD_SYNC_X86_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include <EABase/eabase.h>
+#endif
+
+
+#if defined(EA_PROCESSOR_X86)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	// By default, we define EA_TARGET_SMP to be true. The reason for this is that most 
+	// applications that users of this code are likely to write are going to be executables
+	// which run properly on any system, be it multiprocessing or not.
+	#ifndef EA_TARGET_SMP
+		#define EA_TARGET_SMP 1
+	#endif
+
+	// EAProcessorPause
+	// Function-like macro that executes the x86 spin-wait hint.
+	// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+	// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for 
+	// high performance spinning, as otherwise a high performance penalty incurs. 
+	// Every branch below emits that same `rep nop` sequence; only the inline-assembly syntax differs.
+
+	#if defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_BORLAND)
+		// Year 2003+ versions of the Microsoft SDK define 'rep nop' as YieldProcessor and/or __yield or _mm_pause. 
+		#define EAProcessorPause() __asm { rep nop } 
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		// GNU-style asm statement; the caller supplies the terminating semicolon.
+		#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+	#else
+		// In this case we use an Intel-style asm statement. If this doesn't work for your compiler then 
+		// there most likely is some way to make the `rep nop` inline asm statement. 
+		#define EAProcessorPause() __asm { rep nop } // Alternatively: { __asm { _emit 0xf3 }; __asm { _emit 0x90 } }
+	#endif
+
+
+	// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+	// The x86 processor memory architecture ensures read and write consistency on both single and
+	// multi processing systems. This makes programming simpler but limits maximum system performance.
+	// We define EAReadBarrier here to be the same as EACompilerMemoryBarrier in order to limit the 
+	// compiler from making any assumptions at its level about memory usage. Year 2003+ versions of the 
+	// Microsoft SDK define a 'MemoryBarrier' statement which has the same effect as EAReadWriteBarrier.
+	// All three barrier flavors resolve to one and the same primitive. They are object-like macros,
+	// so `EAReadBarrier()` expands to a call of whatever name is substituted here.
+	#if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		// __sync_synchronize is the compiler's full memory barrier builtin.
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		// Otherwise fall back to EACompilerMemoryBarrier, which is defined further down; that is
+		// fine because the name is only expanded at the point of use.
+		#define EAReadBarrier      EACompilerMemoryBarrier
+		#define EAWriteBarrier     EACompilerMemoryBarrier
+		#define EAReadWriteBarrier EACompilerMemoryBarrier
+	#endif
+
+	// EACompilerMemoryBarrier
+	// Function-like macro selected per compiler:
+	//   - VC7+ on Microsoft platforms (or a sufficiently new Intel compiler): the _ReadWriteBarrier intrinsic.
+	//   - GCC and Clang: an empty asm statement with a "memory" clobber, i.e. no instruction text at all.
+	//   - otherwise: expands to nothing, so no barrier of any kind is provided.
+	#if (defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) && defined(EA_PLATFORM_MICROSOFT)) || (defined(EA_COMPILER_INTEL) && (EA_COMPILER_VERSION >= 9999999)) // VC7+ or Intel (unknown version at this time)
+		extern "C" void _ReadWriteBarrier();
+		#pragma intrinsic(_ReadWriteBarrier)
+		#define EACompilerMemoryBarrier() _ReadWriteBarrier()
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		// Clang is listed explicitly, as it is for EAProcessorPause above: it accepts the same GNU-style
+		// asm statement and must not fall through to the empty definition below.
+		#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+	#else
+		#define EACompilerMemoryBarrier() // Possibly `EAT_ASSERT(false)` here?
+	#endif
+
+
+#endif // EA_PROCESSOR_X86
+
+
+#endif // EATHREAD_X86_EATHREAD_SYNC_X86_H
+
+
+
+
+
+
+
+