This commit is contained in:
jeanlemotan
2024-07-02 18:10:39 +02:00
commit 48ab06b1d9
733 changed files with 321088 additions and 0 deletions
@@ -0,0 +1,536 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include <EABase/eabase.h>
#include <eathread/eathread_callstack.h>
#include <eathread/eathread_callstack_context.h>
#include <eathread/eathread_storage.h>
#if defined(EA_PLATFORM_WIN32) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) // The following only works on Win32 and not Win64.
#if defined(_MSC_VER)
#pragma warning(push, 0)
#endif
#include <Windows.h>
#include <DbgHelp.h>
#include <stdio.h>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#ifdef _MSC_VER
#pragma warning(disable: 4740) // flow in or out of inline asm code suppresses global optimization
#pragma comment(lib, "dbghelp.lib")
#pragma comment(lib, "psapi.lib")
#endif
// Function pointer types for the DbgHelp.dll entry points that Win32DbgHelp::Init
// looks up by name with GetProcAddress. Each typedef is named after the export it
// is used with (see the GetProcAddress calls in Win32DbgHelp::Init).
typedef BOOL (__stdcall *SYMINITIALIZE)(HANDLE, LPSTR, BOOL); // "SymInitialize"
typedef BOOL (__stdcall *SYMCLEANUP)(HANDLE); // "SymCleanup"
typedef BOOL (__stdcall *STACKWALK)(DWORD, HANDLE, HANDLE, LPSTACKFRAME, LPVOID,PREAD_PROCESS_MEMORY_ROUTINE, PFUNCTION_TABLE_ACCESS_ROUTINE,PGET_MODULE_BASE_ROUTINE, PTRANSLATE_ADDRESS_ROUTINE); // "StackWalk"
typedef LPVOID (__stdcall *SYMFUNCTIONTABLEACCESS)(HANDLE, DWORD); // "SymFunctionTableAccess"
typedef DWORD (__stdcall *SYMGETMODULEBASE)(HANDLE, DWORD); // "SymGetModuleBase"
typedef BOOL (__stdcall *SYMGETSYMFROMADDR)(HANDLE, DWORD, PDWORD, PIMAGEHLP_SYMBOL); // "SymGetSymFromAddr"
typedef BOOL (__stdcall *SYMGETLINEFROMADDR)(HANDLE, DWORD, PDWORD, PIMAGEHLP_LINE); // "SymGetLineFromAddr"
namespace // We construct an anonymous namespace because doing so keeps the definitions within it local to this module.
{
    ///////////////////////////////////////////////////////////////////////////
    // Win32DbgHelp
    //
    // Holds the handle of the dynamically loaded DbgHelp.dll together with the
    // entry points that are looked up from it. All members are null/false
    // until Init is called, and are returned to that state by Shutdown.
    //
    struct Win32DbgHelp
    {
        HMODULE                mhDbgHelp;                // Module handle returned by LoadLibraryA, or 0 if not loaded.
        bool                   mbSymInitialized;         // Shutdown calls SymCleanup only if this is true.
        SYMINITIALIZE          mpSymInitialize;
        SYMCLEANUP             mpSymCleanup;
        STACKWALK              mpStackWalk;
        SYMFUNCTIONTABLEACCESS mpSymFunctionTableAccess;
        SYMGETMODULEBASE       mpSymGetModuleBase;
        SYMGETSYMFROMADDR      mpSymGetSymFromAddr;
        SYMGETLINEFROMADDR     mpSymGetLineFromAddr;

        Win32DbgHelp() : mhDbgHelp(0), mbSymInitialized(false), mpSymInitialize(0),
                         mpSymCleanup(0), mpStackWalk(0), mpSymFunctionTableAccess(0),
                         mpSymGetModuleBase(0), mpSymGetSymFromAddr(0), mpSymGetLineFromAddr(0)
        {
            // Empty. The initialization is done externally, due to tricky startup/shutdown ordering issues.
        }

        ~Win32DbgHelp()
        {
            // Empty. The shutdown is done externally, due to tricky startup/shutdown ordering issues.
        }

        // Loads DbgHelp.dll (if it isn't loaded already) and looks up the entry points.
        // Any entry point that cannot be found is left as a null pointer, so users
        // must check the pointer they intend to call.
        void Init()
        {
            if(!mhDbgHelp)
            {
                mhDbgHelp = ::LoadLibraryA("DbgHelp.dll");

                if(mhDbgHelp)
                {
                    mpSymInitialize          = (SYMINITIALIZE)(uintptr_t)         ::GetProcAddress(mhDbgHelp, "SymInitialize");
                    mpSymCleanup             = (SYMCLEANUP)(uintptr_t)            ::GetProcAddress(mhDbgHelp, "SymCleanup");
                    mpStackWalk              = (STACKWALK)(uintptr_t)             ::GetProcAddress(mhDbgHelp, "StackWalk");
                    mpSymFunctionTableAccess = (SYMFUNCTIONTABLEACCESS)(uintptr_t)::GetProcAddress(mhDbgHelp, "SymFunctionTableAccess");
                    mpSymGetModuleBase       = (SYMGETMODULEBASE)(uintptr_t)      ::GetProcAddress(mhDbgHelp, "SymGetModuleBase");
                    mpSymGetSymFromAddr      = (SYMGETSYMFROMADDR)(uintptr_t)     ::GetProcAddress(mhDbgHelp, "SymGetSymFromAddr");
                    mpSymGetLineFromAddr     = (SYMGETLINEFROMADDR)(uintptr_t)    ::GetProcAddress(mhDbgHelp, "SymGetLineFromAddr");
                }
            }
        }

        // Releases DbgHelp.dll and returns this object to its unloaded state.
        void Shutdown()
        {
            if(mhDbgHelp)
            {
                if(mbSymInitialized && mpSymCleanup)
                    mpSymCleanup(::GetCurrentProcess());

                ::FreeLibrary(mhDbgHelp);

                // Clear everything that referred to the library we just released.
                // Without this, a later Init would be skipped (mhDbgHelp still non-null)
                // and the stale function pointers would be called after the DLL was
                // unloaded.
                mhDbgHelp                = 0;
                mbSymInitialized         = false;
                mpSymInitialize          = 0;
                mpSymCleanup             = 0;
                mpStackWalk              = 0;
                mpSymFunctionTableAccess = 0;
                mpSymGetModuleBase       = 0;
                mpSymGetSymFromAddr      = 0;
                mpSymGetLineFromAddr     = 0;
            }
        }
    };

    static int          sInitCount = 0;  // Number of outstanding InitCallstack calls.
    static Win32DbgHelp sWin32DbgHelp;   // The single DbgHelp binding used by this module.
}
namespace EA
{
namespace Thread
{
/* To consider: Enable usage of this below.
///////////////////////////////////////////////////////////////////////////////
// IsAddressReadable
//
static bool IsAddressReadable(const void* pAddress)
{
bool bPageReadable;
MEMORY_BASIC_INFORMATION mbi;
if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
{
const DWORD flags = (PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_READONLY | PAGE_READWRITE);
bPageReadable = (mbi.State == MEM_COMMIT) && ((mbi.Protect & flags) != 0);
}
else
bPageReadable = false;
return bPageReadable;
}
*/
///////////////////////////////////////////////////////////////////////////////
// InitCallstack
//
EATHREADLIB_API void InitCallstack()
{
    // Calls are counted; only the first outstanding call loads DbgHelp.
    sInitCount += 1;

    const bool bFirstInit = (sInitCount == 1);

    if(bFirstInit)
        sWin32DbgHelp.Init();
}
///////////////////////////////////////////////////////////////////////////////
// ShutdownCallstack
//
EATHREADLIB_API void ShutdownCallstack()
{
    // Calls are counted against InitCallstack; DbgHelp is released when the
    // last outstanding InitCallstack is matched.
    //
    // An unmatched ShutdownCallstack is ignored. Letting the count go negative
    // would make the next InitCallstack see a count other than 1 and skip
    // the initialization.
    if(sInitCount > 0)
    {
        if(--sInitCount == 0)
            sWin32DbgHelp.Shutdown();
    }
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstack
//
// Writes up to (nReturnAddressArrayCapacity - 1) return addresses into
// pReturnAddressArray, followed by a 0 terminator, and returns the number of
// addresses written (not counting the terminator).
//
// pContext   If non-NULL, the walk starts from the given EIP/ESP/EBP.
//            If NULL, the walk starts from the calling thread's current state.
//
// Note: when EATHREAD_WIN32_FRAME_POINTER_OPTIMIZATION_DISABLED is defined and
// pContext is NULL, the result comes directly from RtlCaptureStackBackTrace and
// in that case no 0 terminator is appended by this function.
//
EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
{
    // With no storage there is nothing we can write, not even the terminator.
    // This also prevents (nReturnAddressArrayCapacity - 1) below from wrapping
    // around to a huge unsigned value when the capacity is zero.
    if(!pReturnAddressArray || (nReturnAddressArrayCapacity == 0))
        return 0;

    size_t nEntryIndex(0);

    // Lazily bind to DbgHelp in case the user didn't call InitCallstack.
    if(!sWin32DbgHelp.mhDbgHelp)
        sWin32DbgHelp.Init();

    if(sWin32DbgHelp.mpStackWalk)
    {
        CONTEXT context;
        memset(&context, 0, sizeof(context));
        context.ContextFlags = CONTEXT_CONTROL;

        if(pContext)
        {
            context.Eip = pContext->mEIP;
            context.Esp = pContext->mESP;
            context.Ebp = pContext->mEBP;
        }
        else
        {
            // RtlCaptureStackBackTrace can only generate stack traces on Win32 when the stack frame contains frame
            // pointers. This is only a limitation on 32-bit Windows and is controlled by the following compiler switches.
            //
            //    /Oy  : removes frame-pointers
            //    /Oy- : emits frame-pointers
            //
            // The language is weird here because Microsoft refers to it as enabling/disabling a performance optimization.
            // https://docs.microsoft.com/en-us/cpp/build/reference/oy-frame-pointer-omission?view=vs-2017
            //
            // EATHREAD_WIN32_FRAME_POINTER_OPTIMIZATION_DISABLED is enabled/disabled based on if the user has requested eaconfig to disable
            // frame-pointer optimizations (enable frame-pointers). See property: 'eaconfig.disable_framepointer_optimization'.
            //
            #ifdef EATHREAD_WIN32_FRAME_POINTER_OPTIMIZATION_DISABLED
                return RtlCaptureStackBackTrace(1, (ULONG)nReturnAddressArrayCapacity, pReturnAddressArray, NULL);
            #else
                // With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
                // VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
                __asm{
                    mov context.Ebp, EBP
                    mov context.Esp, ESP
                    call GetEIP
                    GetEIP:
                    pop context.Eip
                }
            #endif
        }

        // Initialize the STACKFRAME structure for the first call. This is only
        // necessary for Intel CPUs, and isn't mentioned in the documentation.
        STACKFRAME sf;
        memset(&sf, 0, sizeof(sf));
        sf.AddrPC.Offset    = context.Eip;
        sf.AddrPC.Mode      = AddrModeFlat;
        sf.AddrStack.Offset = context.Esp;
        sf.AddrStack.Mode   = AddrModeFlat;
        sf.AddrFrame.Offset = context.Ebp;
        sf.AddrFrame.Mode   = AddrModeFlat;

        const HANDLE hCurrentProcess = ::GetCurrentProcess();
        const HANDLE hCurrentThread  = ::GetCurrentThread();

        // To consider: We have had some other code which can read the stack with better success
        // than the DbgHelp stack walk function that we use here. In particular, the DbgHelp
        // stack walking function doesn't do well unless x86 stack frames are used.
        //
        // One slot is always reserved for the 0 terminator written after the loop.
        for(int nStackIndex = 0; nEntryIndex < (nReturnAddressArrayCapacity - 1); ++nStackIndex)
        {
            if(!sWin32DbgHelp.mpStackWalk(IMAGE_FILE_MACHINE_I386, hCurrentProcess, hCurrentThread,
                                          &sf, &context, NULL, sWin32DbgHelp.mpSymFunctionTableAccess,
                                          sWin32DbgHelp.mpSymGetModuleBase, NULL))
            {
                break;
            }

            if(sf.AddrFrame.Offset == 0) // Basic sanity check to make sure the frame is OK. Bail if not.
                break;

            // If using the current execution context, then we ignore the first
            // one because it is the one that is our stack walk function itself.
            if(pContext || (nStackIndex > 0))
                pReturnAddressArray[nEntryIndex++] = ((void*)(uintptr_t)sf.AddrPC.Offset);
        }
    }

    pReturnAddressArray[nEntryIndex] = 0;
    return nEntryIndex;
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContext
//
EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
{
    // Verify at compile time that our Context mirrors the layout of the Windows
    // CONTEXT at the two checked fields.
    #if defined(EA_PLATFORM_WIN32)
        EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::Context, Eip)   == offsetof(CONTEXT, Eip));
        EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::Context, SegSs) == offsetof(CONTEXT, SegSs));
    #endif

    // Copy over the three registers that describe the execution point.
    const Context& source = *pContext;

    context.mEBP = source.Ebp;
    context.mESP = source.Esp;
    context.mEIP = source.Eip;
}
///////////////////////////////////////////////////////////////////////////////
// GetModuleFromAddress
//
// Writes the file name of the module whose allocation contains 'address' into
// pModuleName and returns the value reported by GetModuleFileNameA (0 if no
// module name could be obtained).
//
// pModuleName is always left 0-terminated when it is non-NULL and
// moduleNameCapacity is greater than zero. If either of those isn't true then
// nothing is written and 0 is returned.
//
EATHREADLIB_API size_t GetModuleFromAddress(const void* address, char* pModuleName, size_t moduleNameCapacity)
{
    // Without a destination buffer we can't even write an empty string.
    if(!pModuleName || (moduleNameCapacity == 0))
        return 0;

    // Start with an empty string so that every failure path below leaves
    // the caller with a valid (empty) result.
    pModuleName[0] = 0;

    MEMORY_BASIC_INFORMATION mbi;

    if(VirtualQuery(address, &mbi, sizeof(mbi)))
    {
        HMODULE hModule = (HMODULE)mbi.AllocationBase;

        if(hModule)
        {
            const DWORD nLength = GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);

            if(nLength == 0)
            {
                // The allocation base wasn't a module (or the call otherwise failed).
                // Make no assumption about what the call left in the buffer.
                pModuleName[0] = 0;
                return 0;
            }

            // When the name doesn't fit, the function returns the buffer size and
            // (on older Windows versions) doesn't 0-terminate the truncated string.
            if(nLength >= moduleNameCapacity)
                pModuleName[moduleNameCapacity - 1] = 0;

            return nLength;
        }
    }

    return 0;
}
///////////////////////////////////////////////////////////////////////////////
// GetModuleHandleFromAddress
//
EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress)
{
    // The allocation base of the region holding pAddress is used as the module handle.
    MEMORY_BASIC_INFORMATION regionInfo;

    const bool bQuerySucceeded = (VirtualQuery(pAddress, &regionInfo, sizeof(regionInfo)) != 0);

    if(!bQuerySucceeded)
        return 0;

    return (ModuleHandle)regionInfo.AllocationBase;
}
///////////////////////////////////////////////////////////////////////////////
// GetThreadIdFromThreadHandle
//
// This implementation is the same as the one in EAThread.
//
EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId)
{
    typedef DWORD (WINAPI *GetThreadIdFunc)(HANDLE);
    typedef LONG  (WINAPI *NtQueryInformationThreadFunc)(HANDLE, int, PVOID, ULONG, PULONG);

    // Result layout requested from NtQueryInformationThread below.
    struct THREAD_BASIC_INFORMATION_WIN32
    {
        BOOL  ExitStatus;
        PVOID TebBaseAddress;
        DWORD UniqueProcessId;
        DWORD UniqueThreadId;
        DWORD AffinityMask;
        DWORD Priority;
        DWORD BasePriority;
    };

    const HANDLE hThread = (HANDLE)threadId;

    // First choice: kernel32's GetThreadId, looked up dynamically and cached.
    static HMODULE         sKernel32Module = NULL;
    static GetThreadIdFunc sGetThreadId    = NULL;

    if(sKernel32Module == NULL)
        sKernel32Module = LoadLibraryA("kernel32.dll");

    if((sKernel32Module != NULL) && (sGetThreadId == NULL))
        sGetThreadId = (GetThreadIdFunc)(uintptr_t)GetProcAddress(sKernel32Module, "GetThreadId");

    if(sGetThreadId != NULL)
        return sGetThreadId(hThread);

    // Fallback: ntdll's NtQueryInformationThread, also looked up dynamically and cached.
    static HMODULE                      sNtdllModule               = NULL;
    static NtQueryInformationThreadFunc sNtQueryInformationThread = NULL;

    if(sNtdllModule == NULL)
        sNtdllModule = LoadLibraryA("ntdll.dll");

    if((sNtdllModule != NULL) && (sNtQueryInformationThread == NULL))
        sNtQueryInformationThread = (NtQueryInformationThreadFunc)(uintptr_t)GetProcAddress(sNtdllModule, "NtQueryInformationThread");

    if(sNtQueryInformationThread != NULL)
    {
        THREAD_BASIC_INFORMATION_WIN32 threadInfo;

        // A return value of 0 is treated as success.
        const LONG status = sNtQueryInformationThread(hThread, 0, &threadInfo, sizeof(threadInfo), NULL);

        if(status == 0)
            return threadInfo.UniqueThreadId;
    }

    // Neither method was available or succeeded.
    return 0;
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContext
//
// The threadId is the same thing as the Windows' HANDLE GetCurrentThread() function
// and not the same thing as Windows' GetCurrentThreadId function. See the
// GetCallstackContextSysThreadId for the latter.
//
// Fills 'context' with the EBP/ESP/EIP and the stack base/limit of the thread
// identified by the thread HANDLE 'threadId'. kThreadIdInvalid and
// kThreadIdCurrent both refer to the calling thread.
//
// Returns false if the registers of another thread could not be read, in which
// case all fields of 'context' written here are zero. If the registers could be
// read but the thread's information block could not be located, the function
// returns true with mStackBase and mStackLimit set to zero.
//
EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
{
    if((threadId == (intptr_t)kThreadIdInvalid) || (threadId == (intptr_t)kThreadIdCurrent))
        threadId = (intptr_t)::GetCurrentThread(); // GetCurrentThread returns a thread 'pseudohandle' and not a real thread handle.

    const DWORD sysThreadId        = EA::Thread::GetThreadIdFromThreadHandle(threadId);
    const DWORD sysThreadIdCurrent = ::GetCurrentThreadId();

    CONTEXT   win32CONTEXT;
    NT_TIB*   pTib         = NULL;
    uintptr_t nStackBase   = 0;
    uintptr_t nStackLimit  = 0;
    bool      bReturnValue = true;

    memset(&win32CONTEXT, 0, sizeof(win32CONTEXT));

    if(sysThreadIdCurrent == sysThreadId)
    {
        // With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
        // VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
        __asm{
            mov win32CONTEXT.Ebp, EBP
            mov win32CONTEXT.Esp, ESP
            call GetEIP
            GetEIP:
            pop win32CONTEXT.Eip
        }

        // Offset 0x18 from the FS segment register gives a pointer to
        // the thread information block for the current thread
        __asm {
            mov eax, fs:[18h]
            mov pTib, eax
        }

        nStackBase  = (uintptr_t)pTib->StackBase;
        nStackLimit = (uintptr_t)pTib->StackLimit;
    }
    else
    {
        // In this case we are working with a separate thread, so we suspend it
        // and read information about it and then resume it.
        const HANDLE hThread = (HANDLE)threadId;

        bReturnValue = false;

        if(::SuspendThread(hThread) != (DWORD)-1) // (DWORD)-1 means the thread could not be suspended.
        {
            win32CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS;

            if(::GetThreadContext(hThread, &win32CONTEXT))
            {
                bReturnValue = true;

                // The thread information block of the other thread is located at the base
                // address of the segment selected by its FS register. That base is stored
                // in the selector's descriptor table entry; it cannot be computed from the
                // selector value arithmetically.
                LDT_ENTRY ldtEntry;

                if(::GetThreadSelectorEntry(hThread, win32CONTEXT.SegFs, &ldtEntry))
                {
                    const uintptr_t nTibAddress = ((uintptr_t)ldtEntry.BaseLow)
                                                | ((uintptr_t)ldtEntry.HighWord.Bytes.BaseMid << 16)
                                                | ((uintptr_t)ldtEntry.HighWord.Bytes.BaseHi  << 24);

                    pTib = (NT_TIB*)nTibAddress;

                    if(pTib)
                    {
                        // Read these while the thread is still suspended.
                        nStackBase  = (uintptr_t)pTib->StackBase;
                        nStackLimit = (uintptr_t)pTib->StackLimit;
                    }
                }
            }
            else
            {
                // Don't report whatever a failed call may have left behind.
                memset(&win32CONTEXT, 0, sizeof(win32CONTEXT));
            }

            ::ResumeThread(hThread);
        }
    }

    context.mEBP          = (uint32_t)win32CONTEXT.Ebp;
    context.mESP          = (uint32_t)win32CONTEXT.Esp;
    context.mEIP          = (uint32_t)win32CONTEXT.Eip;
    context.mStackBase    = nStackBase;
    context.mStackLimit   = nStackLimit;
    context.mStackPointer = (uintptr_t)win32CONTEXT.Esp;

    return bReturnValue;
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContextSysThreadId
//
// A sysThreadId is a Microsoft DWORD thread id, which can be obtained from
// the currently running thread via GetCurrentThreadId. It can be obtained from
// a Microsoft thread HANDLE via EA::Thread::GetThreadIdFromThreadHandle();
// A DWORD thread id can be converted to a thread HANDLE via the Microsoft OpenThread
// system function.
//
// Fills 'context' with the EBP/ESP/EIP of the thread identified by the DWORD
// thread id 'sysThreadId'.
//
// Returns false if the thread is not the calling thread and it could not be
// opened, suspended or queried; in that case the three registers are written
// as zero. The stack base/limit/pointer fields are not written by this function.
//
EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
{
    bool        bReturnValue       = true;
    const DWORD sysThreadIdCurrent = ::GetCurrentThreadId();
    CONTEXT     win32CONTEXT;

    // Start from a known state so that no failure path can report garbage.
    memset(&win32CONTEXT, 0, sizeof(win32CONTEXT));

    if(sysThreadIdCurrent == (DWORD)sysThreadId)
    {
        // With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
        // VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
        __asm{
            mov win32CONTEXT.Ebp, EBP
            mov win32CONTEXT.Esp, ESP
            call GetEIP
            GetEIP:
            pop win32CONTEXT.Eip
        }
    }
    else
    {
        // In this case we are working with a separate thread, so we suspend it
        // and read information about it and then resume it.
        bReturnValue = false;

        // The handle is used only within this function, so there is no reason
        // to make it inheritable by child processes.
        HANDLE hThread = ::OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT, FALSE, (DWORD)sysThreadId);

        if(hThread)
        {
            if(::SuspendThread(hThread) != (DWORD)-1) // (DWORD)-1 means the thread could not be suspended.
            {
                win32CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;

                if(::GetThreadContext(hThread, &win32CONTEXT))
                    bReturnValue = true;
                else
                    memset(&win32CONTEXT, 0, sizeof(win32CONTEXT)); // Don't report whatever a failed call may have left behind.

                ::ResumeThread(hThread);
            }

            ::CloseHandle(hThread);
        }
    }

    context.mEBP = (uint32_t)win32CONTEXT.Ebp;
    context.mESP = (uint32_t)win32CONTEXT.Esp;
    context.mEIP = (uint32_t)win32CONTEXT.Eip;
    //context.mStackBase    = (uintptr_t)pTib->StackBase;     // To do. (Whoever added mStackBase to CallstackContext forgot to add this code)
    //context.mStackLimit   = (uintptr_t)pTib->StackLimit;
    //context.mStackPointer = (uintptr_t)win32CONTEXT.Esp;

    return bReturnValue;
}
///////////////////////////////////////////////////////////////////////////////
// SetStackBase
//
EATHREADLIB_API void SetStackBase(void* /*pStackBase*/)
{
// Intentionally empty: the argument is ignored (the parameter is left unnamed).
// Nothing to do, as GetStackBase always works on its own.
}
///////////////////////////////////////////////////////////////////////////////
// GetStackBase
//
EATHREADLIB_API void* GetStackBase()
{
    // Query the callstack context using a thread id of 0 and report the
    // stack base recorded in it.
    CallstackContext threadContext;
    GetCallstackContext(threadContext, 0);

    void* const pStackBase = (void*)threadContext.mStackBase;
    return pStackBase;
}
///////////////////////////////////////////////////////////////////////////////
// GetStackLimit
//
EATHREADLIB_API void* GetStackLimit()
{
    // Query the callstack context using a thread id of 0 and report the
    // stack limit recorded in it.
    CallstackContext threadContext;
    GetCallstackContext(threadContext, 0);

    void* const pStackLimit = (void*)threadContext.mStackLimit;
    return pStackLimit;

    // Alternative which returns a slightly different answer:
    // We return our stack pointer, which is a good approximation of the stack limit of the caller.
    // void* pStack = NULL;
    // __asm { mov pStack, ESP};
    // return pStack;
}
} // namespace Thread
} // namespace EA
#else // Stub out function for WinRT / Windows Phone 8
namespace EA
{
namespace Thread
{
EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
{
    // Stub: no callstack is read in this configuration, so none of the
    // arguments are used and zero entries are reported.
    EA_UNUSED(pReturnAddressArray);
    EA_UNUSED(nReturnAddressArrayCapacity);
    EA_UNUSED(pContext);

    const size_t nEntryCount = 0;
    return nEntryCount;
}
} // namespace Thread
} // namespace EA
#endif // defined(EA_PLATFORM_WIN32)
@@ -0,0 +1,622 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include <eathread/eathread_callstack.h>
#include <eathread/eathread_callstack_context.h>
#include <stdio.h>
#include <string.h>
#include <eathread/eathread_storage.h>
#if defined(_WIN32_WINNT) && (_WIN32_WINNT < 0x0500)
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0500
#endif
#ifdef _MSC_VER
#pragma warning(push, 0)
#include <Windows.h>
#include <math.h> // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
#include <intrin.h>
#pragma intrinsic(_ReturnAddress)
#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
#include <winternl.h>
#else
// Temporary while waiting for formal support:
extern "C" NTSYSAPI PEXCEPTION_ROUTINE NTAPI RtlVirtualUnwind(DWORD, DWORD64, DWORD64, PRUNTIME_FUNCTION, PCONTEXT, PVOID*, PDWORD64, PKNONVOLATILE_CONTEXT_POINTERS);
extern "C" WINBASEAPI DWORD WINAPI GetModuleFileNameA(HMODULE, LPSTR, DWORD);
#endif
#pragma warning(pop)
#else
#include <Windows.h>
#include <winternl.h>
#endif
// Disable optimization of this code under VC++ for x64.
// This is due to some as-yet undetermined crash that happens
// when compiler optimizations are enabled for this code.
// This function is not performance-sensitive and so disabling
// optimizations shouldn't matter.
#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_WIN64))
#pragma optimize("", off)
#endif
///////////////////////////////////////////////////////////////////////////////
// Stuff that is supposed to be in windows.h and/or winternl.h but isn't
// consistently present in all versions.
//
#ifndef UNW_FLAG_NHANDLER
#define UNW_FLAG_NHANDLER 0
#endif
// Fallback declarations, supplied only when the included headers did not
// define UNWIND_HISTORY_TABLE_SIZE.
#ifndef UNWIND_HISTORY_TABLE_SIZE
extern "C"
{
#define UNWIND_HISTORY_TABLE_SIZE 12
#define UNWIND_HISTORY_TABLE_NONE 0
#define UNWIND_HISTORY_TABLE_GLOBAL 1
#define UNWIND_HISTORY_TABLE_LOCAL 2
// One entry of the unwind history table: an image base paired with a function entry.
typedef struct _UNWIND_HISTORY_TABLE_ENTRY
{
ULONG64 ImageBase;
PRUNTIME_FUNCTION FunctionEntry;
} UNWIND_HISTORY_TABLE_ENTRY, *PUNWIND_HISTORY_TABLE_ENTRY;
// The table type accepted by RtlLookupFunctionEntry (declared below) as its HistoryTable argument.
typedef struct _UNWIND_HISTORY_TABLE
{
ULONG Count;
UCHAR Search;
ULONG64 LowAddress;
ULONG64 HighAddress;
UNWIND_HISTORY_TABLE_ENTRY Entry[UNWIND_HISTORY_TABLE_SIZE];
} UNWIND_HISTORY_TABLE, *PUNWIND_HISTORY_TABLE;
PVOID WINAPI RtlLookupFunctionEntry(ULONG64 ControlPC, PULONG64 ImageBase, PUNWIND_HISTORY_TABLE HistoryTable);
// The remaining declarations are supplied only for non-Microsoft compilers and
// for Microsoft compilers older than VS2008.
#if !defined(_MSC_VER) || (_MSC_VER < 1500) // if earlier than VS2008...
// Placeholder definition; only the type name is needed by the RtlVirtualUnwind declaration below.
typedef struct _KNONVOLATILE_CONTEXT_POINTERS
{
PULONGLONG dummy;
} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS;
// Type of the EstablisherFrame argument of the RtlVirtualUnwind declaration below.
typedef struct _FRAME_POINTERS
{
ULONGLONG MemoryStackFp;
ULONGLONG BackingStoreFp;
} FRAME_POINTERS, *PFRAME_POINTERS;
ULONGLONG WINAPI RtlVirtualUnwind(ULONG HandlerType, ULONGLONG ImageBase, ULONGLONG ControlPC,
PRUNTIME_FUNCTION FunctionEntry, PCONTEXT ContextRecord, PBOOLEAN InFunction,
PFRAME_POINTERS EstablisherFrame, PKNONVOLATILE_CONTEXT_POINTERS ContextPointers);
#endif
}
#endif
extern "C" WINBASEAPI DWORD WINAPI GetThreadId(_In_ HANDLE hThread);
///////////////////////////////////////////////////////////////////////////////
namespace EA
{
namespace Thread
{
/* To consider: Enable usage of this below.
///////////////////////////////////////////////////////////////////////////////
// IsAddressReadable
//
static bool IsAddressReadable(const void* pAddress)
{
bool bPageReadable;
MEMORY_BASIC_INFORMATION mbi;
if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
{
const DWORD flags = (PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_READONLY | PAGE_READWRITE);
bPageReadable = (mbi.State == MEM_COMMIT) && ((mbi.Protect & flags) != 0);
}
else
bPageReadable = false;
return bPageReadable;
}
*/
#if !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
// GetRSP
//
// Returns the RSP of the caller.
//
// We could also solve this with the following asm function.
// .CODE
// GetRSP PROC
// mov rax, rsp
// add rax, 8
// ret
// GetRSP ENDP
// END
//
static EA_NO_INLINE void* GetRSP()
{
    // 'ara' is the address of the stack slot that holds our return address.
    // The caller's RSP, as it will be once we have returned, is one slot
    // (8 bytes) above that.
    #if defined(_MSC_VER)
        uintptr_t ara = (uintptr_t)_AddressOfReturnAddress();
    #else
        // __builtin_frame_address requires a level argument; 0 refers to this function's own frame.
        // The frame address is the slot holding the caller's saved frame pointer, and the
        // return address is stored in the slot directly above it.
        uintptr_t ara = (uintptr_t)__builtin_frame_address(0) + 8;
    #endif

    return (void*)(ara + 8);
}
#endif
///////////////////////////////////////////////////////////////////////////////
// GetInstructionPointer
//
EATHREADLIB_API EA_NO_INLINE void GetInstructionPointer(void*& pInstruction)
{
    // This function is declared EA_NO_INLINE; its own return address is what
    // gets reported to the caller.
    #if defined(_MSC_VER)
        void* const pCallerAddress = _ReturnAddress();
        pInstruction = pCallerAddress;
    #elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
        void* const pCallerAddress = __builtin_return_address(0);
        pInstruction = pCallerAddress;
    #else
        // No compiler intrinsic available: read two callstack entries and use the second.
        void* callstackEntries[2] = { 0, 0 };

        GetCallstack(callstackEntries, 2, NULL);
        pInstruction = callstackEntries[1]; // This is the address of the caller.
    #endif
}
///////////////////////////////////////////////////////////////////////////////
// InitCallstack
//
EATHREADLIB_API void InitCallstack()
{
// Intentionally empty in this implementation.
// Nothing needed.
}
///////////////////////////////////////////////////////////////////////////////
// ShutdownCallstack
//
EATHREADLIB_API void ShutdownCallstack()
{
// Intentionally empty in this implementation.
// Nothing needed.
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstack
//
// With the x64 (a.k.a. x86-64) platform, the CPU supports call stack tracing
// natively, by design. This is as opposed to the x86 platform, in which call
// stack tracing (a.k.a. unwinding) is a crap-shoot. The Win64 OS provides
// two functions in particular that take care of the primary work of stack
// tracing: RtlLookupFunctionEntry and RtlVirtualUnwind/RtlUnwindEx.
//
// On x64 each non-leaf function must have an info struct (unwind metadata) in
// static memory associated with it. That info struct describes the prologue and
// epilogue of the function in such a way as to identify where its return address
// is stored and how to restore non-volatile registers of the caller so that
// an unwind can happen during an exception and C++ object destructors can
// be called, etc. In order to implement a stack unwinding function for
// Microsoft x64, you can go the old x86 route of requiring the compiler to
// emit stack frames and reading the stack frame values. But that would work
// only where the frames were enabled (maybe just debug builds) and wouldn't
// work with third party code that didn't use the frames. But the Microsoft
// x64 ABI -requires- that all non-leaf functions have the info struct
// described above. And Microsoft provides the Rtl functions mentioned above
// to read the info struct (RtlLookupFunctionEntry) and use it to unwind a
// frame (RtlVirtualUnwind/RtlUnwindEx), whether you are in an exception or not.
//
// RtlVirtualUnwind implements a virtual (pretend) unwind of a stack and is
// useful for reading a call stack and its unwind info without necessarily
// executing an unwind (like in an exception handler). RtlVirtualUnwind provides
// the infrastructure upon which higher level exception and unwind handling
// support is implemented. It doesn't exist on x86, as x86 exception unwinding
// is entirely done by generated C++ code and isn't in the ABI. The Virtual in
// RtlVirtualUnwind has nothing to do with virtual memory, virtual functions,
// or virtual disks.
//
// RtlUnwindEx (replaces RtlUnwind) implements an actual unwind and thus is
// mostly useful only in the implementation of an exception handler and not
// for doing an ad-hoc stack trace.
//
// You can't use RtlLookupFunctionEntry on the IP (instruction pointer) of a
// leaf function, as the compiler isn't guaranteed to generate this info for
// such functions. But if a leaf function calls RtlLookupFunctionEntry on its
// own IP then it's no longer a leaf function and by virtue of calling RtlLookupFunctionEntry
// the info will necessarily be generated by the compiler. If you want to read
// the info associated with an IP of another function which may be a leaf
// function, it's best to read the return address associated with that
// function's callstack context, which is that function's rsp register's
// value as a uintptr_t* dereferenced (i.e. rsp holds the address of the
// return address).
//
// UNWIND_HISTORY_TABLE "is used as a cache to speed up repeated exception handling lookups,
// and is typically optional as far as usage with RtlUnwindEx goes though certainly
// recommended from a performance perspective." This may be useful to us, though we'd need
// to make it a thread-safe static variable or similar and not a local variable.
// History table declaration and preparation for use, which needs to be done per-thread:
// UNWIND_HISTORY_TABLE unwindHistoryTable;
// RtlZeroMemory(&unwindHistoryTable, sizeof(UNWIND_HISTORY_TABLE));
// unwindHistoryTable.Unwind = TRUE;
// To do: Implement usage of the history table for faster callstack tracing.
//
// Reading for anybody wanting to understand this:
// http://www.nynaeve.net/?p=105
// http://www.nynaeve.net/?p=106
// http://blogs.msdn.com/b/freik/archive/2005/03/17/398200.aspx
// http://www.codemachine.com/article_x64deepdive.html
// http://blogs.msdn.com/b/ntdebugging/archive/2010/05/12/x64-manual-stack-reconstruction-and-stack-walking.aspx
// http://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64/
//
EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
{
CONTEXT context;
PRUNTIME_FUNCTION pRuntimeFunction;
ULONG64 nImageBase = 0;
ULONG64 nPrevImageBase = 0;
size_t nFrameIndex = 0;
if(pContext)
{
RtlZeroMemory(&context, sizeof(context));
context.Rip = pContext->mRIP;
context.Rsp = pContext->mRSP;
context.Rbp = pContext->mRBP;
context.ContextFlags = CONTEXT_CONTROL; // CONTEXT_CONTROL actually specifies SegSs, Rsp, SegCs, Rip, and EFlags. But for callstack tracing and unwinding, all that matters is Rip and Rsp.
// In the case where we are calling 0, we might be able to unwind one frame and see if we are now in a valid stack frame for
// callstack generation. If not abort, otherwise we continue one frame past where the exception (calling 0) was performed
if (context.Rip == 0 && context.Rsp != 0)
{
context.Rip = (ULONG64)(*(PULONG64)context.Rsp); // To consider: Use IsAddressReadable(pFrame) before dereferencing this pointer.
context.Rsp += 8; // reset the stack pointer (+8 since we know there has been no prologue run requiring a larger number since RIP == 0)
}
if(context.Rip && (nFrameIndex < nReturnAddressArrayCapacity))
pReturnAddressArray[nFrameIndex++] = (void*)(uintptr_t)context.Rip;
}
else // Else we are reading the current thread's callstack.
{
// To consider: Don't call the RtlCaptureContext function for EA_WINAPI_PARTITION_DESKTOP and
// instead use the simpler version below it which writes Rip/Rsp/Rbp. RtlCaptureContext is much
// slower. We need to verify that the 'quality' and extent of returned callstacks is good for
// the simpler version before using it exclusively.
#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
// Apparently there is no need to memset the context struct.
context.ContextFlags = CONTEXT_ALL; // Actually we should need only CONTEXT_INTEGER, so let's test that next chance we get.
RtlCaptureContext(&context);
#elif defined(EA_PLATFORM_XBOXONE) // This probably isn't limited to just this platform, but until we can test any other platforms we'll leave it at just this.
return RtlCaptureStackBackTrace(1, (ULONG)nReturnAddressArrayCapacity, pReturnAddressArray, NULL);
#else
void* ip = NULL;
EAGetInstructionPointer(ip);
context.Rip = (uintptr_t)ip;
context.Rsp = (uintptr_t)GetRSP();
context.Rbp = 0; // RBP isn't actually needed for stack unwinding on x64, and don't typically need to use it in generated code, as the instruction set provides an efficient way to read/write via rsp offsets. Also, when frame pointers are omitted in the compiler settings then ebp won't be used.
context.ContextFlags = CONTEXT_CONTROL;
#endif
}
// The following loop intentionally skips the first call stack frame because
// that frame corresponds this function (GetCallstack).
while(context.Rip && (nFrameIndex < nReturnAddressArrayCapacity))
{
// Try to look up unwind metadata for the current function.
nPrevImageBase = nImageBase;
__try
{
pRuntimeFunction = (PRUNTIME_FUNCTION)RtlLookupFunctionEntry(context.Rip, &nImageBase, NULL /*&unwindHistoryTable*/);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
// Something went wrong in RtlLookupFunctionEntry, and it is unknown
// if it is recoverable; so just get out.
return nFrameIndex;
}
if(pRuntimeFunction)
{
// RtlVirtualUnwind is not declared in the SDK headers for non-desktop apps,
// but for 64 bit targets it's always present and appears to be needed by the
// existing RtlUnwindEx function. If in the end we can't use RtlVirtualUnwind
// and Microsoft doesn't provide an alternative, we can implement RtlVirtualUnwind
// ourselves manually (not trivial, but has the best results) or we can use
// the old style stack frame following, which works only when stack frames are
// enabled in the build, which usually isn't so for optimized builds and for
// third party code.
__try // Under at least the XBox One platform, RtlVirtualUnwind can crash here. It may possibly be due to the context being incomplete.
{
VOID* handlerData = NULL;
ULONG64 establisherFramePointers[2] = { 0, 0 };
RtlVirtualUnwind(UNW_FLAG_NHANDLER, nImageBase, context.Rip, pRuntimeFunction, &context, &handlerData, establisherFramePointers, NULL);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
context.Rip = NULL;
context.ContextFlags = 0;
}
}
else
{
// If we don't have a RUNTIME_FUNCTION, then we've encountered an error of some sort (mostly likely only for cases of corruption) or leaf function (which doesn't make sense, given that we are moving up in the call sequence). Adjust the stack appropriately.
context.Rip = (ULONG64)(*(PULONG64)context.Rsp); // To consider: Use IsAddressReadable(pFrame) before dereferencing this pointer.
context.Rsp += 8;
}
if(context.Rip)
{
if(nFrameIndex < nReturnAddressArrayCapacity)
pReturnAddressArray[nFrameIndex++] = (void*)(uintptr_t)context.Rip;
}
}
return nFrameIndex;
}
///////////////////////////////////////////////////////////////////////////////
// GetThreadIdFromThreadHandle
//
// This implementation is the same as the one in EAThread.
// Converts a thread HANDLE (threadId) to a thread id DWORD (sysThreadId).
// Recall that Windows has two independent thread identifier types.
//
EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId)
{
    // Win64 has this function natively, unlike earlier versions of 32 bit Windows.
    const HANDLE hThread     = (HANDLE)threadId;
    const DWORD  sysThreadId = ::GetThreadId(hThread);

    return (uint32_t)sysThreadId;
}
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContext
//
// The threadId is the same thing as the Windows' HANDLE GetCurrentThread() function
// and not the same thing as Windows' GetCurrentThreadId function. See the
// GetCallstackContextSysThreadId for the latter.
//
#if EA_USE_CPP11_CONCURRENCY
    EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, EA::Thread::ThreadId threadId)
    {
        // GetCallstackContextSysThreadId compares its 'sysThreadId' argument against the result of
        // the Windows API function 'GetCurrentThreadId', so we have to dig the Windows thread
        // identifier out of the std::thread::id structure.
        // http://msdn.microsoft.com/en-us/library/windows/desktop/ms683183(v=vs.85).aspx
        static_assert(sizeof(_Thrd_t) == sizeof(threadId), "We expect the 'std::thread::id' to have a single member of type '_Thrd_t'.");

        // memcpy (as opposed to a pointer cast) avoids strict aliasing issues when reading the internal member.
        _Thrd_t nativeThread;
        memcpy(&nativeThread, &threadId, sizeof(_Thrd_t));

        return GetCallstackContextSysThreadId(context, _Thr_val(nativeThread));
    }
#else
    EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
    {
        // Both kThreadIdInvalid and kThreadIdCurrent are taken to mean the calling thread.
        const bool bUseCurrentThread = (threadId == (intptr_t)kThreadIdInvalid) || (threadId == (intptr_t)kThreadIdCurrent);

        // GetCurrentThread returns a thread 'pseudohandle' and not a real thread handle.
        const intptr_t threadHandle = bUseCurrentThread ? (intptr_t)::GetCurrentThread() : threadId;

        // Convert the HANDLE to a DWORD thread id and defer to the id-based function.
        return GetCallstackContextSysThreadId(context, EA::Thread::GetThreadIdFromThreadHandle(threadHandle));
    }
#endif
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContextSysThreadId
//
// A sysThreadId is a Microsoft DWORD thread id, which can be obtained from
// the currently running thread via GetCurrentThreadId. It can be obtained from
// a Microsoft thread HANDLE via EA::Thread::GetThreadIdFromThreadHandle();
// A DWORD thread id can be converted to a thread HANDLE via the Microsoft OpenThread
// system function.
//
EA_DISABLE_VC_WARNING(4701) // potentially uninitialized local variable 'win64CONTEXT' used
// Fills 'context' with the instruction, stack and frame pointer (RIP/RSP/RBP) of the
// thread identified by the Windows DWORD thread id 'sysThreadId'.
//
// Returns true if a non-zero instruction pointer was obtained. On failure the three
// context members are set to zero and false is returned.
EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
{
	// ContextX86_64 is expected to mirror the layout of the Windows CONTEXT struct.
	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, Rip) == offsetof(CONTEXT, Rip));
	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, VectorRegister) == offsetof(CONTEXT, VectorRegister));
	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, LastExceptionFromRip) == offsetof(CONTEXT, LastExceptionFromRip));

	const DWORD sysThreadIdCurrent = GetCurrentThreadId();
	CONTEXT win64CONTEXT;

	// Give the three registers we read at the end a defined value. Without this, the
	// "other thread" path on configurations where the suspend/read code below is compiled
	// out would copy uninitialized stack contents into 'context'.
	win64CONTEXT.Rip = 0;
	win64CONTEXT.Rsp = 0;
	win64CONTEXT.Rbp = 0;

	if(sysThreadIdCurrent == (DWORD)sysThreadId) // If getting the context of the current thread...
	{
		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
			RtlCaptureContext(&win64CONTEXT); // This function has no return value.
		#else
			void* ip = NULL;
			EAGetInstructionPointer(ip);
			win64CONTEXT.Rip = (uintptr_t)ip;
			win64CONTEXT.Rsp = (uintptr_t)GetRSP();
			win64CONTEXT.Rbp = 0; // RBP isn't actually needed for stack unwinding on x64, and isn't typically used in generated code, as the instruction set provides an efficient way to read/write via rsp offsets.
			win64CONTEXT.ContextFlags = CONTEXT_CONTROL; // CONTEXT_CONTROL actually specifies SegSs, Rsp, SegCs, Rip, and EFlags. But for callstack tracing and unwinding, all that matters is Rip and Rsp.
		#endif
	}
	else
	{
		#if !defined(EA_PLATFORM_WINDOWS) || EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
			// In this case we are working with a separate thread, so we suspend it
			// and read information about it and then resume it. We cannot use this
			// technique to get the context of the current thread unless we do it by
			// spawning a new thread which suspends this thread and calls GetThreadContext.
			HANDLE threadId = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT, TRUE, (DWORD)sysThreadId);
			BOOL   result   = false;

			EAT_ASSERT(threadId != 0); // If this fails then maybe there's a process security restriction we are running into.
			if(threadId)
			{
				DWORD suspendResult = SuspendThread(threadId);

				if(suspendResult != (DWORD)-1)
				{
					win64CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
					result = GetThreadContext(threadId, &win64CONTEXT);

					// Always resume a thread that we successfully suspended.
					suspendResult = ResumeThread(threadId);
					EAT_ASSERT(suspendResult != (DWORD)-1); EA_UNUSED(suspendResult);
				}

				CloseHandle(threadId); // Matches the OpenThread call above.
			}

			if(!result)
			{
				// Report failure to the caller through a zeroed context.
				win64CONTEXT.Rip = 0;
				win64CONTEXT.Rsp = 0;
				win64CONTEXT.Rbp = 0;
				win64CONTEXT.ContextFlags = 0;
			}
		#endif
	}

	context.mRIP = win64CONTEXT.Rip;
	context.mRSP = win64CONTEXT.Rsp;
	context.mRBP = win64CONTEXT.Rbp;

	return (context.mRIP != 0);
}
EA_RESTORE_VC_WARNING()
///////////////////////////////////////////////////////////////////////////////
// GetCallstackContext
//
void GetCallstackContext(CallstackContext& context, const Context* pContext)
{
	// Only the three registers that CallstackContext carries are transferred;
	// everything else in the full register context is ignored.
	const Context& source = *pContext;

	context.mRBP = source.Rbp;
	context.mRSP = source.Rsp;
	context.mRIP = source.Rip;
}
///////////////////////////////////////////////////////////////////////////////
// GetModuleFromAddress
//
size_t GetModuleFromAddress(const void* address, char* pModuleName, size_t moduleNameCapacity)
{
MEMORY_BASIC_INFORMATION mbi;
if(VirtualQuery(address, &mbi, sizeof(mbi)))
{
HMODULE hModule = (HMODULE)mbi.AllocationBase;
if(hModule)
{
#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) // GetModuleFileName is desktop API-only.
// As of the early Windows 8 SDKs, GetModuleFileName is not exposed to non-desktop
// apps, though it's apparently nevertheless present in the libraries.
return GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);
#else
// If it turns out in the end that we really can't do this, then for non-shipping builds
// we can likely implement a manual version of this via information found through the
// TEB structure for the process.
return GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);
#endif
}
}
pModuleName[0] = 0;
return 0;
}
///////////////////////////////////////////////////////////////////////////////
// GetModuleHandleFromAddress
//
// The input pAddress must be an address of code and not data or stack space.
//
EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress)
{
	MEMORY_BASIC_INFORMATION memoryInfo;

	const bool bQuerySucceeded = (VirtualQuery(pAddress, &memoryInfo, sizeof(memoryInfo)) != 0);
	if(!bQuerySucceeded)
		return 0;

	// On Microsoft platforms a module handle is nothing more than a pointer to the
	// module's code. It matches the information in the map file, although the
	// loader may have relocated the module to a different address than the map
	// file states.
	return (ModuleHandle)memoryInfo.AllocationBase;
}
///////////////////////////////////////////////////////////////////////////////
// SetStackBase
//
// Intentionally a no-op in this implementation: the parameter is unnamed and ignored.
EATHREADLIB_API void SetStackBase(void* /*pStackBase*/)
{
// Nothing to do, as GetStackBase always works on its own.
}
///////////////////////////////////////////////////////////////////////////////
// GetStackBase
//
EATHREADLIB_API void* GetStackBase()
{
	// NtCurrentTeb is defined in <WinNT.h> as an inline call to __readgsqword.
	// The result is viewed as an NT_TIB64, from which the StackBase field is read.
	NT_TIB64* const pThreadInfoBlock = (NT_TIB64*)NtCurrentTeb();
	void* const     pStackBase       = (void*)pThreadInfoBlock->StackBase;

	return pStackBase;
}
///////////////////////////////////////////////////////////////////////////////
// GetStackLimit
//
EATHREADLIB_API void* GetStackLimit()
{
	// Design note: instead of the OS-visible limit we could return the current stack
	// pointer (void* rsp = GetRSP(); return rsp;), which approximates the limit of the
	// caller and is a higher address than Tib.StackLimit (the stack grows downward).
	// Which is preferable is debatable: one reports the thread's -usable- stack space,
	// the other how much the thread is -currently- using. Determining the usable space
	// is further complicated by Microsoft platforms auto-extending the stack when the
	// process pushes past the current limit. We settled on Tib.StackLimit because it is
	// the most portable choice across Microsoft OSs and the compilers targeting them
	// (Microsoft or not).

	// NtCurrentTeb is defined in <WinNT.h> as an inline call to __readgsqword.
	NT_TIB64* const pThreadInfoBlock = (NT_TIB64*)NtCurrentTeb();
	void* const     pStackLimit      = (void*)pThreadInfoBlock->StackLimit;

	return pStackLimit;
}
} // namespace Thread
} // namespace EA
#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_WIN64))
#pragma optimize("", on) // See comments above regarding this optimization change.
#endif
@@ -0,0 +1,215 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include "EABase/eabase.h"
#include "eathread/eathread_mutex.h"
#include "eathread/eathread.h"
#if defined(EA_PLATFORM_MICROSOFT)
EA_DISABLE_ALL_VC_WARNINGS()
#include <Windows.h>
EA_RESTORE_ALL_VC_WARNINGS()
#endif
#ifdef CreateMutex
#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
#endif
#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
#if defined(EA_PLATFORM_WINDOWS)
extern "C" WINBASEAPI BOOL WINAPI TryEnterCriticalSection(_Inout_ LPCRITICAL_SECTION lpCriticalSection);
#endif
EAMutexData::EAMutexData()
	: mnLockCount(0), mbIntraProcess(true)
{
	// Start with zeroed platform storage; Mutex::Init fills it in later.
	::memset(&mData, 0, sizeof(mData));

	#if EAT_ASSERT_ENABLED
		// Owner tracking exists only in assert-enabled builds.
		mThreadId    = EA::Thread::kThreadIdInvalid;
		mSysThreadId = EA::Thread::kSysThreadIdInvalid;
	#endif
}
EA::Thread::MutexParameters::MutexParameters(bool bIntraProcess, const char* pName)
	: mbIntraProcess(bIntraProcess)
{
	// Default to an empty name; it is overwritten below if the caller supplied one.
	mName[0] = 0;

	if(pName)
	{
		// Copy at most capacity-1 characters and force termination, since strncpy
		// does not terminate the destination when the source is too long.
		const size_t kLastIndex = sizeof(mName) - 1;

		strncpy(mName, pName, kLastIndex);
		mName[kLastIndex] = 0;
	}
}
EA::Thread::Mutex::Mutex(const MutexParameters* pMutexParameters, bool bDefaultParameters)
{
	// Explicit parameters always win. Passing NULL together with bDefaultParameters == false
	// forwards NULL to Init, which then leaves the mutex uninitialized.
	if(pMutexParameters || !bDefaultParameters)
	{
		Init(pMutexParameters);
		return;
	}

	// No parameters supplied, but the caller asked for defaults.
	MutexParameters parameters;
	Init(&parameters);
}
EA::Thread::Mutex::~Mutex()
{
	// A mutex must not be destroyed while it is still locked.
	EAT_ASSERT(mMutexData.mnLockCount == 0);

	// Consider doing something to verify the mutex object has been initialized.
	#if defined(EA_PLATFORM_WINDOWS)
		if(!mMutexData.mbIntraProcess)
			CloseHandle(*(HANDLE*)mMutexData.mData);                    // Inter-process: a kernel mutex HANDLE is stored.
		else
			DeleteCriticalSection((CRITICAL_SECTION*)mMutexData.mData); // Intra-process: a CRITICAL_SECTION is stored.
	#else
		DeleteCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
	#endif
}
bool EA::Thread::Mutex::Init(const MutexParameters* pMutexParameters)
{
	// The internal storage must be large enough for the platform primitives.
	// If one of these fires, please adjust MUTEX_PLATFORM_DATA_SIZE in eathread_mutex.h accordingly.
	EAT_COMPILETIME_ASSERT(sizeof(CRITICAL_SECTION) <= (MUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t) * sizeof(uint64_t)));
	EAT_COMPILETIME_ASSERT(sizeof(HANDLE) <= MUTEX_PLATFORM_DATA_SIZE);

	// Without parameters there is nothing to initialize.
	if(!pMutexParameters)
		return false;

	mMutexData.mnLockCount = 0;

	#if defined(EA_PLATFORM_WINDOWS)
		mMutexData.mbIntraProcess = pMutexParameters->mbIntraProcess;

		if(!mMutexData.mbIntraProcess)
		{
			// Inter-process: create a (possibly named) kernel mutex and keep its HANDLE in mData.
			EAT_COMPILETIME_ASSERT(sizeof(pMutexParameters->mName) <= MAX_PATH);

			const char* const pMutexName = pMutexParameters->mName[0] ? pMutexParameters->mName : NULL;
			HANDLE&           hMutex     = *(HANDLE*)mMutexData.mData;

			hMutex = ::CreateMutexA(NULL, false, pMutexName);
			EAT_ASSERT(hMutex != 0);
			return hMutex != 0;
		}

		// Intra-process: a critical section with a spin count, which has given better
		// performance in practice on multiprocessor systems.
		const int rv = InitializeCriticalSectionAndSpinCount((CRITICAL_SECTION*)mMutexData.mData, 256);
		EAT_ASSERT(rv != 0);
		EA_UNUSED(rv);
		return true;
	#else
		// Non-Windows Microsoft platforms only get the critical section flavor, again with
		// a spin count for better performance on multiprocessor systems.
		InitializeCriticalSectionAndSpinCount((CRITICAL_SECTION*)mMutexData.mData, 256);
		return true;
	#endif
}
#pragma warning(push)
#pragma warning(disable: 4706) // disable warning about assignment within a conditional expression
// Acquires the mutex, waiting no longer than the absolute time 'timeoutAbsolute'.
//
// Returns the new lock count on success, kResultTimeout if the timeout was reached
// before the lock could be taken, or kResultError if the inter-process wait ended
// with anything other than WAIT_OBJECT_0 or WAIT_TIMEOUT.
int EA::Thread::Mutex::Lock(const ThreadTime& timeoutAbsolute)
{
// Sanity check against a runaway lock count.
EAT_ASSERT(mMutexData.mnLockCount < 100000);
#if defined(EA_PLATFORM_WINDOWS) // Non-Windows is always assumed to be intra-process.
if(mMutexData.mbIntraProcess)
{
#endif
// Intra-process path: mData holds a CRITICAL_SECTION.
if(timeoutAbsolute == kTimeoutNone)
EnterCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
else
{
// Critical sections have no timed wait, so we poll with TryEnterCriticalSection,
// sleeping 1 ms between attempts until the deadline passes.
// To consider: Have a pathway for kTimeoutImmediate which doesn't check the current time.
while(!TryEnterCriticalSection((CRITICAL_SECTION*)mMutexData.mData))
{
if(GetThreadTime() >= timeoutAbsolute)
return kResultTimeout;
Sleep(1);
}
}
#if defined(EA_PLATFORM_WINDOWS)
}
else
{
// Inter-process path: mData holds a kernel mutex HANDLE.
EAT_ASSERT(*(HANDLE*)mMutexData.mData != 0);
const DWORD dw = ::WaitForSingleObject(*(HANDLE*)mMutexData.mData, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
if(dw == WAIT_TIMEOUT)
return kResultTimeout;
// NOTE(review): any other result, including WAIT_ABANDONED, is reported as an error here.
if(dw != WAIT_OBJECT_0)
{
EAT_ASSERT(false);
return kResultError;
}
}
#endif
// The assignment inside this assert is intentional (hence the C4706 suppression above):
// it records the owning thread only in builds where EAT_ASSERT evaluates its argument.
EAT_ASSERT((mMutexData.mSysThreadId = EA::Thread::GetSysThreadId()) != kSysThreadIdInvalid);
EAT_ASSERT(mMutexData.mnLockCount >= 0);
return ++mMutexData.mnLockCount; // This is safe to do because we have the lock.
}
#pragma warning(pop)
int EA::Thread::Mutex::Unlock()
{
	// Only the owning thread may unlock, and only a locked mutex can be unlocked.
	EAT_ASSERT(mMutexData.mSysThreadId == EA::Thread::GetSysThreadId());
	EAT_ASSERT(mMutexData.mnLockCount > 0);

	// Decrement while we still hold the lock, which makes the update safe.
	const int nRemainingLocks = --mMutexData.mnLockCount;

	#if defined(EA_PLATFORM_WINDOWS)
		if(!mMutexData.mbIntraProcess)
		{
			// Inter-process flavor: mData holds a kernel mutex HANDLE.
			EAT_ASSERT(*(HANDLE*)mMutexData.mData != 0);
			ReleaseMutex(*(HANDLE*)mMutexData.mData);
			return nRemainingLocks;
		}
	#endif

	// Intra-process flavor (the only one on non-Windows platforms): mData holds a CRITICAL_SECTION.
	LeaveCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
	return nRemainingLocks;
}
int EA::Thread::Mutex::GetLockCount() const
{
	// Plain read of the counter maintained by Lock and Unlock; no synchronization is done here.
	const int nCurrentLockCount = mMutexData.mnLockCount;
	return nCurrentLockCount;
}
bool EA::Thread::Mutex::HasLock() const
{
	// Nobody holds the mutex at all, so the calling thread can't either.
	if(mMutexData.mnLockCount <= 0)
		return false;

	#if EAT_ASSERT_ENABLED
		// The owner is tracked in assert-enabled builds, so we can give a precise answer.
		return (mMutexData.mSysThreadId == EA::Thread::GetSysThreadId());
	#else
		// This is the best we can do, though it is of limited use, since it doesn't
		// tell you if you are the thread with the lock.
		return true;
	#endif
}
#endif // EA_PLATFORM_XXX
@@ -0,0 +1,911 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include <EABase/eabase.h>
#include "eathread/eathread.h"
#include "eathread/eathread_thread.h"
#include "eathread/eathread_storage.h"
#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
#include <process.h>
EA_DISABLE_ALL_VC_WARNINGS()
#include <Windows.h>
#include <stdlib.h> // for mbstowcs
#include <setjmp.h>
EA_RESTORE_ALL_VC_WARNINGS()
#include "eathread/eathread_futex.h"
extern "C" WINBASEAPI DWORD WINAPI SetThreadIdealProcessor(_In_ HANDLE hThread, _In_ DWORD dwIdealProcessor);
#if defined(EA_PLATFORM_WIN64)
extern "C" WINBASEAPI DWORD WINAPI GetThreadId(_In_ HANDLE hThread);
extern "C" WINBASEAPI ULONGLONG GetTickCount64(VOID); // Will not run on pre-Vista OS so 64 bit XP not supported
#endif
// We set this module to initialize early. We want to do this because it
// allows statically initialized objects to call these functions safely.
EA_DISABLE_VC_WARNING(4074) // warning C4074: initializers put in compiler reserved initialization area
#pragma init_seg(compiler)
EA_RESTORE_VC_WARNING()
#ifndef EATHREAD_INIT_SEG_DEFINED
#define EATHREAD_INIT_SEG_DEFINED
#endif
namespace EA
{
namespace Thread
{
// Note by Paul Pedriana:
// There is a bit of code here which implements "dynamic thread array maintenance".
// The reason for this is that we are trying to present to the user a consistently
// behaving GetThreadId function. The Windows threading API has a number of design
// characteristics that make it less than ideal for applications. One of these
// designs is that an application cannot ask the system what its thread id is and
// get a consistent answer; in fact you always get a different answer.
// To consider: Use the VC++ undocumented __tlregdtor function to detect thread exits.
// __tlregdtor is a VC++ CRT function which detects the exiting of any threads created
// with the CRT beginthread family of functions. It cannot detect the exit of any threads
// that are begun via direct OS thread creation functions, nor can it detect the exit of
// threads that are exited by direct OS thread exit functions. This may not be a major
// problem, because C/C++ programs should virtually always be calling the CRT thread begin
// and end functions so that the CRT can be maintained properly for the thread.
//
// typedef void (*_PVFV)();
// void __tlregdtor(_PVFV func);
// void ThreadExit(){ Do something. May need to be careful about what APIs are called. }
// Assertion variables.
// Module-level assertion hook state, both initially NULL.
// NOTE(review): presumably these hold a user-installed assertion failure callback and the
// context pointer passed to it; the code that sets and reads them is outside this section.
EA::Thread::AssertionFailureFunction gpAssertionFailureFunction = NULL;
void* gpAssertionFailureContext = NULL;
// Dynamic thread array maintenance.
// If the user calls GetThreadId from a thread that was created by some third
// party, then we don't have a thread handle for it. The only current way to get
// such a thread handle is to call OpenThread(GetCurrentThreadId()) or
// DuplicateHandle(GetCurrentThread()). In either case the return value is a
// handle which must be disposed of via CloseHandle. Additionally, since the
// thread was created by a third party, it's entirely possible that the thread
// will be exited without us ever finding out about it. But we still need to call
// CloseHandle on the handle. So we maintain an array of handles and check their
// status periodically and upon process exit.
// Maximum number of dynamic thread handles that can be tracked at the same time.
const size_t kMaxThreadDynamicArrayCount = 128;
// Fixed-size table of thread handles, guarded by a critical section.
// All members are static; the struct is never instantiated in this section.
struct DynamicThreadArray
{
static HANDLE mhDynamicThreadArray[kMaxThreadDynamicArrayCount]; // Handle slots; a zero entry marks a free slot.
static CRITICAL_SECTION mCriticalSection;                        // Guards access to mhDynamicThreadArray.
static bool mbDynamicThreadArrayInitialized;                     // Set to true by Initialize() once the two members above are set up.
static void Initialize();                                        // Performs the one-time setup of the members above.
static void CheckDynamicThreadArray(bool bCloseAll);             // Closes handles of exited threads, or all handles if bCloseAll is true.
static void AddDynamicThreadHandle(HANDLE hThread, bool bAdd);   // Adds (bAdd == true) or removes and closes (bAdd == false) a handle.
};
// Definitions of the static members declared above.
HANDLE DynamicThreadArray::mhDynamicThreadArray[kMaxThreadDynamicArrayCount];
CRITICAL_SECTION DynamicThreadArray::mCriticalSection;
bool DynamicThreadArray::mbDynamicThreadArrayInitialized;
// DynamicThreadArray ctor/dtor were removed because memory tracking systems are required to run
// pre-main and post-main. In order to support memory tracking of allocations that occur post-main we
// intentionally "leak" an operating system critical section and leave it to be cleaned up by the operating
// system at process shutdown.
//
// DynamicThreadArray::DynamicThreadArray()
// {
// Initialize();
// }
// DynamicThreadArray::~DynamicThreadArray()
// {
// CheckDynamicThreadArray(true);
// DeleteCriticalSection(&mCriticalSection);
// }
// One-time setup of the handle array and its critical section.
// The initialized flag is checked once without the lock and again under the futex, so
// the setup code runs at most once even if several threads arrive here together.
void DynamicThreadArray::Initialize()
{
// Futex that serializes the one-time initialization below.
static EA::Thread::Futex m;
// Unlocked read of the flag; the barrier that follows orders it against later reads.
const bool done = mbDynamicThreadArrayInitialized;
// ensure that if we've seen previous writes to mbDynamicThreadArrayInitialized, we also
// see the writes to mCriticalSection, to avoid the case where another thread sees the flag
// before it sees the initialization
EAReadBarrier();
if(!done)
{
EA::Thread::AutoFutex _(m);
// Re-check under the lock: another thread may have finished the setup in the meantime.
if (!mbDynamicThreadArrayInitialized)
{
memset(mhDynamicThreadArray, 0, sizeof(mhDynamicThreadArray));
InitializeCriticalSection(&mCriticalSection);
// ensure writes to mCriticalSection and mhDynamicThreadArray are visible before writes
// to mbDynamicThreadArrayInitialized, to avoid the case where another thread sees the
// flag before it sees the initialization
EAWriteBarrier();
mbDynamicThreadArrayInitialized = true;
}
}
}
// This function looks at the existing set of thread ids and see if any of them
// were quit. If so then this function removes their entry from our array of
// thread handles, and most importantly, calls CloseHandle on the thread handle.
void DynamicThreadArray::CheckDynamicThreadArray(bool bCloseAll)
{
	Initialize();

	const size_t kSlotCount = sizeof(mhDynamicThreadArray) / sizeof(mhDynamicThreadArray[0]);

	EnterCriticalSection(&mCriticalSection);

	for(size_t slot = 0; slot < kSlotCount; ++slot)
	{
		if(!mhDynamicThreadArray[slot]) // Skip unused slots.
			continue;

		// Beware that GetExitCodeThread can't distinguish a running thread from one that
		// exited with a return value equal to STILL_ACTIVE (i.e. 259). We could document
		// that users shouldn't do that, or switch to WaitForSingleObject(hThread, 0) and
		// treat WAIT_TIMEOUT as "still active".
		DWORD dwExitCode = 0;

		// Close the handle if we were told to close everything, if the handle is invalid,
		// or if the thread has exited.
		const bool bShouldClose = bCloseAll ||
		                          !GetExitCodeThread(mhDynamicThreadArray[slot], &dwExitCode) ||
		                          (dwExitCode != STILL_ACTIVE);
		if(bShouldClose)
		{
			CloseHandle(mhDynamicThreadArray[slot]); // Pairs with the DuplicateHandle call made in GetThreadId.
			mhDynamicThreadArray[slot] = 0;
		}
	}

	LeaveCriticalSection(&mCriticalSection);
}
void DynamicThreadArray::AddDynamicThreadHandle(HANDLE hThread, bool bAdd)
{
	Initialize();

	if(!hThread) // Nothing to add or remove.
		return;

	const size_t kSlotCount = sizeof(mhDynamicThreadArray) / sizeof(mhDynamicThreadArray[0]);

	EnterCriticalSection(&mCriticalSection);

	if(!bAdd)
	{
		// Removal: locate the handle and, if it is one of ours, close it and free its slot.
		// A handle that isn't found is deliberately not treated as an error; it may just
		// not be a dynamic thread handle, and Windows gives us no way to tell.
		size_t slot = 0;
		while(slot < kSlotCount)
		{
			if(mhDynamicThreadArray[slot] == hThread)
			{
				CloseHandle(hThread); // Pairs with the DuplicateHandle call made in GetThreadId.
				mhDynamicThreadArray[slot] = kThreadIdInvalid;
				break;
			}
			++slot;
		}
	}
	else
	{
		// Addition: store the handle in the first free slot. On success hThread is
		// overwritten with kThreadIdInvalid, which is how the code below detects it.
		size_t slot = 0;
		while(slot < kSlotCount)
		{
			if(mhDynamicThreadArray[slot] == kThreadIdInvalid)
			{
				mhDynamicThreadArray[slot] = hThread;
				hThread = kThreadIdInvalid;
				break;
			}
			++slot;
		}

		EAT_ASSERT(hThread == kThreadIdInvalid); // The table is expected to have had room.

		if(hThread != kThreadIdInvalid)          // No room: release the handle instead of leaking it.
			CloseHandle(hThread);                // Pairs with the DuplicateHandle call made in GetThreadId.
	}

	LeaveCriticalSection(&mCriticalSection);
}
// Thread handle local storage.
// We have this code here in order to cache the thread handles for
// threads, so that the user gets a consistent return value from the
// GetThreadId function for each unique thread.
// TLS slot index under which each thread's cached thread handle is stored.
// TLS_OUT_OF_INDEXES means that no slot has been allocated yet.
static DWORD dwThreadHandleTLS = TLS_OUT_OF_INDEXES; // We intentionally make this an independent variable so that it is initialized unilaterally on segment load.
// Helper whose constructor allocates the TLS slot (and whose optional destructor frees it).
// A single static instance, sTLSAlloc, is defined right after the struct.
struct TLSAlloc
{
TLSAlloc()
{
// Allocate only if nobody has done so already (SetCurrentThreadHandle allocates lazily as well).
if(dwThreadHandleTLS == TLS_OUT_OF_INDEXES) // It turns out that the user might have set this to a
dwThreadHandleTLS = TlsAlloc(); // value before this constructor has run. So we check.
}
#if EATHREAD_TLSALLOC_DTOR_ENABLED
// Since this class is used only as a static variable, this destructor would
// only get called during module destruction: app quit or DLL unload.
// In the case of DLL unload, we may have a problem if the DLL was unloaded
// before threads created by it were destroyed. Whether the problem is significant
// depends on the application. In most cases it won't be significant.
//
// We want to call TlsFree because not doing so results in a memory leak and eventual
// exhaustion of TLS ids by the system.
~TLSAlloc()
{
if(dwThreadHandleTLS != TLS_OUT_OF_INDEXES)
{
// We don't read the hThread stored at dwThreadHandleTLS and call CloseHandle
// on it, as the DynamicThreadArray destructor will deal with closing any
// thread handles this module knows about.
// NOTE(review): the DynamicThreadArray destructor is commented out in this file, so
// confirm that the handles really are closed elsewhere.
TlsFree(dwThreadHandleTLS);
dwThreadHandleTLS = TLS_OUT_OF_INDEXES;
}
}
#endif
};
// The one static instance; constructing it allocates the TLS slot during static initialization.
static TLSAlloc sTLSAlloc;
void SetCurrentThreadHandle(HANDLE hThread, bool bDynamic)
{
	// Note: hThread may legitimately be kThreadIdInvalid, which is how a caller
	// clears the calling thread's cached handle, so we can't assert against it.

	// The slot is normally allocated at startup already; this is the lazy fallback.
	if(dwThreadHandleTLS == TLS_OUT_OF_INDEXES)
		dwThreadHandleTLS = TlsAlloc();

	EAT_ASSERT(dwThreadHandleTLS != TLS_OUT_OF_INDEXES);
	if(dwThreadHandleTLS == TLS_OUT_OF_INDEXES)
		return; // No TLS slot available, so there is nowhere to store the handle.

	// Take the opportunity to close the handles of dynamic threads that have exited.
	DynamicThreadArray::CheckDynamicThreadArray(false);

	if(bDynamic)
	{
		const bool bRemoving = (hThread == kThreadIdInvalid);

		if(bRemoving)
		{
			// Clearing: un-register whatever handle was cached for this thread before.
			HANDLE hPrevious = TlsGetValue(dwThreadHandleTLS);

			if(hPrevious != kThreadIdInvalid) // This should always evaluate to true in practice.
				DynamicThreadArray::AddDynamicThreadHandle(hPrevious, false); // Closes the handle if it is a dynamic one.
		}
		else
		{
			// Setting: register the new handle so that it eventually gets closed.
			DynamicThreadArray::AddDynamicThreadHandle(hThread, true);
		}
	}

	TlsSetValue(dwThreadHandleTLS, hThread);
}
} // namespace Thread
} // namespace EA
// Returns the ThreadId (thread HANDLE) of the calling thread.
//
// The value is cached in thread-local storage, so repeated calls from the same thread
// return the same handle. Returns kThreadIdInvalid if a handle for the calling thread
// could not be created.
EATHREADLIB_API EA::Thread::ThreadId EA::Thread::GetThreadId()
{
	// We have some non-trivial code here because Windows doesn't provide a means for a
	// thread to read its own thread id (thread handle) in a consistent way.

	// If we have allocated thread-local storage for this module...
	if(dwThreadHandleTLS != TLS_OUT_OF_INDEXES)
	{
		void* const pValue = TlsGetValue(dwThreadHandleTLS);

		if(pValue)         // If the current thread's ThreadId has been previously saved...
			return pValue; // Under Win32, type ThreadId should be the same as HANDLE which should be the same as void*.

		// Else fall through and get the current thread handle and cache it so that next time the above code will succeed.
	}

	// In this case the thread was not created by EAThread. So we give
	// the thread a new Id, based on GetCurrentThread and DuplicateHandle.
	// GetCurrentThread returns a "pseudo handle" which isn't actually the
	// thread handle but is a hard-coded constant which means "current thread".
	// If you want to get a real thread handle then you need to call DuplicateHandle
	// on the pseudo handle. Every time you call DuplicateHandle you get a different
	// result, yet we want this GetThreadId function to return a consistent value
	// to the user, as that's what a rational user would expect. So after calling
	// DuplicateHandle we save the value for the next time the user calls this
	// function. We save the value in thread-local storage, so each unique thread
	// sees a unique view of GetThreadId.
	HANDLE hThread       = kThreadIdInvalid; // Defined value in case DuplicateHandle fails without writing to it.
	HANDLE hThreadPseudo = GetCurrentThread();
	BOOL   bResult       = DuplicateHandle(GetCurrentProcess(), hThreadPseudo, GetCurrentProcess(), &hThread, 0, true, DUPLICATE_SAME_ACCESS);

	EAT_ASSERT(bResult && (hThread != kThreadIdInvalid));

	// On failure, report an invalid id rather than whatever happens to be in hThread.
	if(!bResult)
		return kThreadIdInvalid;

	EA::Thread::SetCurrentThreadHandle(hThread, true); // Need to eventually call CloseHandle on hThread, so we store it.

	return hThread;
}
EATHREADLIB_API EA::Thread::ThreadId EA::Thread::GetThreadId(EA::Thread::SysThreadId id)
{
	// The mapping is only known for threads that have a dynamic data record.
	EAThreadDynamicData* const pThreadData = EA::Thread::FindThreadDynamicData(id);

	if(!pThreadData)
		return EA::Thread::kThreadIdInvalid;

	return pThreadData->mhThread;
}
// Converts a ThreadId (thread HANDLE) into the system thread id.
// On the Win32 path, returns kSysThreadIdInvalid if the id cannot be determined.
// NOTE(review): if neither preprocessor branch below is taken, the function body is empty
// and nothing is returned; confirm that every supported configuration matches one of them.
EATHREADLIB_API EA::Thread::SysThreadId EA::Thread::GetSysThreadId(ThreadId id)
{
#if defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
// Win64 has this function natively.
return ::GetThreadId(id);
// Fast implementation of this, which has been verified:
// uintptr_t pTIB = __readgsqword(0x30);
// uint32_t threadId = *((uint32_t*)(((uint8_t*)pTIB) + 0x48));
// return (EA::Thread::SysThreadId)threadId;
#elif defined(EA_PLATFORM_WIN32)
// What we do here is first try to use the GetThreadId function, which is
// available on some later versions of WinXP and later OSs. If that doesn't
// work then we are using an earlier OS and we use the NtQueryInformationThread
// kernel function to read thread info.
typedef DWORD (WINAPI *GetThreadIdFunc)(HANDLE);
typedef BOOL (WINAPI *NtQueryInformationThreadFunc)(HANDLE, int, PVOID, ULONG, PULONG);
// We implement our own manual version of static variables here. We do this because
// the static variable mechanism the compiler provides wouldn't provide thread
// safety for us.
static volatile bool sInitialized = false;
static GetThreadIdFunc spGetThreadIdFunc = NULL;
static NtQueryInformationThreadFunc spNtQueryInformationThread = NULL;
// One-time lookup of the function pointers. No lock is taken here, so several threads
// may run this block concurrently; each of them would store the same lookup results.
if(!sInitialized)
{
HMODULE hKernel32 = GetModuleHandleA("kernel32.dll");
if(hKernel32)
spGetThreadIdFunc = (GetThreadIdFunc)(uintptr_t)GetProcAddress(hKernel32, "GetThreadId");
// Only look for the ntdll fallback when kernel32 doesn't export GetThreadId.
if(!spGetThreadIdFunc)
{
HMODULE hNTDLL = GetModuleHandleA("ntdll.dll");
if(hNTDLL)
spNtQueryInformationThread = (NtQueryInformationThreadFunc)(uintptr_t)GetProcAddress(hNTDLL, "NtQueryInformationThread");
}
// Set last, after the function pointers have been written.
sInitialized = true;
}
// Preferred path: the documented API.
if(spGetThreadIdFunc)
return (SysThreadId)spGetThreadIdFunc(id);
// Fallback path: query the thread's basic information and take the id from it.
if(spNtQueryInformationThread)
{
// Local declaration of the buffer that the query below fills in.
// NOTE(review): presumably mirrors the 32 bit layout of THREAD_BASIC_INFORMATION; verify.
struct THREAD_BASIC_INFORMATION_WIN32
{
BOOL ExitStatus;
PVOID TebBaseAddress;
DWORD UniqueProcessId;
DWORD UniqueThreadId;
DWORD AffinityMask;
DWORD Priority;
DWORD BasePriority;
};
THREAD_BASIC_INFORMATION_WIN32 tbi;
// Information class 0 is requested; a return value of 0 is treated as success.
if(spNtQueryInformationThread(id, 0, &tbi, sizeof(tbi), NULL) == 0)
return (SysThreadId)tbi.UniqueThreadId;
}
// Neither mechanism was available, or the query failed.
return kSysThreadIdInvalid;
#endif
}
EATHREADLIB_API EA::Thread::SysThreadId EA::Thread::GetSysThreadId()
{
	// The system thread id of the calling thread is exactly what Windows reports.
	const SysThreadId callingThreadSysId = ::GetCurrentThreadId();
	return callingThreadSysId;
}
EATHREADLIB_API int EA::Thread::GetThreadPriority()
{
	// Express the Windows priority of the calling thread as an offset from
	// THREAD_PRIORITY_NORMAL, then apply that offset to kThreadPriorityDefault.
	const int nWindowsPriority  = ::GetThreadPriority(GetCurrentThread());
	const int nOffsetFromNormal = nWindowsPriority - THREAD_PRIORITY_NORMAL;

	return kThreadPriorityDefault + nOffsetFromNormal;
}
EATHREADLIB_API bool EA::Thread::SetThreadPriority(int nPriority)
{
	EAT_ASSERT(nPriority != kThreadPriorityUnknown);

	// Map the EAThread priority onto the Windows scale: same offset, measured from
	// THREAD_PRIORITY_NORMAL instead of kThreadPriorityDefault.
	const HANDLE hCallingThread   = GetCurrentThread();
	int          nWindowsPriority = THREAD_PRIORITY_NORMAL + (nPriority - kThreadPriorityDefault);
	bool         bSucceeded       = (::SetThreadPriority(hCallingThread, nWindowsPriority) != 0);

	#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_XBOXONE)
		// A Windows process running in NORMAL_PRIORITY_CLASS is picky about the priority
		// passed in. When a value is rejected we walk upwards until one is accepted,
		// clamping to the extremes once we reach or pass them.
		for(; !bSucceeded; ++nWindowsPriority)
		{
			if(nWindowsPriority >= THREAD_PRIORITY_TIME_CRITICAL)
				return ::SetThreadPriority(hCallingThread, THREAD_PRIORITY_TIME_CRITICAL) != 0;

			if(nWindowsPriority <= THREAD_PRIORITY_IDLE)
				return ::SetThreadPriority(hCallingThread, THREAD_PRIORITY_IDLE) != 0;

			bSucceeded = (::SetThreadPriority(hCallingThread, nWindowsPriority) != 0);
		}
	#endif

	return bSucceeded;
}
EATHREADLIB_API void EA::Thread::SetThreadProcessor(int nProcessor)
{
	#if defined(EA_PLATFORM_XBOXONE)
		// A negative processor means "any": fall back to the default mask of all cores.
		DWORD mask = 0xFF;

		if(nProcessor >= 0)
			mask = (DWORD)(1 << nProcessor);

		SetThreadAffinityMask(GetCurrentThread(), mask);
	#else
		static const int nProcessorCount = GetProcessorCount();

		if(nProcessor < 0)
		{
			// This causes SetThreadIdealProcessor to reset to 'no ideal processor'.
			nProcessor = MAXIMUM_PROCESSORS;
		}
		else if(nProcessor >= nProcessorCount)
		{
			// Wrap indexes beyond the last processor back into the valid range.
			nProcessor %= nProcessorCount;
		}

		// We use SetThreadIdealProcessor rather than SetThreadAffinityMask on Windows.
		// The former is only a hint, so the OS may still move the thread when the ideal
		// processor is busy. The latter is a rigid assignment which can result in slower
		// performance and possibly hangs due to processor contention between threads.
		// The hint is the safer choice and likely gives better overall performance.
		SetThreadIdealProcessor(GetCurrentThread(), (DWORD)nProcessor);
	#endif
}
// Returns the StackBase field of the calling thread's thread information block (TIB).
// There is one implementation per supported platform/processor/compiler combination.
// NOTE(review): if none of the three conditions below holds, the function body is empty
// and nothing is returned; confirm that all supported configurations are covered.
void* EA::Thread::GetThreadStackBase()
{
#if defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_MSVC)
// Offset 0x18 from the FS segment register gives a pointer to
// the thread information block for the current thread
// VC++ also offers the __readfsdword() intrinsic, which would be better to use here.
NT_TIB* pTib;
__asm {
mov eax, fs:[18h]
mov pTib, eax
}
return (void*)pTib->StackBase;
#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64) && defined(EA_COMPILER_MSVC)
// VC++ also offers the __readgsdword() intrinsic, which is an alternative which could
// retrieve the current thread TEB if the following proves unreliable.
PNT_TIB64 pTib = reinterpret_cast<PNT_TIB64>(NtCurrentTeb());
return (void*)pTib->StackBase;
#elif defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_GCC)
// Same read of fs:0x18 as the MSVC x86 variant above, written in GCC inline assembly syntax.
NT_TIB* pTib;
asm ( "movl %%fs:0x18, %0\n"
: "=r" (pTib)
);
return (void*)pTib->StackBase;
#endif
}
#if defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(_MSC_VER) && (_MSC_VER >= 1400)
// People report on the Internet that this function can get you what CPU the current thread
// is running on. But that's false, as this function has been seen to return values greater than
// the number of physical or real CPUs present. For example, this function returns 6 for my
// Single CPU that's dual-hyperthreaded.
//
// Executes CPUID with EAX = 1 and returns the top 8 bits (31..24) of the resulting EBX.
// NOTE(review): presumably that field is the initial APIC id; confirm against the CPUID documentation.
// There is no return statement: the result is left in EAX by the inline assembly.
static int GetCurrentProcessorNumberCPUID()
{
_asm { mov eax, 1 }
_asm { cpuid }
_asm { shr ebx, 24 }
_asm { mov eax, ebx }
}
// Fallback used by GetThreadProcessor when the OS doesn't export GetCurrentProcessorNumber.
// The CPUID-derived value is wrapped into the range [0, processor count).
int GetCurrentProcessorNumberXP()
{
int cpuNumber = GetCurrentProcessorNumberCPUID();
int cpuCount = EA::Thread::GetProcessorCount();
return (cpuNumber % cpuCount); // I don't know if this is the right thing to do, but it's better than returning an impossible number and Windows XP is a fading OS as it is.
}
#endif
// Returns the number of the processor that the calling thread is currently running on.
// On the Win32 and Win64 paths, GetCurrentProcessorNumber is looked up in KERNEL32.DLL at
// run time; if the lookup fails, a fallback value is returned instead (see below).
EATHREADLIB_API int EA::Thread::GetThreadProcessor()
{
#if defined(EA_PLATFORM_WIN32)
// Only Windows Vista and later provides GetCurrentProcessorNumber.
// So we must dynamically link to this function.
// Both variables are thread-local, so each thread performs the lookup once for itself.
static EA_THREAD_LOCAL bool bInitialized = false;
static EA_THREAD_LOCAL DWORD (WINAPI *pfnGetCurrentProcessorNumber)() = NULL;
if(!bInitialized)
{
HMODULE hKernel32 = GetModuleHandleA("KERNEL32.DLL");
if(hKernel32)
pfnGetCurrentProcessorNumber = (DWORD (WINAPI*)())(uintptr_t)GetProcAddress(hKernel32, "GetCurrentProcessorNumber");
bInitialized = true;
}
if(pfnGetCurrentProcessorNumber)
return (int)(unsigned)pfnGetCurrentProcessorNumber();
// The OS function isn't available: use the CPUID-based fallback where it is compiled in, else report 0.
#if defined(EA_PLATFORM_WINDOWS) && defined(EA_PROCESSOR_X86) && defined(_MSC_VER) && (_MSC_VER >= 1400)
return GetCurrentProcessorNumberXP();
#else
return 0;
#endif
#elif defined(EA_PLATFORM_WIN64)
// Same per-thread dynamic lookup as in the Win32 branch above.
static EA_THREAD_LOCAL bool bInitialized = false;
static EA_THREAD_LOCAL DWORD (WINAPI *pfnGetCurrentProcessorNumber)() = NULL;
if(!bInitialized)
{
HMODULE hKernel32 = GetModuleHandleA("KERNEL32.DLL"); // Yes, we want to use Kernel32.dll. There is no Kernel64.dll on Win64.
if(hKernel32)
pfnGetCurrentProcessorNumber = (DWORD (WINAPI*)())(uintptr_t)GetProcAddress(hKernel32, "GetCurrentProcessorNumber");
bInitialized = true;
}
if(pfnGetCurrentProcessorNumber)
return (int)(unsigned)pfnGetCurrentProcessorNumber();
// The lookup failed: report processor 0.
return 0;
#else
// Remaining platforms call the function directly.
return (int)(unsigned)GetCurrentProcessorNumber();
#endif
}
// Records the requested affinity mask in the thread's dynamic data (if the thread is known to
// EAThread) and, where affinity is supported, applies it to the OS thread.
//
// id            - the thread to modify.
// nAffinityMask - one bit per processor; bits outside the process affinity mask are dropped
//                 before the value is handed to the OS.
//
// Note that the cached value is the mask as requested by the caller, not the clipped one.
EATHREADLIB_API void EA::Thread::SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask)
{
    // Update the affinity mask in the thread dynamic data cache.
    EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
    if(pTDD)
    {
        pTDD->mnThreadAffinityMask = nAffinityMask;
    }

    #if EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
        #if defined(EA_PLATFORM_XBOXONE)
            DWORD_PTR nProcessorCountMask = 0x7F; // default to all 7 available cores.
        #else
            // Fallback mask used only if GetProcessAffinityMask fails: the low N bits set for N processors.
            // (1 << N) on its own would be a single bit *above* the valid range, which clears every
            // valid bit of the request. Counts that do not fit the mask width select all bits, which also
            // avoids an out-of-range shift.
            const int nProcessorCount = GetProcessorCount();
            const int nMaskBitCount   = (int)(sizeof(DWORD_PTR) * 8);
            DWORD_PTR nProcessorCountMask = ((nProcessorCount > 0) && (nProcessorCount < nMaskBitCount))
                                                ? (((DWORD_PTR)1 << nProcessorCount) - 1)
                                                : ~(DWORD_PTR)0;
        #endif

        // Call the Windows library function.
        DWORD_PTR nProcessAffinityMask, nSystemAffinityMask;
        if(EA_LIKELY(GetProcessAffinityMask(GetCurrentProcess(), &nProcessAffinityMask, &nSystemAffinityMask)))
            nProcessorCountMask = nProcessAffinityMask;

        // A thread's affinity must be a subset of its process's affinity, so clip the request.
        nAffinityMask &= nProcessorCountMask;

        auto opResult = ::SetThreadAffinityMask(id, static_cast<DWORD_PTR>(nAffinityMask));
        EA_UNUSED(opResult);
        EAT_ASSERT_FORMATTED(opResult != 0, "The Windows platform SetThreadAffinityMask failed. GetLastError %x", GetLastError());
    #endif
}
// Reports the affinity mask last recorded for the given thread.
// The value comes from EAThread's dynamic data cache, not from the OS. Threads that
// have no dynamic data entry report kThreadAffinityMaskAny.
EATHREADLIB_API EA::Thread::ThreadAffinityMask EA::Thread::GetThreadAffinityMask(const EA::Thread::ThreadId& id)
{
    EAThreadDynamicData* const pThreadData = FindThreadDynamicData(id);

    if(!pThreadData)
        return kThreadAffinityMaskAny;

    return pThreadData->mnThreadAffinityMask;
}
// Internal SetThreadName API's so we don't repeat the implementations
namespace EA {
namespace Thread {
namespace Internal {
// Names a thread through the platform ::SetThreadName API on Xbox One builds that have
// EA_CAPILANO_DBG_ENABLED == 1; on every other build this is a no-op that returns true.
//
// threadId - handle of the thread to name.
// pName    - narrow (multibyte) name; it is converted to wide characters first.
//
// Returns true on success (or when the feature is compiled out), false if the name could
// not be converted or the platform call failed.
bool PixSetThreadName(EA::Thread::ThreadId threadId, const char* pName)
{
    EA_UNUSED(threadId); EA_UNUSED(pName);

    bool result = true;

    #if (defined(EA_PLATFORM_XBOXONE) && EA_CAPILANO_DBG_ENABLED == 1)
        wchar_t wName[EATHREAD_NAME_SIZE];
        const size_t nConverted = mbstowcs(wName, pName, EATHREAD_NAME_SIZE);

        if(nConverted == (size_t)-1)
        {
            // Invalid multibyte sequence: wName's contents are unspecified, so don't pass it on.
            result = false;
        }
        else
        {
            // mbstowcs writes no terminator when the converted name fills the whole buffer.
            wName[EATHREAD_NAME_SIZE - 1] = 0;
            result = (::SetThreadName(threadId, wName) == TRUE); // requires toolhelpx.lib
        }

        EAT_ASSERT(result);
    #endif

    return result;
}
// Names a thread via kernel32's SetThreadDescription when the running OS exports it.
//
// threadId - handle of the thread to name.
// pName    - narrow (multibyte) name; it is converted to wide characters first.
//
// Returns true if the API is unavailable (nothing to do) or the call succeeded; false if the
// name could not be converted or SetThreadDescription reported failure.
bool WinSetThreadName(EA::Thread::ThreadId threadId, const char* pName)
{
    bool result = true;

    typedef HRESULT(WINAPI *SetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);

    // Check if Windows Operating System has the 'SetThreadDescription" API.
    SetThreadDescription pSetThreadDescription = NULL;
    HMODULE hKernel32 = GetModuleHandleA("kernel32.dll");
    if (hKernel32)
        pSetThreadDescription = (SetThreadDescription)GetProcAddress(hKernel32, "SetThreadDescription");

    if (pSetThreadDescription)
    {
        wchar_t wName[EATHREAD_NAME_SIZE];
        const size_t nConverted = mbstowcs(wName, pName, EATHREAD_NAME_SIZE);

        if (nConverted == (size_t)-1)
        {
            // Invalid multibyte sequence: wName's contents are unspecified, so don't pass it on.
            result = false;
        }
        else
        {
            // mbstowcs writes no terminator when the converted name fills the whole buffer.
            wName[EATHREAD_NAME_SIZE - 1] = 0;
            result = SUCCEEDED(pSetThreadDescription(threadId, wName));
        }

        EAT_ASSERT(result);
    }

    return result;
}
// Names a thread by raising exception 0x406D1388 carrying a ThreadNameInfo record and then
// swallowing that exception locally.
// NOTE(review): presumably this is the Visual Studio debugger thread-naming convention (an attached
// debugger intercepts the exception and reads the record) -- confirm against Microsoft's documentation.
//
// threadId - the system thread id (not the handle) placed in the record.
// pName    - the narrow-character name placed in the record; only the pointer is stored.
void WinSetThreadNameByException(EA::Thread::SysThreadId threadId, const char* pName)
{
// Payload passed as the exception arguments.
struct ThreadNameInfo
{
DWORD dwType;
LPCSTR lpName;
DWORD dwThreadId;
DWORD dwFlags;
};
// This setjmp/longjmp weirdness is here to work around an apparent bug in the VS2013 debugger,
// whereby EBX will be trashed on return from RaiseException, causing bad things to happen in code
// which runs later. This only seems to happen when a debugger is attached and there's some managed
// code in the process.
jmp_buf jmpbuf;
__pragma(warning(push))
__pragma(warning(disable : 4611))
if (!setjmp(jmpbuf)) // First pass returns 0; the longjmp below re-enters here with 1 and skips the body.
{
ThreadNameInfo threadNameInfo = {0x1000, pName, threadId, 0};
// The argument count is expressed in ULONG_PTR-sized units, as RaiseException takes an array of ULONG_PTR.
__try { RaiseException(0x406D1388, 0, sizeof(threadNameInfo) / sizeof(ULONG_PTR), (CONST ULONG_PTR*)(uintptr_t)&threadNameInfo); }
__except (GetExceptionCode() == 0x406D1388 ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { }
// Leave via longjmp so the register state saved by setjmp is restored.
longjmp(jmpbuf, 1);
}
__pragma(warning(pop))
}
// Stores pName in the thread's dynamic data and, on supported Microsoft builds, pushes the
// stored name to the OS/debugger through every available naming mechanism.
//
// pTDD  - dynamic data of the thread to name; must not be NULL.
// pName - new name; truncated to EATHREAD_NAME_SIZE - 1 characters. It may be pTDD->mName itself,
//         in which case the stored name is simply re-applied. A NULL name is ignored.
void SetThreadName(EAThreadDynamicData* pTDD, const char* pName)
{
    if(!pName)
        return; // Nothing to copy; strncpy with a NULL source would crash.

    // Callers may pass the stored name back in. strncpy on overlapping buffers is undefined
    // behaviour, so only copy when the source is a different buffer.
    if(pName != pTDD->mName)
    {
        strncpy(pTDD->mName, pName, EATHREAD_NAME_SIZE);
        pTDD->mName[EATHREAD_NAME_SIZE - 1] = 0;
    }

    #if defined(EA_PLATFORM_WINDOWS) && defined(_MSC_VER) || (defined(EA_PLATFORM_XBOXONE))
        // Only name threads that have a non-empty name and a valid handle.
        if(pTDD->mName[0] && (pTDD->mhThread != EA::Thread::kThreadIdInvalid))
        {
            #if EATHREAD_NAMING == EATHREAD_NAMING_DISABLED
                bool namingEnabled = false;
            #elif EATHREAD_NAMING == EATHREAD_NAMING_ENABLED
                bool namingEnabled = true;
            #else
                bool namingEnabled = IsDebuggerPresent();
            #endif

            if(namingEnabled)
            {
                PixSetThreadName(pTDD->mhThread, pTDD->mName);
                WinSetThreadName(pTDD->mhThread, pTDD->mName);
                WinSetThreadNameByException(pTDD->mnThreadId, pTDD->mName);
            }
        }
    #endif
}
} // namespace Internal
} // namespace Thread
} // namespace EA
// Names the calling thread by forwarding to the ThreadId overload.
EATHREADLIB_API void EA::Thread::SetThreadName(const char* pName)
{
    SetThreadName(GetThreadId(), pName);
}
// Returns the calling thread's name by forwarding to the ThreadId overload.
EATHREADLIB_API const char* EA::Thread::GetThreadName()
{
    return GetThreadName(GetThreadId());
}
// Names the thread identified by 'id'. Threads that have no EAThread dynamic data
// entry are silently left alone.
EATHREADLIB_API void EA::Thread::SetThreadName(const EA::Thread::ThreadId& id, const char* pName)
{
    EAThreadDynamicData* const pThreadData = FindThreadDynamicData(id);

    if(!pThreadData)
        return;

    Internal::SetThreadName(pThreadData, pName);
}
// Returns the name stored for the thread identified by 'id', or an empty string when the
// thread has no EAThread dynamic data entry. The pointer refers to storage owned by that entry.
EATHREADLIB_API const char* EA::Thread::GetThreadName(const EA::Thread::ThreadId& id)
{
    EAThreadDynamicData* const pThreadData = FindThreadDynamicData(id);

    if(pThreadData)
        return pThreadData->mName;

    return "";
}
// Returns the number of processors available to the process.
//   - Xbox One: derived from the process affinity mask (index of its lowest clear bit),
//     defaulting to 6 if the mask cannot be queried.
//   - Windows:  dwNumberOfProcessors from GetSystemInfo, cached after the first call.
//   - other:    dwNumberOfProcessors from GetNativeSystemInfo, cached after the first call.
EATHREADLIB_API int EA::Thread::GetProcessorCount()
{
    #if defined(EA_PLATFORM_XBOXONE)
        // Capilano has 7-ish physical CPUs available to titles. We can access 50 - 90% of the 7th Core.
        // Check platform documentation for details.
        DWORD_PTR processMask;
        DWORD_PTR systemMask;
        unsigned long coreCount = 6;

        if(EA_LIKELY(GetProcessAffinityMask(GetCurrentProcess(), &processMask, &systemMask)))
        {
            // The first zero bit of the affinity mask marks the end of the usable cores.
            _BitScanForward(&coreCount, (unsigned long)~processMask);
        }

        return (int) coreCount;

    #elif defined(EA_PLATFORM_WINDOWS)
        static int sCachedCount = 0; // A plain int is enough here; racing initializers compute the same value.

        if(sCachedCount != 0)
            return sCachedCount;

        // KeQueryActiveProcessorCount
        // (NTKERNELAPI ULONG KeQueryActiveProcessorCount(PKAFFINITY ActiveProcessors))
        // might be a better source for this value.
        SYSTEM_INFO info;
        memset(&info, 0, sizeof(info));
        GetSystemInfo(&info);
        sCachedCount = (int)info.dwNumberOfProcessors;

        return sCachedCount;

    #else
        static int sCachedCount = 0; // A plain int is enough here; racing initializers compute the same value.

        if(sCachedCount != 0)
            return sCachedCount;

        // KeQueryActiveProcessorCount
        // (NTKERNELAPI ULONG KeQueryActiveProcessorCount(PKAFFINITY ActiveProcessors))
        // might be a better source for this value.
        SYSTEM_INFO info;
        memset(&info, 0, sizeof(info));
        GetNativeSystemInfo(&info);
        sCachedCount = (int)info.dwNumberOfProcessors;

        return sCachedCount;
    #endif
}
// Suspends the calling thread.
//   timeRelative == 0 : give up the rest of the time slice via SwitchToThread().
//   otherwise         : alertable sleep of timeRelative milliseconds via SleepEx().
//
// Background on the primitives:
//   Sleep(0) sleeps the current thread if any other thread of equal priority is ready to run.
//   Sleep(n) sleeps the current thread for up to n milliseconds if there is any other thread of any priority ready to run.
//   SwitchToThread() sleeps the current thread for one time slice if there is any other thread of any priority ready to run.
EATHREADLIB_API void EA::Thread::ThreadSleep(const ThreadTime& timeRelative)
{
    if(timeRelative != 0)
    {
        SleepEx((unsigned)timeRelative, TRUE);
        return;
    }

    // It's debatable whether SwitchToThread or Sleep(0) is the better choice here; the only
    // difference is that SwitchToThread also lets lower-priority threads run.
    SwitchToThread();
}
// Forward declarations of the thread dynamic data lookup functions, which are defined in another
// translation unit. One overload looks up by ThreadId, the other by SysThreadId.
namespace EA {
namespace Thread {
extern EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId);
extern EAThreadDynamicData* FindThreadDynamicData(SysThreadId sysThreadId);
}
}
// Ends the calling thread with the given return value.
// If the thread has EAThread dynamic data, that data is marked as ended, given the return value
// and released. The current-thread handle is then cleared and the platform's thread-exit function
// is called (ExitThread on Xbox One, _endthreadex on desktop; other Microsoft consoles fail an assertion).
void EA::Thread::ThreadEnd(intptr_t threadReturnValue)
{
    if(EAThreadDynamicData* const pThreadData = FindThreadDynamicData(GetThreadId()))
    {
        pThreadData->mnStatus      = Thread::kStatusEnded;
        pThreadData->mnReturnValue = threadReturnValue;
        pThreadData->Release();
    }

    // 'true' is passed just to be safe, since the identity of the caller is unknown here.
    EA::Thread::SetCurrentThreadHandle(kThreadIdInvalid, true);

    #if defined(EA_PLATFORM_XBOXONE)
        // _endthreadex is not supported on Capilano because it's not compatible with C++/CX and /ZW. Use of ExitThread could result in memory leaks
        // as ExitThread does not clean up memory allocated by the C runtime library.
        // https://forums.xboxlive.com/AnswerPage.aspx?qid=47c1607c-bb18-4bc4-a79a-a40c59444ff3&tgt=1
        ExitThread(static_cast<DWORD>(threadReturnValue));
    #elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PLATFORM_CONSOLE) && !defined(EA_PLATFORM_XBOXONE)
        EAT_FAIL_MSG("EA::Thread::ThreadEnd: Not supported by this platform.");
    #else
        _endthreadex((unsigned int)threadReturnValue);
    #endif
}
// Returns the current absolute thread time in milliseconds: the system tick count offset by
// EATHREAD_MIN_ABSOLUTE_TIME.
//
// The tick count is used because it has low overhead while still having a precision in the same
// range as the Win32 thread time slice. For comparison:
//     rdtsc takes ~5 cycles and has a nanosecond resolution, but it is unreliable.
//     GetTickCount() takes ~80 cycles and has ~15ms resolution.
//     timeGetTime() takes ~350 cpu cycles and has 1ms resolution.
//     QueryPerformanceCounter() takes ~3000 cpu cycles on most machines and has ~1us resolution.
//
// EATHREAD_MIN_ABSOLUTE_TIME is added so the result is never below the minimum absolute time.
// (This was required because GetTickCount64 starts at 0x0 for titles on capilano.)
EATHREADLIB_API EA::Thread::ThreadTime EA::Thread::GetThreadTime()
{
    #if defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
        const ULONGLONG tickCount = GetTickCount64();
        return (ThreadTime)(tickCount + EATHREAD_MIN_ABSOLUTE_TIME);
    #else
        // Note that this value matches the value used by some runtime assertion code within EA::Thread.
        // It would be best to define this as a shared constant between modules.
        const DWORD tickCount = GetTickCount();
        return (ThreadTime)(tickCount + EATHREAD_MIN_ABSOLUTE_TIME);
    #endif
}
// Installs the callback that AssertionFailure() invokes, together with the opaque context
// pointer that is passed back to it.
EATHREADLIB_API void EA::Thread::SetAssertionFailureFunction(EA::Thread::AssertionFailureFunction pAssertionFailureFunction, void* pContext)
{
    gpAssertionFailureContext  = pContext;
    gpAssertionFailureFunction = pAssertionFailureFunction;
}
// Reports a failed assertion.
// If a user callback has been installed it receives the expression text and the registered
// context. Otherwise, when EAT_ASSERT_ENABLED is set, the expression is written to the debugger
// output and (on MSVC) a debug break is triggered.
EATHREADLIB_API void EA::Thread::AssertionFailure(const char* pExpression)
{
    if(gpAssertionFailureFunction)
    {
        gpAssertionFailureFunction(pExpression, gpAssertionFailureContext);
        return;
    }

    #if EAT_ASSERT_ENABLED
        OutputDebugStringA("EA::Thread::AssertionFailure: ");
        OutputDebugStringA(pExpression);
        OutputDebugStringA("\n");
        #ifdef _MSC_VER
            __debugbreak();
        #endif
    #endif
}
// Converts an absolute timeout into the relative millisecond timeout that Win32 wait functions expect.
//
// timeoutAbsolute - kTimeoutNone, kTimeoutImmediate, or an absolute time from GetThreadTime().
//
// Returns INFINITE for kTimeoutNone, 0 for kTimeoutImmediate or a time already in the past, and
// otherwise the number of milliseconds remaining. The remaining time is clamped to INFINITE - 1
// so that a large difference can neither wrap to a short wait when narrowed to 32 bits nor be
// mistaken for INFINITE.
uint32_t EA::Thread::RelativeTimeoutFromAbsoluteTimeout(ThreadTime timeoutAbsolute)
{
    EAT_ASSERT((timeoutAbsolute == kTimeoutImmediate) || (timeoutAbsolute > EATHREAD_MIN_ABSOLUTE_TIME)); // Assert that the user didn't make the mistake of treating time as relative instead of absolute.

    DWORD timeoutRelative = 0;

    if (timeoutAbsolute == kTimeoutNone)
    {
        timeoutRelative = INFINITE;
    }
    else if (timeoutAbsolute == kTimeoutImmediate)
    {
        timeoutRelative = 0;
    }
    else
    {
        const ThreadTime timeCurrent(GetThreadTime());

        if (timeoutAbsolute > timeCurrent)
        {
            const ThreadTime timeRemaining     = timeoutAbsolute - timeCurrent;
            const DWORD      kMaxFiniteTimeout = INFINITE - 1;

            timeoutRelative = (timeRemaining < kMaxFiniteTimeout) ? static_cast<DWORD>(timeRemaining) : kMaxFiniteTimeout;
        }
        // else: the deadline has already passed, so the timeout stays 0.
    }

    EAT_ASSERT((timeoutRelative == INFINITE) || (timeoutRelative < 100000000)); // Assert that the timeout is a sane value and didn't wrap around.

    return timeoutRelative;
}
#endif // EA_PLATFORM_XXX
@@ -0,0 +1,304 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include "EABase/eabase.h"
#include "eathread/eathread_semaphore.h"
#include "eathread/eathread_sync.h"
#include <limits.h>
#if defined(EA_PLATFORM_MICROSOFT)
EA_DISABLE_ALL_VC_WARNINGS()
#include <Windows.h>
EA_RESTORE_ALL_VC_WARNINGS()
#if defined(EA_PLATFORM_WIN64)
#if !defined _Ret_maybenull_
#define _Ret_maybenull_
#endif
#if !defined _Reserved_
#define _Reserved_
#endif
extern "C" WINBASEAPI _Ret_maybenull_ HANDLE WINAPI CreateSemaphoreExW(_In_opt_ LPSECURITY_ATTRIBUTES, _In_ LONG, _In_ LONG, _In_opt_ LPCWSTR, _Reserved_ DWORD, _In_ DWORD);
#endif
#endif
#ifdef CreateSemaphore
#undef CreateSemaphore
#endif
#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
// Waits on 'handle' for up to 'milliseconds', hiding the difference between Windows API families:
// desktop builds use WaitForSingleObject, all others use the alertable WaitForSingleObjectEx.
// Returns the wait function's result code unchanged.
static DWORD EASemaphoreWaitForSingleObject(HANDLE handle, DWORD milliseconds)
{
    DWORD waitResult;

    #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
        waitResult = WaitForSingleObject(handle, milliseconds);
    #else
        waitResult = WaitForSingleObjectEx(handle, milliseconds, TRUE);
    #endif

    return waitResult;
}
// Puts the semaphore data into its "not yet created" state: no OS handle, zero count,
// no pending cancels, an INT_MAX ceiling, and intra-process by default.
EASemaphoreData::EASemaphoreData()
  : mhSemaphore(0),
    mnCount(0),
    mnCancelCount(0),
    mnMaxCount(INT_MAX),
    mbIntraProcess(true)
{
    // The counters are handed to the Interlocked* functions as LONG*, so the two types must match in size.
    static_assert(sizeof(int32_t) == sizeof(LONG), "We use int32_t and LONG interchangably. Windows (including Win64) uses 32 bit longs.");

    EAWriteBarrier();
}
// Folds 'cancelCount' abandoned waits back into mnCount.
// While mnCount is negative it is raised by up to cancelCount, but never above zero, using a
// compare-exchange retry loop. Whatever part of cancelCount could not be applied is added to mnCancelCount.
// A cancelCount <= 0 is a no-op.
void EASemaphoreData::UpdateCancelCount(int32_t cancelCount)
{
// This is used by the fast semaphore path. This function actually isn't called very often -- only under uncommon circumstances.
// This is based on an algorithm discussed on usenet in 2004.
// We safely increment count by min(cancelCount, -count) if count < 0.
int32_t oldCount, newCount, cmpCount;
if(cancelCount > 0)
{
oldCount = mnCount;
while(oldCount < 0)
{
// Increment old count by the number of cancels
if((newCount = oldCount + cancelCount) > 0)
newCount = 0; // ...but not greater than zero.
cmpCount = oldCount;
// InterlockedCompareExchange returns the value mnCount held before the call; the swap happened only if that equals cmpCount.
oldCount = InterlockedCompareExchange((LONG*)&mnCount, newCount, cmpCount);
if(oldCount == cmpCount)
{
// Swap succeeded: deduct the amount that was actually applied.
cancelCount -= (newCount - oldCount);
break;
}
// Otherwise mnCount changed underneath us; retry with the freshly observed value.
}
// Any remainder is parked in mnCancelCount.
if(cancelCount > 0)
InterlockedExchangeAdd((LONG*)&mnCancelCount, cancelCount);
}
}
// Builds semaphore creation parameters.
//   initialCount  - starting count of the semaphore.
//   bIntraProcess - whether the semaphore is for use within a single process.
//   pName         - optional name; copied and truncated to fit mName. NULL yields an empty name.
// The maximum count is always INT_MAX.
EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* pName)
  : mInitialCount(initialCount), mMaxCount(INT_MAX), mbIntraProcess(bIntraProcess)
{
    if(!pName)
    {
        mName[0] = 0;
        return;
    }

    const size_t lastIndex = sizeof(mName) - 1;
    strncpy(mName, pName, lastIndex);
    mName[lastIndex] = 0; // strncpy does not terminate when the source fills the buffer.
}
// Constructs a semaphore.
// With explicit parameters they are used as given. With NULL parameters the semaphore is
// initialized with default parameters when bDefaultParameters is true; otherwise Init is
// called with NULL, which leaves the semaphore uninitialized for a later explicit Init().
EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
{
    if(pSemaphoreParameters || !bDefaultParameters)
    {
        Init(pSemaphoreParameters);
        return;
    }

    SemaphoreParameters defaultParameters;
    Init(&defaultParameters);
}
// Constructs a semaphore with the given initial count and otherwise default parameters.
EA::Thread::Semaphore::Semaphore(int initialCount)
{
    SemaphoreParameters countOnlyParameters(initialCount);

    Init(&countOnlyParameters);
}
// Closes the OS semaphore handle, if one was ever created.
EA::Thread::Semaphore::~Semaphore()
{
    if(!mSemaphoreData.mhSemaphore)
        return;

    CloseHandle(mSemaphoreData.mhSemaphore);
}
// Creates the underlying OS semaphore from the given parameters.
// Does nothing and returns false if pSemaphoreParameters is NULL or a handle already exists.
// Otherwise copies the counts (clamping a negative initial count to 0) and the intra-process flag,
// creates the OS object, and returns whether a handle was obtained.
bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
{
if(pSemaphoreParameters && !mSemaphoreData.mhSemaphore)
{
mSemaphoreData.mnCount = pSemaphoreParameters->mInitialCount;
mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
if(mSemaphoreData.mnCount < 0)
mSemaphoreData.mnCount = 0;
mSemaphoreData.mbIntraProcess = pSemaphoreParameters->mbIntraProcess;
// If the fast semaphore is disabled, then we always act like inter-process as opposed to intra-process.
#if EATHREAD_FAST_MS_SEMAPHORE_ENABLED
const bool bIntraProcess = mSemaphoreData.mbIntraProcess;
#else
const bool bIntraProcess = false;
#endif
if(bIntraProcess)
{
// Note that we do things rather differently for intra-process, as we are
// implementing a fast semaphore. This semaphore will be at least 10 times
// faster than the OS semaphore for all Microsoft platforms for the case of
// successful immediate acquire of a semaphore. Semaphore posts (or releases
// will also be much faster than the OS version.
// On this path the OS semaphore starts at 0 and is unnamed; the logical count lives in mnCount.
#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
mSemaphoreData.mhSemaphore = CreateSemaphoreA(NULL, 0, INT_MAX/2, NULL); // Intentionally ignore mnCount and mnMaxCount here.
#else
mSemaphoreData.mhSemaphore = CreateSemaphoreExW(NULL, 0, INT_MAX/2, NULL, 0, SYNCHRONIZE | SEMAPHORE_MODIFY_STATE); // Intentionally ignore mnCount and mnMaxCount here.
#endif
}
else // Else we create a conventional Win32-style semaphore.
{
// Here the OS semaphore carries the real count and maximum, and is named if a name was given.
#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
mSemaphoreData.mhSemaphore = CreateSemaphoreA(NULL, (LONG)mSemaphoreData.mnCount, (LONG)mSemaphoreData.mnMaxCount, pSemaphoreParameters->mName[0] ? pSemaphoreParameters->mName : NULL);
#else
wchar_t wName[EAArrayCount(pSemaphoreParameters->mName)]; // We do an ASCII conversion.
// Every element of mName is widened, including the terminator and anything after it.
for(size_t c = 0; c < EAArrayCount(wName); c++)
wName[c] = (wchar_t)(uint8_t)pSemaphoreParameters->mName[c];
mSemaphoreData.mhSemaphore = CreateSemaphoreExW(NULL, (LONG)mSemaphoreData.mnCount, (LONG)mSemaphoreData.mnMaxCount, wName[0] ? wName : NULL, 0, SYNCHRONIZE | SEMAPHORE_MODIFY_STATE);
#endif
}
EAWriteBarrier();
EAT_ASSERT(mSemaphoreData.mhSemaphore != 0);
return (mSemaphoreData.mhSemaphore != 0);
}
return false;
}
// Acquires the semaphore, blocking until it is available or timeoutAbsolute is reached.
// Returns the remaining count (never negative on the fast path) on success, kResultTimeout if the
// wait timed out, or kResultError for any other wait failure.
int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
{
EAT_ASSERT(mSemaphoreData.mhSemaphore != 0);
// If the fast semaphore is disabled, then we always act like inter-process as opposed to intra-process.
#if EATHREAD_FAST_MS_SEMAPHORE_ENABLED
const bool bIntraProcess = mSemaphoreData.mbIntraProcess;
#else
const bool bIntraProcess = false;
#endif
if(bIntraProcess) // If this is true, we are using the fast semaphore pathway.
{
// Fast path: only touch the OS semaphore when the decrement shows the count was exhausted.
if(InterlockedDecrement((LONG*)&mSemaphoreData.mnCount) < 0) // InterlockedDecrement returns the new value. If the mnCount was > 0 before this decrement, then this Wait function will return very quickly.
{
const DWORD dw = EASemaphoreWaitForSingleObject(mSemaphoreData.mhSemaphore, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
if(dw != WAIT_OBJECT_0) // If there was a timeout...
{
// Give back the decrement made above, since this waiter is abandoning the wait.
mSemaphoreData.UpdateCancelCount(1); // or InterlockedIncrement(&mSemaphoreData.mnCancelCount); // The latter has a bug whereby mnCancelCount can increment indefinitely.
EAT_ASSERT(dw == WAIT_TIMEOUT); // Otherwise it was probably a timeout.
if(dw == WAIT_TIMEOUT)
return kResultTimeout;
return kResultError; // WAIT_FAILED
}
}
// It is by design that a semaphore post does a full memory barrier.
// We don't need such a barrier for this pathway to work, but rather
// it is expected by the user that such a barrier is executed. Investigation
// into the choice of a full vs. just read or write barrier has concluded
// (based on the Posix standard) that a full read-write barrier is expected.
EAReadWriteBarrier();
const int count = (int)mSemaphoreData.mnCount; // Make temporary to avoid race condition in ternary operator below.
return (count > 0 ? count : 0);
}
else
{
// Conventional path: the OS semaphore holds the real count; mnCount just mirrors it.
const DWORD dw = EASemaphoreWaitForSingleObject(mSemaphoreData.mhSemaphore, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
if(dw == WAIT_OBJECT_0)
return (int)InterlockedDecrement((LONG*)&mSemaphoreData.mnCount);
else if(dw == WAIT_TIMEOUT)
return kResultTimeout;
return kResultError;
}
}
// Releases the semaphore 'count' times.
// Returns the resulting count (never negative on the fast path), or kResultError if the post would
// exceed the maximum count (fast path) or ReleaseSemaphore fails (conventional path).
// A count of 0 changes nothing and returns the current value of mnCount.
int EA::Thread::Semaphore::Post(int count)
{
EAT_ASSERT((mSemaphoreData.mhSemaphore != 0) && (count >= 0));
if(count > 0)
{
// If the fast semaphore is disabled, then we always act like inter-process as opposed to intra-process.
#if EATHREAD_FAST_MS_SEMAPHORE_ENABLED
const bool bIntraProcess = mSemaphoreData.mbIntraProcess;
#else
const bool bIntraProcess = false;
#endif
if(bIntraProcess) // If this is true, we are using the fast semaphore pathway.
{
// It is by design that a semaphore post does a full memory barrier.
// We don't need such a barrier for this pathway to work, but rather
// it is expected by the user that such a barrier is executed. Investigation
// into the choice of a full vs. just read or write barrier has concluded
// (based on the Posix standard) that a full read-write barrier is expected.
EAReadWriteBarrier();
// First fold any parked cancels back into mnCount so the waiter count read below is as accurate as possible.
if((mSemaphoreData.mnCancelCount > 0) && (mSemaphoreData.mnCount < 0)) // Much of the time this will evaluate to false due to the first condition.
mSemaphoreData.UpdateCancelCount(InterlockedExchange((LONG*)&mSemaphoreData.mnCancelCount, 0)); // It's possible that mnCancelCount may have decremented down to zero between the previous line of code and this line of code.
const int currentCount = mSemaphoreData.mnCount;
if((mSemaphoreData.mnMaxCount - count) < currentCount) // If count would cause an overflow...
return kResultError; // We do what most OS implementations of max-count do. count = (mSemaphoreData.mnMaxCount - currentCount);
const int32_t nWaiterCount = -InterlockedExchangeAdd((LONG*)&mSemaphoreData.mnCount, count); // InterlockedExchangeAdd returns the initial value of mnCount. If it's below zero, then it's a count of waiters.
const int nNewCount = count - nWaiterCount;
if(nWaiterCount > 0) // If there were waiters blocking...
{
const int32_t nReleaseCount = (count < nWaiterCount) ? count : nWaiterCount; // Call ReleaseSemaphore for as many waiters as possible.
ReleaseSemaphore(mSemaphoreData.mhSemaphore, nReleaseCount, NULL); // Note that by the time this executes, nReleaseCount might be > than the actual number of waiting threads, due to timeouts.
}
return (nNewCount > 0 ? nNewCount : 0);
}
else
{
// Conventional path: update the mirrored count, then release the OS semaphore; undo the count if the release fails.
const int32_t nPreviousCount = InterlockedExchangeAdd((LONG*)&mSemaphoreData.mnCount, count);
const int nNewCount = nPreviousCount + count;
const BOOL result = ReleaseSemaphore(mSemaphoreData.mhSemaphore, count, NULL);
if(!result)
{
InterlockedExchangeAdd((LONG*)&mSemaphoreData.mnCount, -count);
EAT_ASSERT(result);
return kResultError;
}
return nNewCount;
}
}
return (int)mSemaphoreData.mnCount; // We don't worry if this value is changing. There is nothing that you can rely upon about this value anyway.
}
// Returns the semaphore's current count, never less than zero.
// On the fast pathway mnCount goes negative by the number of waiters, and mnCancelCount holds
// waiters that gave up but have not yet been rolled back into mnCount, so the two are summed
// before the result is clamped at zero.
int EA::Thread::Semaphore::GetCount() const
{
    EAReadBarrier();

    // Snapshot both values once so the clamp below works on a single consistent number.
    const int rawCount      = (int)mSemaphoreData.mnCount;
    const int pendingCancel = (int)mSemaphoreData.mnCancelCount;
    const int effective     = rawCount + pendingCancel;

    if(effective > 0)
        return effective;

    return 0;
}
#endif // EA_PLATFORM_XXX
@@ -0,0 +1,833 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include "EABase/eabase.h"
#include "eathread/eathread.h"
#include "eathread/eathread_callstack.h"
#include "eathread/eathread_mutex.h"
#include "eathread/eathread_sync.h"
#include "eathread/eathread_thread.h"
#include "eathread/internal/eathread_global.h"
#if EA_COMPILER_VERSION >= 1900 // VS2015+
// required for windows.h that has mismatch that is included in this file
EA_DISABLE_VC_WARNING(5031 5032)// #pragma warning(pop): likely mismatch, popping warning state pushed in different file / detected #pragma warning(push) with no corresponding
#endif
// Warnings 6312 and 6322 are spurious, as we are not executing a case that could possibly loop.
// 6312: Possible infinite loop: use of the constant EXCEPTION_CONTINUE_EXECUTION in the exception-filter expression of a try-except. Execution restarts in the protected block
// 6322: Empty _except block
EA_DISABLE_VC_WARNING(6312 6322)
#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
#include <new>
#include <process.h>
EA_DISABLE_ALL_VC_WARNINGS()
#include <Windows.h>
EA_RESTORE_ALL_VC_WARNINGS()
#if defined(_MSC_VER)
// Record describing a thread name: a type tag, a pointer to the narrow-character name,
// a thread id and a flags word.
// NOTE(review): the layout matches the record passed to RaiseException(0x406D1388, ...) elsewhere in
// EAThread; presumably it is the debugger thread-naming payload -- confirm it is still used in this file.
struct ThreadNameInfo{
DWORD dwType;
LPCSTR lpName;
DWORD dwThreadId;
DWORD dwFlags;
};
extern "C" WINBASEAPI DWORD WINAPI SetThreadIdealProcessor(_In_ HANDLE hThread, _In_ DWORD dwIdealProcessor);
extern "C" WINBASEAPI BOOL WINAPI IsDebuggerPresent();
#endif
#ifdef _MSC_VER
#ifndef EATHREAD_INIT_SEG_DEFINED
#define EATHREAD_INIT_SEG_DEFINED
#endif
// We are changing the initialization ordering here because in bulkbuild tool builds the initialization
// order of globals changes and causes a crash when we attempt to lock the Mutex guarding access
// of the EAThreadDynamicData objects. The code attempts to lock a mutex that has been destructed
// and causes a crash within the WindowsSDK. This ensures that global mutex object is not destructed
// until all user code has destructed.
//
#ifndef EATHREAD_INIT_SEG_DEFINED
#define EATHREAD_INIT_SEG_DEFINED
#pragma warning(disable: 4075) // warning C4075: initializers put in unrecognized initialization area
#pragma warning(disable: 4073) //warning C4073: initializers put in library initialization area
#pragma init_seg(lib)
#endif
#endif
// Forward declarations of helpers that are defined in another translation unit:
// the current-thread handle setter and the internal thread-naming routine.
namespace EA {
namespace Thread {
extern void SetCurrentThreadHandle(HANDLE hThread, bool bDynamic);
namespace Internal { extern void SetThreadName(EAThreadDynamicData* pTDD, const char* pName); };
}
}
namespace EA
{
namespace Thread
{
// Optional user-supplied allocator, defined elsewhere; used as the fallback source of thread dynamic data.
extern Allocator* gpAllocator;
// Round-robin cursor used when picking a processor for new threads.
static AtomicInt32 nLastProcessor = 0;
// Number of EAThreadDynamicData slots in the fixed pool.
const size_t kMaxThreadDynamicDataCount = 128;
// Global state for the Windows thread implementation: a fixed pool of raw, 8-byte-aligned storage
// for EAThreadDynamicData objects, a per-slot "allocated" flag, and the mutex that guards
// allocation and release of slots.
struct EAThreadGlobalVars
{
    // Raw storage only: one row per slot, each row sized to hold one EAThreadDynamicData.
    EA_PREFIX_ALIGN(8)
    char        gThreadDynamicData[kMaxThreadDynamicDataCount][sizeof(EAThreadDynamicData)] EA_POSTFIX_ALIGN(8);

    // 1 when the slot with the same index is in use, 0 when it is free.
    AtomicInt32 gThreadDynamicDataAllocated[kMaxThreadDynamicDataCount];

    Mutex       gThreadDynamicMutex;

    EAThreadGlobalVars() {}

    // Copying deliberately transfers nothing: the members are left default-constructed.
    EAThreadGlobalVars(const EAThreadGlobalVars&) {}

    // Assignment is likewise a no-op, but it must still return *this: flowing off the end of a
    // non-void function is undefined behaviour.
    EAThreadGlobalVars& operator=(const EAThreadGlobalVars&) { return *this; }
};
EATHREAD_GLOBALVARS_CREATE_INSTANCE;
// Hands out storage for one EAThreadDynamicData.
// The fixed pool is tried first: the first slot whose flag can be flipped from 0 to 1 is claimed
// and its raw storage returned. If the pool is exhausted, memory comes from gpAllocator when one
// is installed, or from operator new otherwise.
// Note the asymmetry: only the operator new fallback returns an already-constructed object.
EAThreadDynamicData* AllocateThreadDynamicData()
{
    AutoMutex lock(EATHREAD_GLOBALVARS.gThreadDynamicMutex);

    size_t slot = 0;
    while(slot < kMaxThreadDynamicDataCount)
    {
        if(EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[slot].SetValueConditional(1, 0))
            return (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[slot];

        slot++;
    }

    // Safety fallback; in practice the pool is almost never exhausted.
    if(!gpAllocator)
        return new EAThreadDynamicData; // This is a small problem, as this doesn't just allocate it but also constructs it.

    return (EAThreadDynamicData*)gpAllocator->Alloc(sizeof(EAThreadDynamicData));
}
// Destroys an EAThreadDynamicData and returns its storage to wherever it came from.
// Pointers inside the fixed pool are destructed in place and their slot is marked free. Anything
// else is assumed to come from the fallback path: it is destructed and freed through gpAllocator
// when one is installed, or deleted otherwise.
void FreeThreadDynamicData(EAThreadDynamicData* pEAThreadDynamicData)
{
    AutoMutex lock(EATHREAD_GLOBALVARS.gThreadDynamicMutex);

    EAThreadDynamicData* const pPoolBegin = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData;
    EAThreadDynamicData* const pPoolEnd   = pPoolBegin + kMaxThreadDynamicDataCount;

    if((pEAThreadDynamicData >= pPoolBegin) && (pEAThreadDynamicData < pPoolEnd))
    {
        pEAThreadDynamicData->~EAThreadDynamicData();
        EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[pEAThreadDynamicData - pPoolBegin].SetValue(0);
        return;
    }

    // Not from the pool, so it was allocated via the fallback mechanism.
    if(!gpAllocator)
    {
        delete pEAThreadDynamicData;
        return;
    }

    pEAThreadDynamicData->~EAThreadDynamicData();
    gpAllocator->Free(pEAThreadDynamicData);
}
// Searches the fixed pool for the entry whose thread handle equals threadId.
// Returns the first matching entry or NULL. Entries allocated through the fallback path
// are not part of the pool and therefore cannot be found here.
EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId)
{
    for(size_t slot = 0; slot != kMaxThreadDynamicDataCount; ++slot)
    {
        EAThreadDynamicData* const pCandidate = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[slot];

        if(pCandidate->mhThread == threadId)
            return pCandidate;
    }

    // There is no practical way to find the data unless thread-specific storage was involved.
    return NULL;
}
// Searches the fixed pool for the entry whose system thread id equals sysThreadId.
// Returns the first matching entry or nullptr. Entries allocated through the fallback path
// are not part of the pool and therefore cannot be found here.
EAThreadDynamicData* FindThreadDynamicData(EA::Thread::SysThreadId sysThreadId)
{
    for (size_t slot = 0; slot != kMaxThreadDynamicDataCount; ++slot)
    {
        EAThreadDynamicData* const pCandidate = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[slot];

        if (pCandidate->mnThreadId == sysThreadId)
            return pCandidate;
    }

    // There is no practical way to find the data unless thread-specific storage was involved.
    return nullptr;
}
// Reports whether a debugger is attached to the process.
// On Microsoft platforms this wraps the Win32 ::IsDebuggerPresent; elsewhere it is always false.
bool IsDebuggerPresent()
{
    bool bDebuggerAttached = false;

    #if defined(EA_PLATFORM_MICROSOFT)
        bDebuggerAttached = (::IsDebuggerPresent() != 0);
    #endif

    return bDebuggerAttached;
}
}
}
// Initializes the dynamic data to the "no thread yet" state: invalid handle, zero system
// thread id, status kStatusNone, zero return value, no user wrapper and a zero reference count.
// NOTE(review): members not listed here (e.g. the name buffer and start context used elsewhere in
// this file) are not initialized by this constructor -- confirm that callers set them before use.
EAThreadDynamicData::EAThreadDynamicData()
: mhThread(EA::Thread::kThreadIdInvalid),
mnThreadId(0), // Note that this is a Windows "thread id", whereas for us thread ids are what Windows calls a thread handle.
mnStatus(EA::Thread::Thread::kStatusNone),
mnReturnValue(0),
mpBeginThreadUserWrapper(NULL),
mnRefCount(0)
{
// Empty
}
// Closes the thread handle, if any, and resets both identifiers so that a recycled pool slot
// no longer matches lookups for the old thread.
EAThreadDynamicData::~EAThreadDynamicData()
{
    if(mhThread)
    {
        ::CloseHandle(mhThread);
    }

    mnThreadId = 0;
    mhThread   = EA::Thread::kThreadIdInvalid;
}
// Adds one reference to this dynamic data.
void EAThreadDynamicData::AddRef()
{
    (void)mnRefCount.Increment();
}
// Drops one reference; when the count reaches zero the object is destroyed and its storage
// is returned via FreeThreadDynamicData. The object must not be touched after that.
void EAThreadDynamicData::Release()
{
    if(mnRefCount.Decrement() != 0)
        return;

    EA::Thread::FreeThreadDynamicData(this);
}
// Default thread creation parameters: no user-supplied stack, stack size 0, default priority,
// default processor, any-processor affinity, an empty name, and priority boost left enabled.
EA::Thread::ThreadParameters::ThreadParameters()
: mpStack(NULL)
, mnStackSize(0)
, mnPriority(kThreadPriorityDefault)
, mnProcessor(kProcessorDefault)
, mnAffinityMask(kThreadAffinityMaskAny)
, mpName("")
, mbDisablePriorityBoost(false)
{
}
// Process-wide user wrappers for thread entry points; both start unset.
EA::Thread::RunnableFunctionUserWrapper EA::Thread::Thread::sGlobalRunnableFunctionUserWrapper = NULL;
EA::Thread::RunnableClassUserWrapper EA::Thread::Thread::sGlobalRunnableClassUserWrapper = NULL;
// Default processor selection for new threads, and the mask of processors eligible for round-robin selection (all 64 bits set).
EA::Thread::AtomicInt32 EA::Thread::Thread::sDefaultProcessor = kProcessorAny;
EA::Thread::AtomicUint64 EA::Thread::Thread::sDefaultProcessorMask = UINT64_C(0xffffffffffffffff);
// Returns the process-wide wrapper for function-style thread entry points (NULL if none was set).
EA::Thread::RunnableFunctionUserWrapper EA::Thread::Thread::GetGlobalRunnableFunctionUserWrapper()
{
    EA::Thread::RunnableFunctionUserWrapper const pCurrentWrapper = sGlobalRunnableFunctionUserWrapper;
    return pCurrentWrapper;
}
// Installs the process-wide wrapper for function-style thread entry points.
// It may only be set once in the entire game; a second attempt asserts and is ignored.
void EA::Thread::Thread::SetGlobalRunnableFunctionUserWrapper(EA::Thread::RunnableFunctionUserWrapper pUserWrapper)
{
    if(sGlobalRunnableFunctionUserWrapper == NULL)
    {
        sGlobalRunnableFunctionUserWrapper = pUserWrapper;
        return;
    }

    // Already set: keep the existing wrapper.
    EAT_ASSERT(false);
}
// Returns the process-wide wrapper for class-style (IRunnable) thread entry points (NULL if none was set).
EA::Thread::RunnableClassUserWrapper EA::Thread::Thread::GetGlobalRunnableClassUserWrapper()
{
    EA::Thread::RunnableClassUserWrapper const pCurrentWrapper = sGlobalRunnableClassUserWrapper;
    return pCurrentWrapper;
}
// Installs the process-wide wrapper for class-style thread entry points.
// It may only be set once; a second attempt asserts and is ignored.
void EA::Thread::Thread::SetGlobalRunnableClassUserWrapper(EA::Thread::RunnableClassUserWrapper pUserWrapper)
{
    if(sGlobalRunnableClassUserWrapper == NULL)
    {
        sGlobalRunnableClassUserWrapper = pUserWrapper;
        return;
    }

    // Already set: keep the existing wrapper.
    EAT_ASSERT(false);
}
// Helper that selects a target processor based on the provided ThreadParameters structure and the various
// pieces of shared state that EAThread maintains to implement a 'round-robin' style processor selection.
//
// pTP                   - thread parameters, may be NULL.
// sDefaultProcessor     - the user's default processor setting.
// sDefaultProcessorMask - processors eligible for round-robin selection (bit N = processor N).
//
// Returns an explicit processor index (>= 0), or EA::Thread::kProcessorDefault to let the scheduler choose.
// Selection order:
//   1. An explicit, non-negative pTP->mnProcessor wins.
//   2. On Microsoft platforms, no parameters or kProcessorAny defers to the scheduler.
//   3. A non-negative or kProcessorDefault sDefaultProcessor is used as is.
//   4. Otherwise the next processor allowed by sDefaultProcessorMask is picked round-robin.
int SelectProcessor(const EA::Thread::ThreadParameters* pTP, EA::Thread::AtomicInt32& sDefaultProcessor, EA::Thread::AtomicUint64& sDefaultProcessorMask)
{
    int nProcessor;

    if (pTP && (pTP->mnProcessor >= 0))
    {
        nProcessor = pTP->mnProcessor;

        // This is a small attempt to try to spread out threads between processors. We don't
        // care much if another thread happens to be created here and races with this.
        if (nProcessor == EA::Thread::nLastProcessor)
            ++EA::Thread::nLastProcessor;
    }
    else
    {
        #if defined(EA_PLATFORM_MICROSOFT)
        if (!pTP || pTP->mnProcessor == EA::Thread::kProcessorAny)
        {
            // If the processor is not specified, then allow the scheduler to
            // run the thread on any available processor
            nProcessor = EA::Thread::kProcessorDefault;
        }
        else
        #endif
        if (sDefaultProcessor >= 0) // If the user has identified a specific processor...
            nProcessor = sDefaultProcessor;
        else if(sDefaultProcessor == EA::Thread::kProcessorDefault) // If the user explicitly asked for the default processor
            nProcessor = sDefaultProcessor;
        else
        {
            // NOTE(rparolin): The reason we have this round-robin code is that the
            // originally we used it on Xenon OS which required us to pick a CPU to run on.
            // After the Xenon was deprecated this code remained and is now a functional
            // requirement. We should probably deprecate and remove in the future but
            // currently teams are dependent on it.

            // Only bits [0, MAXIMUM_PROCESSORS) can ever be selected below.
            // (Assumes 1 <= MAXIMUM_PROCESSORS <= 64, which holds for the Windows SDK values.)
            const uint64_t selectableBits = (~(uint64_t)0) >> (64 - MAXIMUM_PROCESSORS);
            const uint64_t processorMask  = sDefaultProcessorMask.GetValue() & selectableBits;

            if (processorMask == 0)
            {
                // No selectable processor is enabled; searching would never terminate,
                // so leave the choice to the scheduler instead.
                nProcessor = EA::Thread::kProcessorDefault;
            }
            else
            {
                do
                {
                    // The shared counter can step past MAXIMUM_PROCESSORS (the explicit-processor branch
                    // above increments it without wrapping, and concurrent callers can race), so wrap every
                    // observed value into range rather than testing for equality. This keeps the shift
                    // below in range and stops the counter from growing without bound.
                    const int nCounter = EA::Thread::nLastProcessor.Increment();
                    nProcessor = (int)((uint32_t)nCounter % (uint32_t)MAXIMUM_PROCESSORS);

                    if ((nCounter >= MAXIMUM_PROCESSORS) || (nCounter < 0))
                        EA::Thread::nLastProcessor.SetValueConditional(nProcessor, nCounter);
                } while ((((uint64_t)1 << nProcessor) & processorMask) == 0);
            }
        }
    }

    return nProcessor;
}
// Constructs a Thread object that is not yet associated with any thread dynamic data.
EA::Thread::Thread::Thread()
{
    mThreadData.mpData = nullptr;
}
// Copy-constructs a Thread that shares the source's dynamic data, taking an extra
// reference on that data when there is any.
EA::Thread::Thread::Thread(const Thread& t)
  : mThreadData(t.mThreadData)
{
    if(!mThreadData.mpData)
        return;

    mThreadData.mpData->AddRef();
}
// Makes this Thread share t's dynamic data.
// The incoming data is referenced before the outgoing data is released, which keeps
// self-assignment safe. Access to mpData is not synchronized; the user is expected to
// synchronize it or to use this Thread instance from a single thread.
EA::Thread::Thread& EA::Thread::Thread::operator=(const Thread& t)
{
    if(t.mThreadData.mpData)
    {
        t.mThreadData.mpData->AddRef();
    }

    if(mThreadData.mpData)
    {
        mThreadData.mpData->Release();
    }

    mThreadData = t.mThreadData;

    return *this;
}
// Drops this object's reference on the shared dynamic data, if it holds one.
// Access to mpData is not synchronized; the user is expected to synchronize it or to
// use this Thread instance from a single thread.
EA::Thread::Thread::~Thread()
{
    if(!mThreadData.mpData)
        return;

    mThreadData.mpData->Release();
}
// Native thread entry point used by Thread::Begin(RunnableFunction, ...).
//
// pContext is the EAThreadDynamicData that Begin() filled in before creating the thread:
//    mpStartContext[0]        - the user's RunnableFunction
//    mpStartContext[1]        - the user's context pointer, passed to that function
//    mpStartContext[2]        - the thread handle, registered as the current thread handle
//    mpBeginThreadUserWrapper - optional wrapper which, if non-NULL, is called instead of the
//                               user function and is handed the function and its context
//
// Returns the user function's (or wrapper's) return value truncated to unsigned int.
// The signature differs by platform because XBOXONE starts threads with CreateThread
// while the other platforms use _beginthreadex.
#if defined(EA_PLATFORM_XBOXONE)
static DWORD WINAPI RunnableFunctionInternal(void* pContext)
#else
static unsigned int __stdcall RunnableFunctionInternal(void* pContext)
#endif
{
// The parent thread is sharing memory with us and we need to
// make sure our view of it is synchronized with the parent.
EAReadWriteBarrier();
EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)pContext;
EA::Thread::RunnableFunction pFunction = (EA::Thread::RunnableFunction)pTDD->mpStartContext[0];
void* pCallContext = pTDD->mpStartContext[1];
// Publish this thread's handle, stack base, running status and name before any user code runs.
EA::Thread::SetCurrentThreadHandle(pTDD->mpStartContext[2], false);
pTDD->mpStackBase = EA::Thread::GetStackBase();
pTDD->mnStatus = EA::Thread::Thread::kStatusRunning;
EA::Thread::SetThreadName(pTDD->mhThread, pTDD->mName);
if(pTDD->mpBeginThreadUserWrapper != NULL)
{
EA::Thread::RunnableFunctionUserWrapper pWrapperFunction = (EA::Thread::RunnableFunctionUserWrapper)pTDD->mpBeginThreadUserWrapper;
// If a user wrapper is specified, call it and pass down pFunction and the user's context.
pTDD->mnReturnValue = pWrapperFunction(pFunction, pCallContext);
}
else
{
pTDD->mnReturnValue = pFunction(pCallContext);
}
// Copy the return value to a local first: pTDD is not touched again after Release() below.
// NOTE(review): presumably because Release() may free pTDD when this is the last reference - confirm.
const unsigned int nReturnValue = (unsigned int)pTDD->mnReturnValue;
EA::Thread::SetCurrentThreadHandle(0, false);
pTDD->mnStatus = EA::Thread::Thread::kStatusEnded;
pTDD->Release(); // Matches the "AddRef for the thread" done in Thread::Begin.
return nReturnValue;
}
// Applies nAffinityMask to the thread owned by this object.
//
// Does nothing if no thread data is attached yet (Begin has not been called)
// or if the thread has no handle. The mpData check mirrors the guards used by
// the other accessors of this class (e.g. Wake, GetId); without it, calling
// this function on a Thread that was never started dereferences a NULL pointer.
void EA::Thread::Thread::SetAffinityMask(EA::Thread::ThreadAffinityMask nAffinityMask)
{
    if(mThreadData.mpData && mThreadData.mpData->mhThread)
    {
        EA::Thread::SetThreadAffinityMask(mThreadData.mpData->mhThread, nAffinityMask);
    }
}
// Returns the affinity mask recorded in this object's thread data.
//
// Returns kThreadAffinityMaskAny if no thread data is attached yet (Begin has
// not been called) or if the thread has no handle. The mpData check mirrors
// the guards used by the other accessors of this class; without it, calling
// this function on a Thread that was never started dereferences a NULL pointer.
EA::Thread::ThreadAffinityMask EA::Thread::Thread::GetAffinityMask()
{
    if(mThreadData.mpData && mThreadData.mpData->mhThread)
    {
        return mThreadData.mpData->mnThreadAffinityMask;
    }

    return kThreadAffinityMaskAny;
}
// Starts a new thread that runs pFunction(pContext), or pUserWrapper(pFunction, pContext)
// when pUserWrapper is non-NULL.
//
// Parameters:
//    pFunction    - the function the new thread executes.
//    pContext     - opaque pointer handed to pFunction.
//    pTP          - optional thread parameters (stack size, name, priority, priority boost,
//                   processor and affinity mask are read here). May be NULL.
//    pUserWrapper - optional wrapper invoked in place of pFunction. May be NULL.
//
// Returns the new thread's handle as a ThreadId, or kThreadIdInvalid if thread creation failed.
//
// The thread is created suspended, configured, and only then resumed, so that the settings
// below are applied before RunnableFunctionInternal starts executing.
EA::Thread::ThreadId EA::Thread::Thread::Begin(RunnableFunction pFunction, void* pContext, const ThreadParameters* pTP, RunnableFunctionUserWrapper pUserWrapper)
{
// Check there is an entry for the current thread context in our ThreadDynamicData array.
// If there is none, one named "external" is created for the calling thread.
ThreadId thisThreadId = GetThreadId();
if(!FindThreadDynamicData(thisThreadId))
{
EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData;
if(pData)
{
pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
// Do not AddRef for thread execution because this is not an EAThread managed thread.
// NOTE(review): no Release matching the AddRef below is visible in this function - confirm whether that is intentional.
pData->AddRef(); // AddRef for this function, to be released upon this function's exit.
pData->mhThread = thisThreadId;
pData->mnThreadId = GetCurrentThreadId();
strncpy(pData->mName, "external", EATHREAD_NAME_SIZE);
pData->mName[EATHREAD_NAME_SIZE - 1] = 0; // strncpy does not terminate the string when the source fills the buffer.
pData->mpStackBase = EA::Thread::GetStackBase();
}
}
if(mThreadData.mpData)
mThreadData.mpData->Release(); // Drops this Thread object's reference to any previously attached thread data.
// Win32-like platforms don't support user-supplied stacks. A user-supplied stack pointer
// here would be a waste of user memory, and so we assert that mpStack == NULL.
EAT_ASSERT(!pTP || (pTP->mpStack == NULL));
// We use the pData temporary throughout this function because it's possible that mThreadData.mpData could be
// modified as we are executing, in particular in the case that mThreadData.mpData is destroyed and changed
// during execution.
EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData; // Note that we use a special new here which doesn't use the heap.
mThreadData.mpData = pData;
pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
pData->AddRef(); // AddRef for the thread, to be released upon the thread exiting.
pData->AddRef(); // AddRef for this function, to be released upon this function's exit.
pData->mhThread = kThreadIdInvalid;
// Start-up arguments read back by RunnableFunctionInternal on the new thread.
pData->mpStartContext[0] = pFunction;
pData->mpStartContext[1] = pContext;
pData->mpBeginThreadUserWrapper = pUserWrapper;
pData->mnThreadAffinityMask = pTP ? pTP->mnAffinityMask : kThreadAffinityMaskAny;
// A stack size of 0 is passed to the thread creation call when no parameters are given.
const unsigned nStackSize = pTP ? (unsigned)pTP->mnStackSize : 0;
#if defined(EA_PLATFORM_XBOXONE)
// Capilano no longer supports _beginthreadex. Using CreateThread instead may cause issues when using the MS CRT
// according to MSDN (memory leaks or possibly crashes) as it does not initialize the CRT. This a reasonable
// workaround while we wait for clarification from MS on what the recommended threading APIs are for Capilano.
HANDLE hThread = CreateThread(NULL, nStackSize, RunnableFunctionInternal, pData, CREATE_SUSPENDED, reinterpret_cast<LPDWORD>(&pData->mnThreadId));
#else
HANDLE hThread = (HANDLE)_beginthreadex(NULL, nStackSize, RunnableFunctionInternal, pData, CREATE_SUSPENDED, &pData->mnThreadId);
#endif
if(hThread)
{
// The thread exists but is still suspended; everything up to ResumeThread configures it.
pData->mhThread = hThread;
if(pTP)
SetName(pTP->mpName);
pData->mpStartContext[2] = hThread;
if(pTP && (pTP->mnPriority != kThreadPriorityDefault))
SetPriority(pTP->mnPriority);
#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_XBOXONE)
if (pTP)
{
auto result = SetThreadPriorityBoost(pData->mhThread, pTP->mbDisablePriorityBoost);
EAT_ASSERT(result != 0);
EA_UNUSED(result);
}
#endif
#if defined(EA_PLATFORM_MICROSOFT)
// An explicit kProcessorAny request applies the affinity mask; any other case applies the selected processor.
int nProcessor = SelectProcessor(pTP, sDefaultProcessor, sDefaultProcessorMask);
if(pTP && pTP->mnProcessor == EA::Thread::kProcessorAny)
SetAffinityMask(pTP->mnAffinityMask);
else
SetProcessor(nProcessor);
#endif
ResumeThread(hThread); // This will unsuspend the thread.
pData->Release(); // Matches AddRef for this function.
return hThread;
}
// Thread creation failed: drop all three references taken above and detach the thread data.
pData->Release(); // Matches AddRef for this function.
pData->Release(); // Matches AddRef for this Thread class above.
pData->Release(); // Matches AddRef for the thread above.
mThreadData.mpData = NULL; // mThreadData.mpData == pData
return (ThreadId)kThreadIdInvalid;
}
// Native thread entry point used by Thread::Begin(IRunnable*, ...).
//
// pContext is the EAThreadDynamicData that Begin() filled in before creating the thread:
//    mpStartContext[0]        - the user's IRunnable object
//    mpStartContext[1]        - the user's context pointer, passed to IRunnable::Run
//    mpStartContext[2]        - the thread handle, registered as the current thread handle
//    mpBeginThreadUserWrapper - optional wrapper which, if non-NULL, is called instead of
//                               Run and is handed the runnable and its context
//
// Returns the return value of Run (or of the wrapper) truncated to unsigned int.
// The signature differs by platform because XBOXONE starts threads with CreateThread
// while the other platforms use _beginthreadex.
//
// NOTE(review): unlike RunnableFunctionInternal, this entry point does not record
// pTDD->mpStackBase - confirm whether that omission is intentional.
#if defined(EA_PLATFORM_XBOXONE)
static DWORD WINAPI RunnableObjectInternal(void* pContext)
#else
static unsigned int __stdcall RunnableObjectInternal(void* pContext)
#endif
{
// The parent thread is sharing memory with us and we need to
// make sure our view of it is synchronized with the parent.
EAReadWriteBarrier();
EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)pContext;
EA::Thread::IRunnable* pRunnable = (EA::Thread::IRunnable*)pTDD->mpStartContext[0];
void* pCallContext = pTDD->mpStartContext[1];
// Publish this thread's handle, running status and name before any user code runs.
EA::Thread::SetCurrentThreadHandle(pTDD->mpStartContext[2], false);
pTDD->mnStatus = EA::Thread::Thread::kStatusRunning;
EA::Thread::SetThreadName(pTDD->mhThread, pTDD->mName);
if(pTDD->mpBeginThreadUserWrapper)
{
EA::Thread::RunnableClassUserWrapper pWrapperClass = (EA::Thread::RunnableClassUserWrapper)pTDD->mpBeginThreadUserWrapper;
// If a user wrapper is specified, call it and pass down pRunnable and the user's context.
pTDD->mnReturnValue = pWrapperClass(pRunnable, pCallContext);
}
else
pTDD->mnReturnValue = pRunnable->Run(pCallContext);
// Copy the return value to a local first: pTDD is not touched again after Release() below.
// NOTE(review): presumably because Release() may free pTDD when this is the last reference - confirm.
const unsigned int nReturnValue = (unsigned int)pTDD->mnReturnValue;
EA::Thread::SetCurrentThreadHandle(0, false);
pTDD->mnStatus = EA::Thread::Thread::kStatusEnded;
pTDD->Release(); // Matches the "AddRef for the thread" done in Thread::Begin.
return nReturnValue;
}
// Starts a new thread that runs pRunnable->Run(pContext), or pUserWrapper(pRunnable, pContext)
// when pUserWrapper is non-NULL.
//
// Parameters:
//    pRunnable    - the object whose Run function the new thread executes.
//    pContext     - opaque pointer handed to Run.
//    pTP          - optional thread parameters (stack size, name, priority, priority boost,
//                   processor and affinity mask are read here). May be NULL.
//    pUserWrapper - optional wrapper invoked in place of Run. May be NULL.
//
// Returns the new thread's handle as a ThreadId, or kThreadIdInvalid if thread creation failed.
//
// The thread is created suspended, configured, and only then resumed, so that the settings
// below are applied before RunnableObjectInternal starts executing.
EA::Thread::ThreadId EA::Thread::Thread::Begin(IRunnable* pRunnable, void* pContext, const ThreadParameters* pTP, RunnableClassUserWrapper pUserWrapper)
{
if(mThreadData.mpData)
mThreadData.mpData->Release(); // Drops this Thread object's reference to any previously attached thread data.
// Win32-like platforms don't support user-supplied stacks. A user-supplied stack pointer
// here would be a waste of user memory, and so we assert that mpStack == NULL.
EAT_ASSERT(!pTP || (pTP->mpStack == NULL));
// We use the pData temporary throughout this function because it's possible that mThreadData.mpData could be
// modified as we are executing, in particular in the case that mThreadData.mpData is destroyed and changed
// during execution.
EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData; // Note that we use a special new here which doesn't use the heap.
mThreadData.mpData = pData;
pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
pData->AddRef(); // AddRef for the thread, to be released upon the thread exiting.
pData->AddRef(); // AddRef for this function, to be released upon this function's exit.
pData->mhThread = kThreadIdInvalid;
// Start-up arguments read back by RunnableObjectInternal on the new thread.
pData->mpStartContext[0] = pRunnable;
pData->mpStartContext[1] = pContext;
pData->mpBeginThreadUserWrapper = pUserWrapper;
pData->mnThreadAffinityMask = pTP ? pTP->mnAffinityMask : kThreadAffinityMaskAny;
// A stack size of 0 is passed to the thread creation call when no parameters are given.
const unsigned nStackSize = pTP ? (unsigned)pTP->mnStackSize : 0;
#if defined(EA_PLATFORM_XBOXONE)
// Capilano no longer supports _beginthreadex. Using CreateThread instead may cause issues when using the MS CRT
// according to MSDN (memory leaks or possibly crashes) as it does not initialize the CRT. This a reasonable
// workaround while we wait for clarification from MS on what the recommended threading APIs are for Capilano.
HANDLE hThread = CreateThread(NULL, nStackSize, RunnableObjectInternal, pData, CREATE_SUSPENDED, reinterpret_cast<LPDWORD>(&pData->mnThreadId));
#else
HANDLE hThread = (HANDLE)_beginthreadex(NULL, nStackSize, RunnableObjectInternal, pData, CREATE_SUSPENDED, &pData->mnThreadId);
#endif
if(hThread)
{
// The thread exists but is still suspended; everything up to ResumeThread configures it.
pData->mhThread = hThread;
if(pTP)
SetName(pTP->mpName);
pData->mpStartContext[2] = hThread;
if(pTP && (pTP->mnPriority != kThreadPriorityDefault))
SetPriority(pTP->mnPriority);
#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_XBOXONE)
if (pTP)
{
auto result = SetThreadPriorityBoost(pData->mhThread, pTP->mbDisablePriorityBoost);
EAT_ASSERT(result != 0);
EA_UNUSED(result);
}
#endif
#if defined(EA_PLATFORM_MICROSOFT)
// An explicit kProcessorAny request applies the affinity mask; any other case applies the selected processor.
int nProcessor = SelectProcessor(pTP, sDefaultProcessor, sDefaultProcessorMask);
if(pTP && pTP->mnProcessor == EA::Thread::kProcessorAny)
SetAffinityMask(pTP->mnAffinityMask);
else
SetProcessor(nProcessor);
#endif
ResumeThread(hThread); // This will unsuspend the thread.
pData->Release(); // Matches AddRef for this function.
return hThread;
}
// Thread creation failed: drop all three references taken above and detach the thread data.
pData->Release(); // Matches AddRef for this function.
pData->Release(); // Matches AddRef for this Thread class above.
pData->Release(); // Matches AddRef for the thread above.
mThreadData.mpData = NULL; // mThreadData.mpData == pData
return (ThreadId)kThreadIdInvalid;
}
// Waits until the thread owned by this object has ended or until the absolute
// time 'timeoutAbsolute' is reached.
//
// Parameters:
//    timeoutAbsolute    - absolute time at which to give up waiting.
//    pThreadReturnValue - optional; if non-NULL and the thread has ended, receives the
//                         thread's return value.
//
// Returns:
//    kStatusRunning - the wait timed out and the thread is still running.
//    kStatusEnded   - the thread has ended (its handle is closed here as a side effect).
//    kStatusNone    - no thread was started before the timeout expired.
//
// If no thread has been started yet, this function polls until one is started (or the
// timeout expires) and then waits on it. The caller's pThreadReturnValue is forwarded on
// that path as well; previously it was dropped, so the caller's return value was silently
// never written when WaitForEnd was called before Begin.
EA::Thread::Thread::Status EA::Thread::Thread::WaitForEnd(const ThreadTime& timeoutAbsolute, intptr_t* pThreadReturnValue)
{
    // The mThreadData memory is shared between threads and when
    // reading it we must be synchronized.
    EAReadWriteBarrier();

    // A mutex lock around mpData is not taken below.
    // Todo: Consider that there may be a subtle race condition here if
    //       the user immediately calls WaitForEnd right after calling Begin.
    if(mThreadData.mpData)
    {
        if(mThreadData.mpData->mhThread) // If it was started...
        {
            // We must not call WaitForEnd from the thread we are waiting to end. That would result in a deadlock.
            EAT_ASSERT(mThreadData.mpData->mhThread != EA::Thread::GetThreadId());

            // dwResult normally should be 'WAIT_OBJECT_0', but can also be WAIT_ABANDONED or WAIT_FAILED.
            const DWORD dwResult = ::WaitForSingleObject(mThreadData.mpData->mhThread, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));

            if(dwResult == WAIT_TIMEOUT)
                return kStatusRunning;

            // Close the handle now so as to minimize handle proliferation.
            ::CloseHandle(mThreadData.mpData->mhThread);
            mThreadData.mpData->mhThread = 0;
            mThreadData.mpData->mnStatus = kStatusEnded;
        }

        if(pThreadReturnValue)
        {
            EAReadWriteBarrier();
            *pThreadReturnValue = mThreadData.mpData->mnReturnValue;
        }

        return kStatusEnded; // A thread was created, so it must have ended.
    }
    else
    {
        // Else the user hasn't started the thread yet, so we wait until the user starts it.
        // Ideally, what we really want to do here is wait for some kind of signal.
        // Instead for the time being we do a polling loop.
        while((!mThreadData.mpData || !mThreadData.mpData->mhThread) && (GetThreadTime() < timeoutAbsolute))
        {
            ThreadSleep(1);
            EAReadWriteBarrier();
            EACompilerMemoryBarrier();
        }

        // A thread showed up while we were polling: wait on it, making sure the
        // caller's out-parameter is passed along so the return value is reported.
        if(mThreadData.mpData)
            return WaitForEnd(timeoutAbsolute, pThreadReturnValue);
    }

    return kStatusNone; // No thread has been started.
}
// Reports the current state of the thread owned by this object without blocking.
//
// Returns kStatusNone when no thread data is attached, kStatusRunning while the
// OS still reports the thread as active, and kStatusEnded otherwise. In the
// kStatusEnded case the thread's return value is copied to *pThreadReturnValue
// when that pointer is non-NULL.
EA::Thread::Thread::Status EA::Thread::Thread::GetStatus(intptr_t* pThreadReturnValue) const
{
    // mThreadData is shared between threads, so synchronize our view of it before reading.
    EAReadWriteBarrier();

    // No mutex lock is taken around mpData here.
    if(!mThreadData.mpData)
        return kStatusNone;

    if(mThreadData.mpData->mhThread) // The thread has been started and its handle is still open.
    {
        // GetExitCodeThread is a hazard if the user's thread exits with a return value
        // equal to STILL_ACTIVE (i.e. 259). Either users are told not to do that, or this
        // code could instead use WaitForSingleObject(hThread, 0) and treat WAIT_TIMEOUT
        // as meaning that the thread is still active.
        DWORD dwExitCode;
        const bool bQueried = (::GetExitCodeThread(mThreadData.mpData->mhThread, &dwExitCode) != 0);

        if(bQueried)
        {
            if(dwExitCode == STILL_ACTIVE)
                return kStatusRunning; // Nothing has changed.

            // The thread is done; close the handle now to minimize handle proliferation.
            ::CloseHandle(mThreadData.mpData->mhThread);
            mThreadData.mpData->mhThread = 0;
        }
        // If the query failed we fall through and report the thread as ended.
    }

    if(pThreadReturnValue)
        *pThreadReturnValue = mThreadData.mpData->mnReturnValue;

    mThreadData.mpData->mnStatus = kStatusEnded;
    return kStatusEnded;
}
// Returns the handle of the thread owned by this object as a ThreadId,
// or kThreadIdInvalid when no thread data is attached.
EA::Thread::ThreadId EA::Thread::Thread::GetId() const
{
    // No mutex lock is taken around mpData here.
    if(!mThreadData.mpData)
        return kThreadIdInvalid;

    return (ThreadId)mThreadData.mpData->mhThread;
}
int EA::Thread::Thread::GetPriority() const
{
// A mutex lock around mpData is not needed below because
// mpData is never allowed to go from non-NULL to NULL.
if(mThreadData.mpData)
{
const int nPriority = ::GetThreadPriority(mThreadData.mpData->mhThread);
return kThreadPriorityDefault + (nPriority - THREAD_PRIORITY_NORMAL);
}
return kThreadPriorityUnknown;
}
// Sets the priority of the thread owned by this object. nPriority is expressed
// relative to kThreadPriorityDefault, which maps to THREAD_PRIORITY_NORMAL.
//
// Returns true if the OS accepted a priority, false if it did not or if no
// thread data is attached.
//
// For more information on how Windows handles thread priority based on process priority, see
// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/scheduling_priorities.asp
bool EA::Thread::Thread::SetPriority(int nPriority)
{
    EAT_ASSERT(nPriority != kThreadPriorityUnknown);

    // No mutex lock is taken around mpData here.
    if(!mThreadData.mpData)
        return false;

    int  nWin32Priority = THREAD_PRIORITY_NORMAL + (nPriority - kThreadPriorityDefault);
    bool bAccepted      = ::SetThreadPriority(mThreadData.mpData->mhThread, nWin32Priority) != 0;

    #if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_XBOXONE)
        // A Windows process running in NORMAL_PRIORITY_CLASS is picky about the priority
        // passed in, so on failure we walk upwards to the next priority that is supported,
        // clamping to the time-critical / idle extremes once the value is out of range.
        for(; !bAccepted; ++nWin32Priority)
        {
            if(nWin32Priority >= THREAD_PRIORITY_TIME_CRITICAL)
                return ::SetThreadPriority(mThreadData.mpData->mhThread, THREAD_PRIORITY_TIME_CRITICAL) != 0;

            if(nWin32Priority <= THREAD_PRIORITY_IDLE)
                return ::SetThreadPriority(mThreadData.mpData->mhThread, THREAD_PRIORITY_IDLE) != 0;

            bAccepted = ::SetThreadPriority(mThreadData.mpData->mhThread, nWin32Priority) != 0;
        }
    #endif

    return bAccepted;
}
// Requests that the thread owned by this object run on processor nProcessor.
// Processor indexes at or beyond the processor count wrap around (modulo the
// count). A negative index means "no specific processor". Does nothing when no
// thread data is attached.
void EA::Thread::Thread::SetProcessor(int nProcessor)
{
    if(!mThreadData.mpData)
        return;

    #if defined(EA_PLATFORM_XBOXONE)
        static int nProcessorCount = GetProcessorCount();

        int nCore = nProcessor;
        if(nCore >= nProcessorCount)
            nCore %= nProcessorCount;

        // A negative index selects the default mask of all 7 available cores;
        // otherwise the thread is pinned to the single requested core.
        const ThreadAffinityMask mask = (nCore >= 0) ? (((ThreadAffinityMask)1) << nCore)
                                                     : (ThreadAffinityMask)0x7F;

        SetThreadAffinityMask(mThreadData.mpData->mhThread, mask);
    #else
        static int nProcessorCount = GetProcessorCount();

        int nIdealProcessor = nProcessor;
        if(nIdealProcessor < 0)
            nIdealProcessor = MAXIMUM_PROCESSORS; // Makes SetThreadIdealProcessor reset to 'no ideal processor'.
        else if(nIdealProcessor >= nProcessorCount)
            nIdealProcessor %= nProcessorCount;

        // SetThreadIdealProcessor is used rather than SetThreadAffinityMask because it is
        // not a strict assignment: the OS may still move the thread when the ideal processor
        // is busy. SetThreadAffinityMask is more rigid and can lead to slower performance and
        // possibly hangs due to processor contention between threads. On Windows we favor
        // safety and likely better overall performance.
        SetThreadIdealProcessor(mThreadData.mpData->mhThread, (DWORD)nIdealProcessor);
    #endif
}
// Local declarations of the Win32 APC callback type and of QueueUserAPC, which
// Thread::Wake uses to queue an empty APC to the thread.
// NOTE(review): presumably declared here because the platform headers in use do not
// always expose them - confirm.
typedef VOID (APIENTRY *PAPCFUNC)(_In_ ULONG_PTR dwParam);
extern "C" WINBASEAPI DWORD WINAPI QueueUserAPC(_In_ PAPCFUNC pfnAPC, _In_ HANDLE hThread, _In_ ULONG_PTR dwData);
void EA::Thread::Thread::Wake()
{
// A mutex lock around mpData is not needed below because
// mpData is never allowed to go from non-NULL to NULL.
struct ThreadWake{ static void WINAPI Empty(ULONG_PTR){} };
if(mThreadData.mpData && mThreadData.mpData->mhThread)
::QueueUserAPC((PAPCFUNC)ThreadWake::Empty, mThreadData.mpData->mhThread, 0);
}
// Returns the name stored in this object's thread data, or an empty string
// when no thread data is attached. The returned pointer refers to storage
// inside the thread data and is not a copy.
const char* EA::Thread::Thread::GetName() const
{
    if(!mThreadData.mpData)
        return "";

    return mThreadData.mpData->mName;
}
// Assigns pName as the name of the thread owned by this object.
// The call is ignored when pName is NULL or when no thread data is attached.
void EA::Thread::Thread::SetName(const char* pName)
{
    if(!mThreadData.mpData || !pName)
        return;

    EA::Thread::Internal::SetThreadName(mThreadData.mpData, pName);
}
#endif // EA_PLATFORM_MICROSOFT
EA_RESTORE_VC_WARNING()
#if EA_COMPILER_VERSION >= 1900 // VS2015+
EA_RESTORE_VC_WARNING()// #pragma warning(pop): likely mismatch, popping warning state pushed in different file / detected #pragma warning(push) with no corresponding
#endif