| /* |
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. |
| |
| This file is part of Threading Building Blocks. |
| |
| Threading Building Blocks is free software; you can redistribute it |
| and/or modify it under the terms of the GNU General Public License |
| version 2 as published by the Free Software Foundation. |
| |
| Threading Building Blocks is distributed in the hope that it will be |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with Threading Building Blocks; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| |
| As a special exception, you may use this file as part of a free software |
| library without restriction. Specifically, if other files instantiate |
| templates or use macros or inline functions from this file, or you compile |
| this file and link it with other files to produce an executable, this |
| file does not by itself cause the resulting executable to be covered by |
| the GNU General Public License. This exception does not however |
| invalidate any other reasons why the executable file might be covered by |
| the GNU General Public License. |
| */ |
| |
| #ifndef __TBB_machine_H |
| #define __TBB_machine_H |
| |
| #include "tbb_stddef.h" |
| |
| #if _WIN32||_WIN64 |
| |
| #ifdef _MANAGED |
| #pragma managed(push, off) |
| #endif |
| |
| #if __MINGW32__ |
| #include "machine/linux_ia32.h" |
| extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); |
| #define __TBB_Yield() SwitchToThread() |
| #elif defined(_M_IX86) |
| #include "machine/windows_ia32.h" |
| #elif defined(_M_AMD64) |
| #include "machine/windows_intel64.h" |
| #elif _XBOX |
| #include "machine/xbox360_ppc.h" |
| #else |
| #error Unsupported platform |
| #endif |
| |
| #ifdef _MANAGED |
| #pragma managed(pop) |
| #endif |
| |
| #elif __linux__ || __FreeBSD__ |
| |
| #if __i386__ |
| #include "machine/linux_ia32.h" |
| #elif __x86_64__ |
| #include "machine/linux_intel64.h" |
| #elif __ia64__ |
| #include "machine/linux_ia64.h" |
| #endif |
| |
| #elif __APPLE__ |
| |
| #if __i386__ |
| #include "machine/linux_ia32.h" |
| #elif __x86_64__ |
| #include "machine/linux_intel64.h" |
| #elif __POWERPC__ |
| #include "machine/mac_ppc.h" |
| #endif |
| |
| #elif _AIX |
| |
| #include "machine/ibm_aix51.h" |
| |
| #elif __sun || __SUNPRO_CC |
| |
| #define __asm__ asm |
| #define __volatile__ volatile |
| #if __i386 || __i386__ |
| #include "machine/linux_ia32.h" |
| #elif __x86_64__ |
| #include "machine/linux_intel64.h" |
| #elif __sparc |
| #include "machine/sunos_sparc.h" |
| #endif |
| |
| #endif |
| |
| #if !defined(__TBB_CompareAndSwap4) \ |
| || !defined(__TBB_CompareAndSwap8) \ |
| || !defined(__TBB_Yield) \ |
| || !defined(__TBB_release_consistency_helper) |
| #error Minimal requirements for tbb_machine.h not satisfied |
| #endif |
| |
| #ifndef __TBB_load_with_acquire |
| //! Load with acquire semantics; i.e., no following memory operation can move above the load. |
| template<typename T> |
| inline T __TBB_load_with_acquire(const volatile T& location) { |
| T temp = location; |
| __TBB_release_consistency_helper(); |
| return temp; |
| } |
| #endif |
| |
| #ifndef __TBB_store_with_release |
| //! Store with release semantics; i.e., no prior memory operation can move below the store. |
| template<typename T, typename V> |
| inline void __TBB_store_with_release(volatile T& location, V value) { |
| __TBB_release_consistency_helper(); |
| location = T(value); |
| } |
| #endif |
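
// Illustrative only: the canonical use of this pair is a flag handoff, where
// the release store publishes data and the acquire load consumes it. The
// names `payload` and `data_ready` below are hypothetical.
//
//     static int payload;
//     static unsigned char data_ready;            // zero-initialized
//
//     // producer thread
//     payload = 42;
//     __TBB_store_with_release( data_ready, 1 );  // publish
//
//     // consumer thread
//     while( !__TBB_load_with_acquire( data_ready ) )
//         __TBB_Yield();
//     int x = payload;                            // guaranteed to read 42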
| |
#ifndef __TBB_Pause
// Fallback for ports without a "pause" primitive: yield to the OS scheduler instead.
inline void __TBB_Pause(int32_t) {
    __TBB_Yield();
}
#endif
| |
| namespace tbb { |
| namespace internal { |
| |
| //! Class that implements exponential backoff. |
| /** See implementation of spin_wait_while_eq for an example. */ |
| class atomic_backoff { |
    //! Time delay, in units of "pause" instructions.
    /** Should be approximately the number of "pause" instructions
        that take the same time as a context switch. */
| static const int32_t LOOPS_BEFORE_YIELD = 16; |
| int32_t count; |
| public: |
| atomic_backoff() : count(1) {} |
| |
| //! Pause for a while. |
| void pause() { |
| if( count<=LOOPS_BEFORE_YIELD ) { |
| __TBB_Pause(count); |
| // Pause twice as long the next time. |
| count*=2; |
| } else { |
            // The pause is now so long that we might as well yield the CPU to the scheduler.
| __TBB_Yield(); |
| } |
| } |
| |
    //! Pause while below the yield threshold, then return false immediately.
    /** Returns true if it actually paused, so the caller can switch to a
        different waiting strategy once it returns false. */
| bool bounded_pause() { |
| if( count<=LOOPS_BEFORE_YIELD ) { |
| __TBB_Pause(count); |
| // Pause twice as long the next time. |
| count*=2; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| void reset() { |
| count = 1; |
| } |
| }; |
| |
| //! Spin WHILE the value of the variable is equal to a given value |
| /** T and U should be comparable types. */ |
| template<typename T, typename U> |
| void spin_wait_while_eq( const volatile T& location, U value ) { |
| atomic_backoff backoff; |
| while( location==value ) backoff.pause(); |
| } |
| |
| //! Spin UNTIL the value of the variable is equal to a given value |
| /** T and U should be comparable types. */ |
| template<typename T, typename U> |
| void spin_wait_until_eq( const volatile T& location, const U value ) { |
| atomic_backoff backoff; |
| while( location!=value ) backoff.pause(); |
| } |
| |
| // T should be unsigned, otherwise sign propagation will break correctness of bit manipulations. |
| // S should be either 1 or 2, for the mask calculation to work correctly. |
| // Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short. |
| template<size_t S, typename T> |
| inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) { |
| volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 ); |
| #if __TBB_BIG_ENDIAN |
| const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) ); |
| #else |
| const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) ); |
| #endif |
| const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset; |
| atomic_backoff b; |
| uint32_t result; |
| for(;;) { |
| result = *base; // reload the base value which might change during the pause |
| uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset ); |
| uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); |
| // __TBB_CompareAndSwap4 presumed to have full fence. |
| result = __TBB_CompareAndSwap4( base, new_value, old_value ); |
| if( result==old_value // CAS succeeded |
| || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of interest have changed |
| break; |
| else // CAS failed but the bits of interest left unchanged |
| b.pause(); |
| } |
| return T((result & mask) >> bitoffset); |
| } |
| |
| template<size_t S, typename T> |
| inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) { |
| return __TBB_CompareAndSwapW((T *)ptr,value,comparand); |
| } |
| |
| template<> |
| inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) { |
| #ifdef __TBB_CompareAndSwap1 |
| return __TBB_CompareAndSwap1(ptr,value,comparand); |
| #else |
| return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand); |
| #endif |
| } |
| |
| template<> |
| inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) { |
| #ifdef __TBB_CompareAndSwap2 |
| return __TBB_CompareAndSwap2(ptr,value,comparand); |
| #else |
| return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand); |
| #endif |
| } |
| |
| template<> |
| inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { |
| return __TBB_CompareAndSwap4(ptr,value,comparand); |
| } |
| |
| template<> |
| inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) { |
| return __TBB_CompareAndSwap8(ptr,value,comparand); |
| } |
| |
| template<size_t S, typename T> |
| inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { |
| atomic_backoff b; |
| T result; |
| for(;;) { |
| result = *reinterpret_cast<volatile T *>(ptr); |
| // __TBB_CompareAndSwapGeneric presumed to have full fence. |
| if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result ) |
| break; |
| b.pause(); |
| } |
| return result; |
| } |
| |
| template<size_t S, typename T> |
| inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) { |
| atomic_backoff b; |
| T result; |
| for(;;) { |
| result = *reinterpret_cast<volatile T *>(ptr); |
| // __TBB_CompareAndSwapGeneric presumed to have full fence. |
| if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result ) |
| break; |
| b.pause(); |
| } |
| return result; |
| } |
| |
// The macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type
// whose alignment is at least as strict as that of T. The type should have a
// trivial default constructor and destructor, so that arrays of it can be
// declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T)
// expands to a type bigger than T.
// The default definition here works on machines where integers are naturally
// aligned and the strictest alignment is 16 bytes.
| #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict |
| |
| #if __GNUC__ || __SUNPRO_CC |
| struct __TBB_machine_type_with_strictest_alignment { |
| int member[4]; |
| } __attribute__((aligned(16))); |
| #elif _MSC_VER |
| __declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment { |
| int member[4]; |
| }; |
| #else |
| #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment |
| #endif |
| |
| template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;}; |
| template<> struct type_with_alignment<1> { char member; }; |
| template<> struct type_with_alignment<2> { uint16_t member; }; |
| template<> struct type_with_alignment<4> { uint32_t member; }; |
| template<> struct type_with_alignment<8> { uint64_t member; }; |
| |
#if _MSC_VER || (defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__<=2)
//! Work-around for a bug in GCC 3.2 and in MSVC compilers.
/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T
    has not yet been instantiated. The work-around forces instantiation by
    forcing computation of sizeof(T) before __alignof(T). */
| template<size_t Size, typename T> |
| struct work_around_alignment_bug { |
| #if _MSC_VER |
| static const size_t alignment = __alignof(T); |
| #else |
| static const size_t alignment = __alignof__(T); |
| #endif |
| }; |
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment> |
| #elif __GNUC__ || __SUNPRO_CC |
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)> |
| #else |
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment |
| #endif |
#endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */
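
// Usage sketch (illustrative): properly aligned raw storage for an object of
// type T, e.g. for placement new; this is the same purpose the macro serves
// inside tbb::aligned_space.
//
//     union storage_t {
//         __TBB_TypeWithAlignmentAtLeastAsStrict(T) aligner;
//         unsigned char bytes[sizeof(T)];
//     };
//     storage_t storage;
//     T* p = new( storage.bytes ) T;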
| |
// This class is a template so that the static table below is instantiated
// only in modules that actually use it.
| template<typename T> |
| struct reverse { |
| static const T byte_table[256]; |
| }; |
// An efficient implementation of __TBB_ReverseByte uses this 256-entry lookup
// table holding the bit-reversed values of 0..255; the same values could be
// computed on the fly at a slightly higher cost.
| template<typename T> |
| const T reverse<T>::byte_table[256] = { |
| 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, |
| 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, |
| 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, |
| 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, |
| 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, |
| 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, |
| 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, |
| 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, |
| 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, |
| 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, |
| 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, |
| 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, |
| 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, |
| 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, |
| 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, |
| 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF |
| }; |
| |
| } // namespace internal |
| } // namespace tbb |
| |
| #ifndef __TBB_CompareAndSwap1 |
| #define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t> |
| #endif |
| |
| #ifndef __TBB_CompareAndSwap2 |
| #define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t> |
| #endif |
| |
| #ifndef __TBB_CompareAndSwapW |
| #define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd1 |
| #define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd2 |
| #define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd4 |
| #define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd8 |
| #define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndAddW |
| #define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndStore1 |
| #define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndStore2 |
| #define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndStore4 |
| #define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndStore8 |
| #define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t> |
| #endif |
| |
| #ifndef __TBB_FetchAndStoreW |
| #define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t> |
| #endif |
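
// Illustrative use of the word-sized operations (the variable `counter` is
// hypothetical): each returns the value held immediately before the update.
//
//     ptrdiff_t counter = 0;
//     ptrdiff_t old  = __TBB_FetchAndAddW( &counter, 1 );    // old==0, counter==1
//     ptrdiff_t prev = __TBB_FetchAndStoreW( &counter, 5 );  // prev==1, counter==5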
| |
| #if __TBB_DECL_FENCED_ATOMICS |
| |
| #ifndef __TBB_CompareAndSwap1__TBB_full_fence |
| #define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1 |
| #endif |
| #ifndef __TBB_CompareAndSwap1acquire |
| #define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence |
| #endif |
| #ifndef __TBB_CompareAndSwap1release |
| #define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_CompareAndSwap2__TBB_full_fence |
| #define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2 |
| #endif |
| #ifndef __TBB_CompareAndSwap2acquire |
| #define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence |
| #endif |
| #ifndef __TBB_CompareAndSwap2release |
| #define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_CompareAndSwap4__TBB_full_fence |
| #define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4 |
| #endif |
| #ifndef __TBB_CompareAndSwap4acquire |
| #define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence |
| #endif |
| #ifndef __TBB_CompareAndSwap4release |
| #define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_CompareAndSwap8__TBB_full_fence |
| #define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8 |
| #endif |
| #ifndef __TBB_CompareAndSwap8acquire |
| #define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence |
| #endif |
| #ifndef __TBB_CompareAndSwap8release |
| #define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd1__TBB_full_fence |
| #define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1 |
| #endif |
| #ifndef __TBB_FetchAndAdd1acquire |
| #define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndAdd1release |
| #define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd2__TBB_full_fence |
| #define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2 |
| #endif |
| #ifndef __TBB_FetchAndAdd2acquire |
| #define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndAdd2release |
| #define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd4__TBB_full_fence |
| #define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4 |
| #endif |
| #ifndef __TBB_FetchAndAdd4acquire |
| #define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndAdd4release |
| #define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndAdd8__TBB_full_fence |
| #define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8 |
| #endif |
| #ifndef __TBB_FetchAndAdd8acquire |
| #define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndAdd8release |
| #define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndStore1__TBB_full_fence |
| #define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1 |
| #endif |
| #ifndef __TBB_FetchAndStore1acquire |
| #define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndStore1release |
| #define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndStore2__TBB_full_fence |
| #define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2 |
| #endif |
| #ifndef __TBB_FetchAndStore2acquire |
| #define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndStore2release |
| #define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndStore4__TBB_full_fence |
| #define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4 |
| #endif |
| #ifndef __TBB_FetchAndStore4acquire |
| #define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndStore4release |
| #define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence |
| #endif |
| |
| #ifndef __TBB_FetchAndStore8__TBB_full_fence |
| #define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8 |
| #endif |
| #ifndef __TBB_FetchAndStore8acquire |
| #define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence |
| #endif |
| #ifndef __TBB_FetchAndStore8release |
| #define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence |
| #endif |
| |
| #endif // __TBB_DECL_FENCED_ATOMICS |
| |
| // Special atomic functions |
| #ifndef __TBB_FetchAndAddWrelease |
| #define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW |
| #endif |
| |
| #ifndef __TBB_FetchAndIncrementWacquire |
| #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) |
| #endif |
| |
| #ifndef __TBB_FetchAndDecrementWrelease |
| #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) |
| #endif |
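
// Illustrative use (the names refcount, destroy, and object are hypothetical):
// acquire semantics when taking a reference, release semantics when dropping
// one, so the object's state is safely published to whichever thread destroys it.
//
//     // taking a reference:
//     __TBB_FetchAndIncrementWacquire( &refcount );
//     // releasing a reference; the thread that sees the old value 1 destroys:
//     if( __TBB_FetchAndDecrementWrelease( &refcount )==1 )
//         destroy( object );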
| |
| #if __TBB_WORDSIZE==4 |
| // On 32-bit platforms, "atomic.h" requires definition of __TBB_Store8 and __TBB_Load8 |
| #ifndef __TBB_Store8 |
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        // Re-read through a volatile pointer so the load is not hoisted out of the loop.
        int64_t result = *(volatile int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
        b.pause();
    }
}
| #endif |
| |
| #ifndef __TBB_Load8 |
| inline int64_t __TBB_Load8 (const volatile void *ptr) { |
| int64_t result = *(int64_t *)ptr; |
| result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result); |
| return result; |
| } |
| #endif |
| #endif /* __TBB_WORDSIZE==4 */ |
| |
| #ifndef __TBB_Log2 |
| inline intptr_t __TBB_Log2( uintptr_t x ) { |
| if( x==0 ) return -1; |
| intptr_t result = 0; |
| uintptr_t tmp; |
| #if __TBB_WORDSIZE>=8 |
| if( (tmp = x>>32) ) { x=tmp; result += 32; } |
| #endif |
| if( (tmp = x>>16) ) { x=tmp; result += 16; } |
| if( (tmp = x>>8) ) { x=tmp; result += 8; } |
| if( (tmp = x>>4) ) { x=tmp; result += 4; } |
| if( (tmp = x>>2) ) { x=tmp; result += 2; } |
| return (x&2)? result+1: result; |
| } |
| #endif |
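
// Spot checks (illustrative): __TBB_Log2(1)==0, __TBB_Log2(3)==1,
// __TBB_Log2(1000)==9, and __TBB_Log2(0) returns -1 by definition;
// i.e. the result is the index of the most significant set bit.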
| |
#ifndef __TBB_AtomicOR
//! Atomically OR the given bits into a word via a compare-and-swap loop.
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t value ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|value, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
| |
#ifndef __TBB_AtomicAND
//! Atomically AND a word with the given mask via a compare-and-swap loop.
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t value ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&value, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
| |
#ifndef __TBB_TryLockByte
//! Try to acquire a byte lock: returns true if the flag was 0 and is now 1.
inline bool __TBB_TryLockByte( unsigned char &flag ) {
| return __TBB_CompareAndSwap1(&flag,1,0)==0; |
| } |
| #endif |
| |
#ifndef __TBB_LockByte
//! Acquire a byte lock, spinning with exponential backoff until it is free.
inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
| if ( !__TBB_TryLockByte(flag) ) { |
| tbb::internal::atomic_backoff b; |
| do { |
| b.pause(); |
| } while ( !__TBB_TryLockByte(flag) ); |
| } |
| return 0; |
| } |
| #endif |
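
// Usage sketch (illustrative; the flag `lock` is hypothetical): a minimal spin
// lock over a single byte. Unlocking is a release store of 0, so writes made
// inside the critical section are published to the next owner.
//
//     static unsigned char lock;                 // zero-initialized == free
//     __TBB_LockByte( lock );
//     /* ... critical section ... */
//     __TBB_store_with_release( lock, (unsigned char)0 );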
| |
#ifndef __TBB_ReverseByte
//! Reverse the order of bits in a byte via table lookup.
inline unsigned char __TBB_ReverseByte(unsigned char src) {
| return tbb::internal::reverse<unsigned char>::byte_table[src]; |
| } |
| #endif |
| |
//! Reverse the order of bits in a value of integral type T.
template<typename T>
T __TBB_ReverseBits(T src)
{
| T dst; |
| unsigned char *original = (unsigned char *) &src; |
| unsigned char *reversed = (unsigned char *) &dst; |
| |
| for( int i = sizeof(T)-1; i >= 0; i-- ) |
| reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); |
| |
| return dst; |
| } |
| |
| #endif /* __TBB_machine_H */ |