/*
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * Copyright (c) 2009 Helge Bahmann
 * Copyright (c) 2012 Tim Blechmann
 * Copyright (c) 2014 - 2018 Andrey Semashev
 */
/*!
 * \file   atomic/detail/ops_gcc_x86_dcas.hpp
 *
 * This header contains implementation of the double-width CAS primitive for x86.
 */
#ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
#define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_

#include <cstddef>
#include <boost/cstdint.hpp>
#include <boost/memory_order.hpp>
#include <boost/atomic/detail/config.hpp>
#include <boost/atomic/detail/storage_traits.hpp>
#include <boost/atomic/detail/string_ops.hpp>
#include <boost/atomic/capabilities.hpp>

#ifdef BOOST_HAS_PRAGMA_ONCE
#pragma once
#endif
namespace boost {
namespace atomics {
namespace detail {

// Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory
// operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang
// is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason
// we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards.
// Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster.
// The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer
// with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it.

#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
  37. template< bool Signed >
  38. struct gcc_dcas_x86
  39. {
  40. typedef typename storage_traits< 8u >::type storage_type;
  41. typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t;
  42. static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 8u;
  43. static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 8u;
  44. static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed;
  45. static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
  46. static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
  47. static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
  48. {
  49. if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
  50. {
  51. #if defined(__SSE__)
  52. typedef float xmm_t __attribute__((__vector_size__(16)));
  53. xmm_t xmm_scratch;
  54. __asm__ __volatile__
  55. (
  56. #if defined(__AVX__)
  57. "vmovq %[value], %[xmm_scratch]\n\t"
  58. "vmovq %[xmm_scratch], %[storage]\n\t"
  59. #elif defined(__SSE2__)
  60. "movq %[value], %[xmm_scratch]\n\t"
  61. "movq %[xmm_scratch], %[storage]\n\t"
  62. #else
  63. "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
  64. "movlps %[value], %[xmm_scratch]\n\t"
  65. "movlps %[xmm_scratch], %[storage]\n\t"
  66. #endif
  67. : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch)
  68. : [value] "m" (v)
  69. : "memory"
  70. );
  71. #else
  72. __asm__ __volatile__
  73. (
  74. "fildll %[value]\n\t"
  75. "fistpll %[storage]\n\t"
  76. : [storage] "=m" (storage)
  77. : [value] "m" (v)
  78. : "memory"
  79. );
  80. #endif
  81. }
  82. else
  83. {
  84. #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  85. __asm__ __volatile__
  86. (
  87. "xchgl %%ebx, %%esi\n\t"
  88. "movl %%eax, %%ebx\n\t"
  89. "movl (%[dest]), %%eax\n\t"
  90. "movl 4(%[dest]), %%edx\n\t"
  91. ".align 16\n\t"
  92. "1: lock; cmpxchg8b (%[dest])\n\t"
  93. "jne 1b\n\t"
  94. "xchgl %%ebx, %%esi\n\t"
  95. :
  96. : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
  97. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
  98. );
  99. #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  100. __asm__ __volatile__
  101. (
  102. "movl %[dest_lo], %%eax\n\t"
  103. "movl %[dest_hi], %%edx\n\t"
  104. ".align 16\n\t"
  105. "1: lock; cmpxchg8b %[dest_lo]\n\t"
  106. "jne 1b\n\t"
  107. : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
  108. : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
  109. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
  110. );
  111. #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  112. }
  113. }
  114. static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
  115. {
  116. storage_type value;
  117. if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
  118. {
  119. #if defined(__SSE__)
  120. typedef float xmm_t __attribute__((__vector_size__(16)));
  121. xmm_t xmm_scratch;
  122. __asm__ __volatile__
  123. (
  124. #if defined(__AVX__)
  125. "vmovq %[storage], %[xmm_scratch]\n\t"
  126. "vmovq %[xmm_scratch], %[value]\n\t"
  127. #elif defined(__SSE2__)
  128. "movq %[storage], %[xmm_scratch]\n\t"
  129. "movq %[xmm_scratch], %[value]\n\t"
  130. #else
  131. "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
  132. "movlps %[storage], %[xmm_scratch]\n\t"
  133. "movlps %[xmm_scratch], %[value]\n\t"
  134. #endif
  135. : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch)
  136. : [storage] "m" (storage)
  137. : "memory"
  138. );
  139. #else
  140. __asm__ __volatile__
  141. (
  142. "fildll %[storage]\n\t"
  143. "fistpll %[value]\n\t"
  144. : [value] "=m" (value)
  145. : [storage] "m" (storage)
  146. : "memory"
  147. );
  148. #endif
  149. }
  150. else
  151. {
  152. // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value
  153. // will not change, but this prevents the storage to reside in read-only memory.
  154. #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  155. uint32_t value_bits[2];
  156. // We don't care for comparison result here; the previous value will be stored into value anyway.
  157. // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
  158. __asm__ __volatile__
  159. (
  160. "movl %%ebx, %%eax\n\t"
  161. "movl %%ecx, %%edx\n\t"
  162. "lock; cmpxchg8b %[storage]\n\t"
  163. : "=&a" (value_bits[0]), "=&d" (value_bits[1])
  164. : [storage] "m" (storage)
  165. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  166. );
  167. BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
  168. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  169. // We don't care for comparison result here; the previous value will be stored into value anyway.
  170. // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
  171. __asm__ __volatile__
  172. (
  173. "movl %%ebx, %%eax\n\t"
  174. "movl %%ecx, %%edx\n\t"
  175. "lock; cmpxchg8b %[storage]\n\t"
  176. : "=&A" (value)
  177. : [storage] "m" (storage)
  178. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  179. );
  180. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  181. }
  182. return value;
  183. }
  184. static BOOST_FORCEINLINE bool compare_exchange_strong(
  185. storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
  186. {
  187. #if defined(__clang__)
  188. // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
  189. storage_type old_expected = expected;
  190. expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
  191. return expected == old_expected;
  192. #elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  193. bool success;
  194. #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  195. __asm__ __volatile__
  196. (
  197. "xchgl %%ebx, %%esi\n\t"
  198. "lock; cmpxchg8b (%[dest])\n\t"
  199. "xchgl %%ebx, %%esi\n\t"
  200. : "+A" (expected), [success] "=@ccz" (success)
  201. : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
  202. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  203. );
  204. #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  205. __asm__ __volatile__
  206. (
  207. "xchgl %%ebx, %%esi\n\t"
  208. "lock; cmpxchg8b (%[dest])\n\t"
  209. "xchgl %%ebx, %%esi\n\t"
  210. "sete %[success]\n\t"
  211. : "+A" (expected), [success] "=qm" (success)
  212. : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
  213. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  214. );
  215. #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  216. return success;
  217. #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  218. bool success;
  219. #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  220. __asm__ __volatile__
  221. (
  222. "lock; cmpxchg8b %[dest]\n\t"
  223. : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success)
  224. : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
  225. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  226. );
  227. #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  228. __asm__ __volatile__
  229. (
  230. "lock; cmpxchg8b %[dest]\n\t"
  231. "sete %[success]\n\t"
  232. : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
  233. : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
  234. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  235. );
  236. #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  237. return success;
  238. #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  239. }
  240. static BOOST_FORCEINLINE bool compare_exchange_weak(
  241. storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
  242. {
  243. return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
  244. }
  245. static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
  246. {
  247. #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  248. #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  249. uint32_t old_bits[2];
  250. __asm__ __volatile__
  251. (
  252. "xchgl %%ebx, %%esi\n\t"
  253. "movl (%[dest]), %%eax\n\t"
  254. "movl 4(%[dest]), %%edx\n\t"
  255. ".align 16\n\t"
  256. "1: lock; cmpxchg8b (%[dest])\n\t"
  257. "jne 1b\n\t"
  258. "xchgl %%ebx, %%esi\n\t"
  259. : "=a" (old_bits[0]), "=d" (old_bits[1])
  260. : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
  261. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  262. );
  263. storage_type old_value;
  264. BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
  265. return old_value;
  266. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  267. storage_type old_value;
  268. __asm__ __volatile__
  269. (
  270. "xchgl %%ebx, %%esi\n\t"
  271. "movl (%[dest]), %%eax\n\t"
  272. "movl 4(%[dest]), %%edx\n\t"
  273. ".align 16\n\t"
  274. "1: lock; cmpxchg8b (%[dest])\n\t"
  275. "jne 1b\n\t"
  276. "xchgl %%ebx, %%esi\n\t"
  277. : "=A" (old_value)
  278. : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
  279. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  280. );
  281. return old_value;
  282. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  283. #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  284. #if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407
  285. // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below
  286. uint32_t old_bits[2];
  287. __asm__ __volatile__
  288. (
  289. "movl (%[dest]), %%eax\n\t"
  290. "movl 4(%[dest]), %%edx\n\t"
  291. ".align 16\n\t"
  292. "1: lock; cmpxchg8b (%[dest])\n\t"
  293. "jne 1b\n\t"
  294. : "=&a" (old_bits[0]), "=&d" (old_bits[1])
  295. : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage)
  296. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  297. );
  298. storage_type old_value;
  299. BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
  300. return old_value;
  301. #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  302. uint32_t old_bits[2];
  303. __asm__ __volatile__
  304. (
  305. "movl %[dest_lo], %%eax\n\t"
  306. "movl %[dest_hi], %%edx\n\t"
  307. ".align 16\n\t"
  308. "1: lock; cmpxchg8b %[dest_lo]\n\t"
  309. "jne 1b\n\t"
  310. : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
  311. : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
  312. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  313. );
  314. storage_type old_value;
  315. BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
  316. return old_value;
  317. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  318. storage_type old_value;
  319. __asm__ __volatile__
  320. (
  321. "movl %[dest_lo], %%eax\n\t"
  322. "movl %[dest_hi], %%edx\n\t"
  323. ".align 16\n\t"
  324. "1: lock; cmpxchg8b %[dest_lo]\n\t"
  325. "jne 1b\n\t"
  326. : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
  327. : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
  328. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  329. );
  330. return old_value;
  331. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  332. #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
  333. }
  334. };
#endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)

#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
  337. template< bool Signed >
  338. struct gcc_dcas_x86_64
  339. {
  340. typedef typename storage_traits< 16u >::type storage_type;
  341. typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t;
  342. static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 16u;
  343. static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 16u;
  344. static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed;
  345. static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
  346. static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
  347. static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
  348. {
  349. __asm__ __volatile__
  350. (
  351. "movq %[dest_lo], %%rax\n\t"
  352. "movq %[dest_hi], %%rdx\n\t"
  353. ".align 16\n\t"
  354. "1: lock; cmpxchg16b %[dest_lo]\n\t"
  355. "jne 1b\n\t"
  356. : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
  357. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
  358. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
  359. );
  360. }
  361. static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
  362. {
  363. // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value
  364. // will not change, but this prevents the storage to reside in read-only memory.
  365. #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  366. uint64_t value_bits[2];
  367. // We don't care for comparison result here; the previous value will be stored into value anyway.
  368. // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
  369. __asm__ __volatile__
  370. (
  371. "movq %%rbx, %%rax\n\t"
  372. "movq %%rcx, %%rdx\n\t"
  373. "lock; cmpxchg16b %[storage]\n\t"
  374. : "=&a" (value_bits[0]), "=&d" (value_bits[1])
  375. : [storage] "m" (storage)
  376. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  377. );
  378. storage_type value;
  379. BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
  380. return value;
  381. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  382. storage_type value;
  383. // We don't care for comparison result here; the previous value will be stored into value anyway.
  384. // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
  385. __asm__ __volatile__
  386. (
  387. "movq %%rbx, %%rax\n\t"
  388. "movq %%rcx, %%rdx\n\t"
  389. "lock; cmpxchg16b %[storage]\n\t"
  390. : "=&A" (value)
  391. : [storage] "m" (storage)
  392. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  393. );
  394. return value;
  395. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  396. }
  397. static BOOST_FORCEINLINE bool compare_exchange_strong(
  398. storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
  399. {
  400. #if defined(__clang__)
  401. // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
  402. storage_type old_expected = expected;
  403. expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
  404. return expected == old_expected;
  405. #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  406. // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap
  407. bool success;
  408. __asm__ __volatile__
  409. (
  410. "lock; cmpxchg16b %[dest]\n\t"
  411. "sete %[success]\n\t"
  412. : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success)
  413. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
  414. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  415. );
  416. return success;
  417. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  418. bool success;
  419. #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  420. __asm__ __volatile__
  421. (
  422. "lock; cmpxchg16b %[dest]\n\t"
  423. : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success)
  424. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
  425. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  426. );
  427. #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  428. __asm__ __volatile__
  429. (
  430. "lock; cmpxchg16b %[dest]\n\t"
  431. "sete %[success]\n\t"
  432. : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
  433. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
  434. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  435. );
  436. #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
  437. return success;
  438. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  439. }
  440. static BOOST_FORCEINLINE bool compare_exchange_weak(
  441. storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
  442. {
  443. return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
  444. }
  445. static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
  446. {
  447. #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  448. uint64_t old_bits[2];
  449. __asm__ __volatile__
  450. (
  451. "movq %[dest_lo], %%rax\n\t"
  452. "movq %[dest_hi], %%rdx\n\t"
  453. ".align 16\n\t"
  454. "1: lock; cmpxchg16b %[dest_lo]\n\t"
  455. "jne 1b\n\t"
  456. : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1])
  457. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
  458. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  459. );
  460. storage_type old_value;
  461. BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
  462. return old_value;
  463. #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  464. storage_type old_value;
  465. __asm__ __volatile__
  466. (
  467. "movq %[dest_lo], %%rax\n\t"
  468. "movq %[dest_hi], %%rdx\n\t"
  469. ".align 16\n\t"
  470. "1: lock; cmpxchg16b %[dest_lo]\n\t"
  471. "jne 1b\n\t"
  472. : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
  473. : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
  474. : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
  475. );
  476. return old_value;
  477. #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
  478. }
  479. };
#endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)

} // namespace detail
} // namespace atomics
} // namespace boost

#endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_