// Copyright (c) 2013 Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the name Chromium Embedded
// Framework nor the names of its contributors may be used to endorse
// or promote products derived from this software without specific prior
// written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Do not include this header file directly. Use base/cef_atomicops.h
// instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef CEF_INCLUDE_BASE_INTERNAL_CEF_ATOMICOPS_ARM_GCC_H_
#define CEF_INCLUDE_BASE_INTERNAL_CEF_ATOMICOPS_ARM_GCC_H_

#if defined(OS_QNX)
#include <sys/cpuinline.h>
#endif

namespace base {
namespace subtle {
// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP, so there is no memory barrier instruction
//   at all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single-core devices (e.g. Nexus One or Nexus S), this
//   instruction will take up to 200 ns, which is huge, even though it's
//   completely unneeded on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single- or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. On single-core
//   devices, this is an empty function that returns immediately. On
//   multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(OS_LINUX) || defined(OS_ANDROID)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(OS_QNX)
  __cpu_membarrier();
#else
#error MemoryBarrier() is not implemented on this platform.
#endif
}
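
// Illustrative sketch (not part of this header): a caller might use
// MemoryBarrier() to publish data between threads with plain stores and
// loads. The variables |data| and |ready| below are hypothetical.
//
//   // Producer:
//   data = ComputeValue();
//   MemoryBarrier();   // Make |data| visible before |ready| is set.
//   ready = 1;
//
//   // Consumer:
//   while (ready == 0) {}
//   MemoryBarrier();   // Order the |ready| read before the |data| read.
//   Use(data);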

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||  \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //     reloop = STREX(ptr, new_value)
    __asm__ __volatile__(
        "  ldrex %0, [%3]\n"
        "  mov %1, #0\n"
        "  cmp %0, %4\n"
#ifdef __thumb2__
        "  it eq\n"
#endif
        "  strexeq %1, %5, [%3]\n"
        : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
        : "r"(ptr), "r"(old_value), "r"(new_value)
        : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
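
// Illustrative sketch (not part of this header): callers typically wrap
// NoBarrier_CompareAndSwap() in a retry loop to build other atomic
// read-modify-write operations. AtomicSetBits() below is a hypothetical
// example, not a CEF API.
//
//   Atomic32 AtomicSetBits(volatile Atomic32* ptr, Atomic32 bits) {
//     Atomic32 old_value;
//     do {
//       old_value = NoBarrier_Load(ptr);
//     } while (NoBarrier_CompareAndSwap(ptr, old_value,
//                                       old_value | bits) != old_value);
//     return old_value | bits;
//   }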

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
    __asm__ __volatile__(
        "  ldrex %0, [%3]\n"
        "  add %0, %0, %4\n"
        "  strex %1, %0, [%3]\n"
        : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
        : "r"(ptr), "r"(increment)
        : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
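
// Illustrative sketch (not part of this header): Barrier_AtomicIncrement()
// is the kind of primitive a reference count would be built on. The
// AddRef()/Release() helpers below are hypothetical, not a CEF API.
//
//   void AddRef(volatile Atomic32* count) {
//     Barrier_AtomicIncrement(count, 1);
//   }
//   bool Release(volatile Atomic32* count) {
//     // Returns true when the last reference is dropped; the barriers
//     // order prior accesses to the object before its destruction.
//     return Barrier_AtomicIncrement(count, -1) == 0;
//   }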

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    //   old_value = LDREX(ptr)
    //   reloop = STREX(ptr, new_value)
    __asm__ __volatile__(
        "  ldrex %0, [%3]\n"
        "  strex %1, %4, [%3]\n"
        : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
        : "r"(ptr), "r"(new_value)
        : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}
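
// Illustrative sketch (not part of this header): NoBarrier_AtomicExchange()
// plus explicit barriers is enough for a simple test-and-set spinlock.
// The lock functions below are hypothetical, not a CEF API.
//
//   void SpinLockAcquire(volatile Atomic32* lock) {
//     while (NoBarrier_AtomicExchange(lock, 1) != 0) {}
//     MemoryBarrier();  // Acquire: later accesses stay after the lock.
//   }
//   void SpinLockRelease(volatile Atomic32* lock) {
//     MemoryBarrier();  // Release: earlier accesses stay before the unlock.
//     NoBarrier_Store(lock, 0);
//   }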

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
    defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier, so there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace
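
// Illustrative sketch (not part of this header): the helper returns 0 when
// the swap happened and non-zero when |*ptr| no longer held |old_value|, so
// a retry loop keeps going until it wins the race. |counter| below is a
// hypothetical variable.
//
//   Atomic32 old_value;
//   do {
//     old_value = counter;
//   } while (LinuxKernelCmpxchg(old_value, old_value + 1, &counter) != 0);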

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //
  //   MemoryBarrier();
  //   return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  //
  // But that would use three barriers per successful CAS. To save a
  // barrier, use Acquire_CompareAndSwap() instead. Its implementation
  // guarantees that:
  //  - A successful swap uses only 2 barriers (in the kernel helper).
  //  - An early return due to (prev_value != old_value) performs
  //    a memory barrier with no store, which is equivalent to the
  //    generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when the |ptr| values are 32-bit aligned.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
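
// Illustrative sketch (not part of this header): Release_Store() and
// Acquire_Load() pair up to publish a value between threads. The variables
// |payload| and |published| below are hypothetical.
//
//   // Writer: initialize the payload, then publish the flag.
//   NoBarrier_Store(&payload, 42);
//   Release_Store(&published, 1);
//
//   // Reader: once the flag is observed, the payload is visible.
//   if (Acquire_Load(&published) == 1) {
//     Atomic32 value = NoBarrier_Load(&payload);  // Reads 42.
//   }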

}  // namespace subtle
}  // namespace base

#endif  // CEF_INCLUDE_BASE_INTERNAL_CEF_ATOMICOPS_ARM_GCC_H_