//
// Copyright (c) 2018-2020 Manuel Pöter.
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
//

#ifndef LOCK_FREE_REF_COUNT_IMPL
#error "This is an impl file and must not be included directly!"
#endif

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4127) // conditional expression is constant
#endif

namespace xenium { namespace reclamation {

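  // Overview: nodes managed via enable_concurrent_ptr carry an intrusive atomic ref_count
  // in a header allocated directly in front of the object (see operator new below). The
  // LSB of that counter is the "claim bit": it is set by exactly the one thread that drops
  // the last reference and thereby claims the node. Claimed nodes are not returned to the
  // heap but recycled through this free list: a global lock-free stack manipulated via CAS,
  // optionally fronted by a per-thread cache of up to Traits::thread_local_free_list_size nodes.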
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  class lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::free_list {
  public:
    T *pop() {
      if (max_local_elements > 0)
        if (auto result = local_free_list().pop())
          return result;

      guard_ptr guard;

      while (true) {
        // (1) - this acquire-load synchronizes-with the release-CAS (3)
        guard = acquire_guard(head, std::memory_order_acquire);
        if (guard.get() == nullptr)
          return nullptr;

        // Note: ref_count can be anything here, since multiple threads
        // could have obtained a reference to the node on the free list.
        marked_ptr expected(guard);
        auto next = guard->next_free().load(std::memory_order_relaxed);
        // Since head is only changed via CAS operations, it is sufficient to use relaxed order
        // for this operation, as it is always part of a release sequence headed by (3).
        if (head.compare_exchange_weak(expected, next, std::memory_order_relaxed)) {
          assert((guard->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) != 0 &&
                 "ClaimBit must be set for a node on the free list");

          auto ptr = guard.get();
          ptr->ref_count().fetch_sub(RefCountClaimBit, std::memory_order_relaxed); // clear claim bit
          ptr->next_free().store(nullptr, std::memory_order_relaxed);
          guard.ptr.reset(); // reset guard_ptr to prevent decrement of ref_count
          return ptr;
        }
      }
    }

    void push(T *node) {
      assert(node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit &&
             "ClaimBit must be set for a node to be put on the free list");
      if (max_local_elements > 0 && local_free_list().push(node))
        return;

      add_nodes(node, node);
    }

  private:
    void add_nodes(T *first, T *last) {
      // (2) - this acquire-load synchronizes-with the release-CAS (3)
      auto old = head.load(std::memory_order_acquire);
      do {
        last->next_free().store(old, std::memory_order_relaxed);
        // (3) - if this release-CAS succeeds, it synchronizes-with the acquire-loads (1, 2);
        //       if it fails, the acquire-reload synchronizes-with a previous successful CAS (3)
      } while (!head.compare_exchange_weak(old, first, std::memory_order_release, std::memory_order_acquire));
    }

    // The free list is implemented as a LIFO singly-linked list.
    // The LSB of a node's ref_count acts as claim bit, so for all nodes on the free list the bit has to be set.
    concurrent_ptr<T, N> head;

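    // Per-thread cache in front of the global free list. It is only ever accessed by its
    // owning thread, so a plain head pointer and element counter suffice and all accesses
    // to the nodes' next_free pointers can be relaxed. On thread exit the destructor
    // splices any remaining nodes back onto the global free list so they are not lost.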
    class thread_local_free_list {
    public:
      ~thread_local_free_list() noexcept {
        if (head == nullptr)
          return;
        auto first = head;
        auto last = head;
        auto next = last->next_free().load(std::memory_order_relaxed);
        while (next) {
          last = next.get();
          next = next->next_free().load(std::memory_order_relaxed);
        }
        global_free_list.add_nodes(first, last);
      }

      bool push(T *node) {
        if (number_of_elements >= max_local_elements)
          return false;
        node->next_free().store(head, std::memory_order_relaxed);
        head = node;
        ++number_of_elements;
        return true;
      }

      T *pop() {
        auto result = head;
        if (result) {
          assert(number_of_elements > 0);
          head = result->next_free().load(std::memory_order_relaxed).get();
          --number_of_elements;
          // clear claim bit and increment ref_count
          result->ref_count().fetch_add(RefCountInc - RefCountClaimBit, std::memory_order_relaxed);
          result->next_free().store(nullptr, std::memory_order_relaxed);
        }
        return result;
      }

    private:
      size_t number_of_elements = 0;
      T *head = nullptr;
    };

    static constexpr size_t max_local_elements = Traits::thread_local_free_list_size;

    static thread_local_free_list &local_free_list() {
      // workaround for gcc issue causing redefinition of __tls_guard when
      // defining this as static thread_local member of free_list.
      alignas(64) static thread_local thread_local_free_list local_free_list;
      return local_free_list;
    }
  };

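  // Allocation first tries to reuse a node from the global free list (which in turn
  // consults the thread-local cache). Only if that fails is fresh memory obtained from
  // the heap: a header holding the atomic ref_count, initialized to a single reference,
  // placed immediately in front of the T object returned to the caller.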
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  void* lock_free_ref_count<Traits>::
      enable_concurrent_ptr<T, N, Deleter>::operator new(size_t sz) {
    assert(sz == sizeof(T) && "Cannot handle allocations of anything other than T instances");
    T *result = global_free_list.pop();
    if (result == nullptr) {
      auto h = static_cast<header *>(::operator new(sz + sizeof(header)));
      h->ref_count.store(RefCountInc, std::memory_order_release);
      result = static_cast<T *>(static_cast<void *>(h + 1));
    }

    return result;
  }

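  // operator delete never returns memory to the heap. It only drops one reference; the
  // thread whose decrement claims the node (see decrement_refcnt below) pushes it onto
  // the free list for reuse by a later operator new.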
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  void lock_free_ref_count<Traits>::
      enable_concurrent_ptr<T, N, Deleter>::operator delete(void *p) {
    auto node = static_cast<T *>(p);
    assert((node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) == 0);

    if (node->decrement_refcnt())
      node->push_to_free_list();
  }

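  // Drops one reference. If the counter would reach zero, the CAS sets the claim bit in
  // the same step. The returned expression masks the claim bit out of the difference
  // between the old and the new counter value, so it is nonzero only for the single thread
  // whose CAS both removed the last reference and set the claim bit; exactly one thread
  // therefore ends up destroying and recycling the node.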
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  bool lock_free_ref_count<Traits>::
      enable_concurrent_ptr<T, N, Deleter>::decrement_refcnt() {
    unsigned old_refcnt, new_refcnt;
    do {
      old_refcnt = ref_count().load(std::memory_order_relaxed);
      new_refcnt = old_refcnt - RefCountInc;
      if (new_refcnt == 0)
        new_refcnt = RefCountClaimBit;
      // (4) - this release/acquire CAS synchronizes with itself
    } while (!ref_count().compare_exchange_weak(old_refcnt, new_refcnt,
                                                new_refcnt == RefCountClaimBit
                                                    ? std::memory_order_acquire
                                                    : std::memory_order_release,
                                                std::memory_order_relaxed));

    // free node iff ref_count is zero AND we're the first thread to "claim" this node for reclamation
    return (old_refcnt - new_refcnt) & RefCountClaimBit;
  }

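  // guard_ptr is the owning handle of this scheme: every live guard_ptr contributes
  // exactly one RefCountInc to the ref_count of the node it points to. Copying increments
  // the counter, moving transfers the reference without touching it, and reset() releases
  // it (potentially destroying and recycling the node).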
  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::guard_ptr(const MarkedPtr &p) noexcept :
    base(p) {
    if (this->ptr.get() != nullptr)
      this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::guard_ptr(const guard_ptr &p) noexcept :
    guard_ptr(p.ptr) {}

  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::guard_ptr(guard_ptr &&p) noexcept :
    base(p.ptr) {
    p.ptr.reset();
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  auto lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::operator=(const guard_ptr &p)
      -> guard_ptr & {
    if (&p == this)
      return *this;

    reset();
    this->ptr = p.ptr;
    if (this->ptr.get() != nullptr)
      this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
    return *this;
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  auto lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::operator=(guard_ptr &&p) noexcept
      -> guard_ptr & {
    if (&p == this)
      return *this;

    reset();
    this->ptr = std::move(p.ptr);
    p.ptr.reset();
    return *this;
  }

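  // acquire() follows the usual increment-then-verify pattern: load the pointer, increment
  // the target's ref_count, then re-read the pointer. If it still refers to the same node
  // the reference is safely held; otherwise the reference is dropped again (via the
  // reset() at the top of the loop) and the whole sequence is retried.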
  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::acquire(const concurrent_ptr<T> &p, std::memory_order order) noexcept {
    for (;;) {
      reset();
      // FIXME: If this load is relaxed, TSan reports a data race between the following
      // fetch-add and the initialization of the ref_count field. I tend to disagree, as
      // the fetch_add should be ordered after the initial store (operator new) in the
      // modification order of ref_count. Therefore the acquire-fetch-add should
      // synchronize-with the release-store.
      // I created a GitHub issue:
      // But for now, let's make this an acquire-load to make TSan happy.
      auto q = p.load(std::memory_order_acquire);
      this->ptr = q;
      if (q.get() == nullptr)
        return;

      // (5) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7);
      //       this ensures that a change to p becomes visible
      q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

      if (q == p.load(order))
        return;
    }
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  bool lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::acquire_if_equal(
        const concurrent_ptr<T> &p, const MarkedPtr &expected, std::memory_order order) noexcept {
    reset();
    // FIXME: same issue with TSan as in acquire (see above).
    auto q = p.load(std::memory_order_acquire);
    if (q != expected)
      return false;

    this->ptr = q;
    if (q.get() == nullptr)
      return true;

    // (6) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7);
    //       this ensures that a change to p becomes visible
    q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

    if (q == p.load(order))
      return true;

    reset();
    return false;
  }

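  // reset() drops this guard's reference. The thread whose decrement claims the node runs
  // the destructor (unless the object was already destroyed, see is_destroyed()) and then
  // hands the memory back to the free list.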
  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::reset() noexcept {
    auto p = this->ptr.get();
    this->ptr.reset();
    if (p == nullptr)
      return;

    if (p->decrement_refcnt()) {
      if (!p->is_destroyed())
        p->~T();

      p->push_to_free_list();
    }
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
      guard_ptr<T, MarkedPtr>::reclaim(Deleter) noexcept {
    if (this->ptr.get() != nullptr) {
      assert(this->ptr->refs() > 1);
      // ref_count was initialized with "1", so we need an additional
      // decrement to ensure that the node gets reclaimed.
      // ref_count cannot drop to zero here -> no check required.
      // (7) - this release-fetch_sub synchronizes-with the acquire-fetch_adds (5, 6)
      this->ptr->ref_count().fetch_sub(RefCountInc, std::memory_order_release);
    }
    reset();
  }

  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  typename lock_free_ref_count<Traits>::
      template enable_concurrent_ptr<T, N, Deleter>::free_list
      lock_free_ref_count<Traits>::
      enable_concurrent_ptr<T, N, Deleter>::global_free_list;

#ifdef TRACK_ALLOCATIONS
  template <class Traits>
  inline detail::allocation_counter& lock_free_ref_count<Traits>::allocation_counter()
  { return allocation_counter_; }

  template <class Traits>
  inline void lock_free_ref_count<Traits>::count_allocation()
  { allocation_counter().count_allocation(); }

  template <class Traits>
  inline void lock_free_ref_count<Traits>::count_reclamation()
  { allocation_counter().count_reclamation(); }
#endif
}}

#ifdef _MSC_VER
#pragma warning(pop)
#endif