/* reducer_impl.cpp                  -*-C++-*-
 *
 *************************************************************************
 *
 *  Copyright (C) 2009-2016, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 *  *********************************************************************
 *
 *  PLEASE NOTE: This file is a downstream copy of a file maintained in
 *  a repository at cilkplus.org. Changes made to this file that are not
 *  submitted through the contribution process detailed at
 *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
 *  time that a new version is released. Changes only submitted to the
 *  GNU compiler collection or posted to the git repository at
 *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
 *  not tracked.
 *
 *  We welcome your contributions to this open source project. Thank you
 *  for your assistance in helping us improve Cilk Plus.
 *
 *  Patents Pending, Intel Corporation.
 **************************************************************************/

/**
 * Support for reducers
 */

// ICL: Don't complain about conversion from pointer to same-sized integral
// type in hashfun. That's why we're using size_t.
#ifdef _WIN32
#   pragma warning(disable: 1684)
#endif

#include "reducer_impl.h"
#include "scheduler.h"
#include "bug.h"
#include "os.h"
#include "global_state.h"
#include "frame_malloc.h"

#include "cilk/hyperobject_base.h"
#include "cilktools/cilkscreen.h"
#include "internal/abi.h"

#include <cstdlib> // std::malloc / std::free, used by the hyperobject helpers

#if REDPAR_DEBUG > 0
#include <stdio.h>
#include <stdlib.h>
#endif


#define DBG if(0) // if(1) enables some internal checks

// Check that w is the currently executing worker. This function is a
// no-op unless the debug level is set high enough.
static inline void verify_current_wkr(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 5
    __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
    if (w != tmp) {
        fprintf(stderr, "W=%d, actual=%d... missing a refresh....\n",
                w->self,
                tmp->self);
    }
    CILK_ASSERT(w == tmp);
#endif
}

// Suppress clang warning that the expression result is unused
#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic push
#   pragma clang diagnostic ignored "-Wunused-value"
#endif // __clang__

/// Helper class to disable and re-enable Cilkscreen
struct DisableCilkscreen
{
    DisableCilkscreen () { __cilkscreen_disable_checking(); }
    ~DisableCilkscreen () { __cilkscreen_enable_checking(); }
};

/// Helper class to enable and re-disable Cilkscreen
struct EnableCilkscreen
{
    EnableCilkscreen () { __cilkscreen_enable_checking(); }
    ~EnableCilkscreen () { __cilkscreen_disable_checking(); }
};

#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic pop
#endif // __clang__

/**
 * @brief Element for a hyperobject
 */
struct elem {
    void                       *key;  ///< Shared key for this hyperobject
    __cilkrts_hyperobject_base *hb;   ///< Base of the hyperobject.
    void                       *view; ///< Strand-private view of this hyperobject

    /// Destroy and deallocate the view object for this element and set view
    /// to null.
    void destroy();

    /// Returns true if this element contains a leftmost view.
    bool is_leftmost() const;
};

/** Bucket containing at most NMAX elements */
struct bucket {
    /// Size of the array of elements for this bucket
    size_t nmax;

    /**
     * We use the "struct hack" to allocate an array of variable
     * dimension at the end of the struct. However, we allocate a
     * total of NMAX+1 elements instead of NMAX. The last one always
     * has key == 0, which we use as a termination criterion.
     */
    elem el[1];
};
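
// Layout sketch (illustrative, assuming LP64 pointers and no unusual
// padding, so sizeof(elem) == 24 and sizeof(bucket) == 32): a bucket with
// nmax == 4 occupies sizeof(bucket) + 4 * sizeof(elem) bytes and provides
// slots el[0] .. el[4], where el[4] is the always-zero sentinel that
// terminates scans. The "+1" sentinel slot is the el[1] member already
// counted in sizeof(bucket); sizeof_bucket() below relies on exactly this.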

/**
 * Class that implements the map for reducers so we can find the
 * view for a strand.
 */
struct cilkred_map {
    /** Handy pointer to the global state */
    global_state_t *g;

    /** Number of elements in table */
    size_t nelem;

    /** Number of buckets */
    size_t nbuckets;

    /** Array of pointers to buckets */
    bucket **buckets;

    /** Set true if merging (for debugging purposes) */
    bool merging;

    /** Set true for leftmost reducer map */
    bool is_leftmost;

    /** @brief Return element mapped to 'key' or null if not found. */
    elem *lookup(void *key);

    /**
     * @brief Insert key/value element into hash map without rehashing.
     * Does not check for duplicate key.
     */
    elem *insert_no_rehash(__cilkrts_worker           *w,
                           void                       *key,
                           __cilkrts_hyperobject_base *hb,
                           void                       *value);

    /**
     * @brief Insert key/value element into hash map, rehashing if necessary.
     * Does not check for duplicate key.
     */
    inline elem *rehash_and_insert(__cilkrts_worker           *w,
                                   void                       *key,
                                   __cilkrts_hyperobject_base *hb,
                                   void                       *value);

    /** @brief Grow bucket by one element, reallocating bucket if necessary */
    static elem *grow(__cilkrts_worker *w, bucket **bp);

    /** @brief Rehash a worker's reducer map */
    void rehash(__cilkrts_worker *);

    /**
     * @brief Returns true if a rehash is needed due to the number of
     * elements that have been inserted.
     */
    inline bool need_rehash_p() const;

    /** @brief Allocate and initialize the buckets */
    void make_buckets(__cilkrts_worker *w, size_t nbuckets);

    /**
     * Specify behavior when the same key is present in both maps passed
     * into merge().
     */
    enum merge_kind
    {
        MERGE_UNORDERED, ///< Assertion fails
        MERGE_INTO_LEFT, ///< Merges the argument from the right into the left
        MERGE_INTO_RIGHT ///< Merges the argument from the left into the right
    };

    /**
     * @brief Merge another reducer map into this one, destroying the other
     * map in the process.
     */
    __cilkrts_worker* merge(__cilkrts_worker *current_wkr,
                            cilkred_map      *other_map,
                            enum merge_kind   kind);

    /** @brief Check consistency of a reducer map */
    void check(bool allow_null_view);

    /** @brief Test whether the cilkred_map is empty */
    bool is_empty() { return nelem == 0; }
};
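
// A brief orientation (summary only, not normative): each worker owns at
// most one cilkred_map. __cilkrts_hyper_create() registers a hyperobject's
// initial (leftmost) view under a key that is the address of that view;
// __cilkrts_hyper_lookup() lazily creates a strand-private view on first
// access after a steal; and at syncs the runtime folds maps back together
// with merge(), calling the monoid's reduce_fn when the same key appears
// in both maps.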

static inline struct cilkred_map* install_new_reducer_map(__cilkrts_worker *w) {
    cilkred_map *h;
    h = __cilkrts_make_reducer_map(w);
    w->reducer_map = h;
    return h;
}

static size_t sizeof_bucket(size_t nmax)
{
    bucket *b = 0;
    return (sizeof(*b) + nmax * sizeof(b->el[0]));
}

static bucket *alloc_bucket(__cilkrts_worker *w, size_t nmax)
{
    bucket *b = (bucket *)
        __cilkrts_frame_malloc(w, sizeof_bucket(nmax));
    b->nmax = nmax;
    return b;
}

static void free_bucket(__cilkrts_worker *w, bucket **bp)
{
    bucket *b = *bp;
    if (b) {
        __cilkrts_frame_free(w, b, sizeof_bucket(b->nmax));
        *bp = 0;
    }
}

/* round up nmax to fill a memory allocator block completely */
static size_t roundup(size_t nmax)
{
    size_t sz = sizeof_bucket(nmax);

    /* round up size to a full malloc block */
    sz = __cilkrts_frame_malloc_roundup(sz);

    /* invert sizeof_bucket() */
    nmax = ((sz - sizeof(bucket)) / sizeof(elem));

    return nmax;
}
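
// Worked example with hypothetical sizes (the real block sizes are a
// detail of frame_malloc): on LP64, sizeof_bucket(2) == 32 + 2*24 == 80
// bytes. If __cilkrts_frame_malloc_roundup() rounds 80 up to a 128-byte
// block, inverting sizeof_bucket() yields (128 - 32) / 24 == 4, so the
// caller allocates a 4-element bucket and uses the whole block instead of
// wasting its tail.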

static bool is_power_of_2(size_t n)
{
    return (n & (n - 1)) == 0;
}
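
// Note: (n & (n - 1)) == 0 also holds for n == 0, so strictly this tests
// "zero or a power of 2". That is harmless here: nbuckets starts at 1 in
// __cilkrts_make_reducer_map() and only ever doubles.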

void cilkred_map::make_buckets(__cilkrts_worker *w,
                               size_t            new_nbuckets)
{
    nbuckets = new_nbuckets;

    CILK_ASSERT(is_power_of_2(nbuckets));
#if defined __GNUC__ && defined __ICC
    /* bug workaround -- suppress calls to _intel_fast_memset */
    bucket *volatile*new_buckets = (bucket *volatile*)
#else
    bucket **new_buckets = (bucket **)
#endif
        __cilkrts_frame_malloc(w, nbuckets * sizeof(*(buckets)));

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, desc=make_buckets, new_buckets=%p, new_nbuckets=%zd\n",
            w->self, new_buckets, new_nbuckets);
#endif

    for (size_t i = 0; i < new_nbuckets; ++i)
        new_buckets[i] = 0;
#if defined __GNUC__ && defined __ICC
    buckets = (bucket **)new_buckets;
#else
    buckets = new_buckets;
#endif
    nelem = 0;
}

static void free_buckets(__cilkrts_worker *w,
                         bucket          **buckets,
                         size_t            nbuckets)
{
    size_t i;

#if REDPAR_DEBUG >= 1
    verify_current_wkr(w);
    fprintf(stderr, "W=%d, desc=free_buckets, buckets=%p, size=%zd\n",
            w->self, buckets,
            nbuckets * sizeof(*buckets));
#endif

    for (i = 0; i < nbuckets; ++i)
        free_bucket(w, buckets + i);

    __cilkrts_frame_free(w, buckets, nbuckets * sizeof(*buckets));
}

static size_t minsz(size_t nelem)
{
    return 1U + nelem + nelem / 8U;
}

static size_t nextsz(size_t nelem)
{
    return 2 * nelem;
}

bool cilkred_map::need_rehash_p() const
{
    return minsz(nelem) > nbuckets;
}
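
// Worked example: minsz(nelem) == 1 + nelem + nelem/8, so with
// nbuckets == 8 the map accepts 8 elements (before the 8th insert,
// minsz(7) == 8 is not > 8); the check before inserting the 9th sees
// minsz(8) == 1 + 8 + 1 == 10 > 8, and rehash() doubles the table to
// nextsz(8) == 16 buckets. The table thus grows as the load factor
// approaches 1.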

static inline size_t hashfun(const cilkred_map *h, void *key)
{
    size_t k = (size_t) key;

    k ^= k >> 21;
    k ^= k >> 8;
    k ^= k >> 3;

    return k & (h->nbuckets - 1);
}
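
// The xor-shifts fold high bits of the key down into the low bits before
// the power-of-two mask is applied. Keys are view addresses, so without
// this mixing, allocation alignment would leave the low bits (and hence
// the bucket index) nearly constant. For a hypothetical key 0x7f0040 and
// nbuckets == 8, the raw address masks to bucket 0, while the mixed value
// (0x7090ab) masks to bucket 3.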

// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
// the reducer map.
static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
{
    // The current implementation uses the address of the leftmost view as
    // the key.
    return reinterpret_cast<char*>(hb) + hb->__view_offset;
}

// Given a hyperobject key, return a pointer to the leftmost object. In the
// current implementation, the address of the leftmost object IS the key, so
// this function is effectively a no-op.
static inline void* get_leftmost_view(void *key)
{
    return key;
}

/* debugging support: check consistency of a reducer map */
void cilkred_map::check(bool allow_null_view)
{
    size_t count = 0;

    CILK_ASSERT(buckets);
    for (size_t i = 0; i < nbuckets; ++i) {
        bucket *b = buckets[i];
        if (b)
            for (elem *el = b->el; el->key; ++el) {
                CILK_ASSERT(allow_null_view || el->view);
                ++count;
            }
    }
    CILK_ASSERT(nelem == count);
    /*global_reducer_map::check();*/
}

/* grow bucket by one element, reallocating bucket if necessary */
elem *cilkred_map::grow(__cilkrts_worker *w,
                        bucket          **bp)
{
    size_t i, nmax, nnmax;
    bucket *b, *nb;

    b = *bp;
    if (b) {
        nmax = b->nmax;
        /* find empty element if any */
        for (i = 0; i < nmax; ++i)
            if (b->el[i].key == 0)
                return &(b->el[i]);
        /* do not use the last one even if empty */
    } else {
        nmax = 0;
    }

    verify_current_wkr(w);
    /* allocate a new bucket */
    nnmax = roundup(2 * nmax);
    nb = alloc_bucket(w, nnmax);

    /* copy old bucket into new */
    for (i = 0; i < nmax; ++i)
        nb->el[i] = b->el[i];

    free_bucket(w, bp); *bp = nb;

    /* zero out extra elements */
    for (; i < nnmax; ++i)
        nb->el[i].key = 0;

    /* zero out the last one */
    nb->el[i].key = 0;

    return &(nb->el[nmax]);
}

elem *cilkred_map::insert_no_rehash(__cilkrts_worker           *w,
                                    void                       *key,
                                    __cilkrts_hyperobject_base *hb,
                                    void                       *view)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=insert_no_rehash, this_map=%p]\n",
            w->self, this);
    verify_current_wkr(w);
#endif

    CILK_ASSERT((w == 0 && g == 0) || w->g == g);
    CILK_ASSERT(key != 0);
    CILK_ASSERT(view != 0);

    elem *el = grow(w, &(buckets[hashfun(this, key)]));

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
            w->self, this, key, view, el);
#endif

    el->key  = key;
    el->hb   = hb;
    el->view = view;
    ++nelem;

    return el;
}

void cilkred_map::rehash(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=rehash, this_map=%p, g=%p, w->g=%p]\n",
            w->self, this, g, w->g);
    verify_current_wkr(w);
#endif
    CILK_ASSERT((w == 0 && g == 0) || w->g == g);

    size_t   onbuckets = nbuckets;
    size_t   onelem    = nelem;
    bucket **obuckets  = buckets;
    size_t   i;
    bucket  *b;

    make_buckets(w, nextsz(nbuckets));

    for (i = 0; i < onbuckets; ++i) {
        b = obuckets[i];
        if (b) {
            elem *oel;
            for (oel = b->el; oel->key; ++oel)
                insert_no_rehash(w, oel->key, oel->hb, oel->view);
        }
    }

    CILK_ASSERT(nelem == onelem);

    free_buckets(w, obuckets, onbuckets);
}

elem *cilkred_map::rehash_and_insert(__cilkrts_worker           *w,
                                     void                       *key,
                                     __cilkrts_hyperobject_base *hb,
                                     void                       *view)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, this_map=%p, inserting key=%p, view=%p\n",
            w->self, this, key, view);
    verify_current_wkr(w);
#endif

    if (need_rehash_p())
        rehash(w);

    return insert_no_rehash(w, key, hb, view);
}


elem *cilkred_map::lookup(void *key)
{
    bucket *b = buckets[hashfun(this, key)];

    if (b) {
        elem *el;
        for (el = b->el; el->key; ++el) {
            if (el->key == key) {
                CILK_ASSERT(el->view);
                return el;
            }
        }
    }

    return 0;
}

void elem::destroy()
{
    if (! is_leftmost()) {

        // Call destroy_fn and deallocate_fn on the view, but not if it's
        // the leftmost view.
        cilk_c_monoid *monoid = &(hb->__c_monoid);
        cilk_c_reducer_destroy_fn_t    destroy_fn    = monoid->destroy_fn;
        cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;

        destroy_fn((void*)hb, view);
        deallocate_fn((void*)hb, view);
    }

    view = 0;
}

inline
bool elem::is_leftmost() const
{
    // The current implementation uses the address of the leftmost view as
    // the key, so if key == view, then this element refers to the leftmost
    // view.
    return key == view;
}

/* Remove the reducer from the current reducer map. If the reducer
   exists in maps other than the current one, the behavior is
   undefined. */
extern "C"
CILK_EXPORT void __CILKRTS_STRAND_STALE(
    __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *hb))
{
    // Disable Cilkscreen for the duration of this call. The destructor for
    // this class will re-enable Cilkscreen when the method returns. This
    // will prevent Cilkscreen from reporting apparent races in reducers.
    DisableCilkscreen x;

    __cilkrts_worker* w = __cilkrts_get_tls_worker();
    if (! w) {
        // If no worker, then Cilk is not running and there is no reducer
        // map. Do nothing. The reducer's destructor will take care of
        // destroying the leftmost view.
        return;
    }

    const char *UNSYNCED_REDUCER_MSG =
        "Destroying a reducer while it is visible to unsynced child tasks, or\n"
        "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
        "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";

    cilkred_map* h = w->reducer_map;
    if (NULL == h)
        cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return

    if (h->merging) {
        verify_current_wkr(w);
        __cilkrts_bug("User error: hyperobject used by another hyperobject");
    }

    void* key = get_hyperobject_key(hb);
    elem *el = h->lookup(key);

    // Verify that the reducer is being destroyed from the leftmost strand
    // for which the reducer is defined.
    if (! (el && el->is_leftmost()))
        cilkos_error(UNSYNCED_REDUCER_MSG);

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
            w->self, key, h, el);
#endif

    // Remove the element from the hash bucket. Do not bother shrinking
    // the bucket. Note that the destroy() function does not actually
    // call the destructor for the leftmost view.
    el->destroy();
    do {
        el[0] = el[1];
        ++el;
    } while (el->key);
    --h->nelem;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
            w->self, key, w->reducer_map);
#endif
}

extern "C"
CILK_EXPORT
void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
{
    // This function registers the specified hyperobject in the current
    // reducer map and registers the initial value of the hyperobject as
    // the leftmost view of the reducer.
    __cilkrts_worker *w = __cilkrts_get_tls_worker();
    if (! w) {
        // If there is no worker, then there is nothing to do: The initial
        // value will automatically be used as the left-most view when we
        // enter Cilk.
        return;
    }

    // Disable Cilkscreen for the duration of this call. The destructor for
    // this class will re-enable Cilkscreen when the method returns. This
    // will prevent Cilkscreen from reporting apparent races in reducers.
    DisableCilkscreen x;

    void* key = get_hyperobject_key(hb);
    void* view = get_leftmost_view(key);
    cilkred_map *h = w->reducer_map;

    if (__builtin_expect(!h, 0)) {
        h = install_new_reducer_map(w);
#if REDPAR_DEBUG >= 2
        fprintf(stderr, "[W=%d, hb=%p, hyper_create, installed new map %p, view=%p]\n",
                w->self, hb, h, view);
#endif
    }

    /* Must not exist. */
    CILK_ASSERT(h->lookup(key) == NULL);

#if REDPAR_DEBUG >= 3
    verify_current_wkr(w);
    fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
            w->self, hb, h, view);
    fprintf(stderr, "W=%d, h=%p, inserting monoid %p, view=%p\n",
            w->self,
            h,
            &(hb->__c_monoid),
            view);
#endif

    if (h->merging)
        __cilkrts_bug("User error: hyperobject used by another hyperobject");

    CILK_ASSERT(w->reducer_map == h);
    // The address of the leftmost value is the same as the key for lookup.
    (void) h->rehash_and_insert(w, view, hb, view);
}

extern "C"
CILK_EXPORT void* __CILKRTS_STRAND_PURE(
    __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
{
    __cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
    void* key = get_hyperobject_key(hb);
    if (! w)
        return get_leftmost_view(key);

    // Disable Cilkscreen for the duration of this call. This will
    // prevent Cilkscreen from reporting apparent races in reducers.
    DisableCilkscreen dguard;

    if (__builtin_expect(w->g->force_reduce, 0))
        __cilkrts_promote_own_deque(w);
    cilkred_map* h = w->reducer_map;

    if (__builtin_expect(!h, 0)) {
        h = install_new_reducer_map(w);
    }

    if (h->merging)
        __cilkrts_bug("User error: hyperobject used by another hyperobject");
    elem* el = h->lookup(key);
    if (! el) {
        /* lookup failed; insert a new default element */
        void *rep;

        {
            /* re-enable Cilkscreen while calling the constructor */
            EnableCilkscreen eguard;
            if (h->is_leftmost)
            {
                // This special case is called only if the reducer was not
                // registered using __cilkrts_hyper_create, e.g., if this is
                // a C reducer in global scope or if there is no bound
                // worker.
                rep = get_leftmost_view(key);
            }
            else
            {
                rep = hb->__c_monoid.allocate_fn((void*)hb,
                                                 hb->__view_size);
                // TBD: Handle exception on identity function
                hb->__c_monoid.identity_fn((void*)hb, rep);
            }
        }

#if REDPAR_DEBUG >= 3
        fprintf(stderr, "W=%d, h=%p, inserting monoid %p, view=%p\n",
                w->self,
                h,
                &(hb->__c_monoid),
                rep);
        CILK_ASSERT(w->reducer_map == h);
#endif
        el = h->rehash_and_insert(w, key, hb, rep);
    }

    return el->view;
}

extern "C" CILK_EXPORT
void* __cilkrts_hyperobject_alloc(void* ignore, std::size_t bytes)
{
    return std::malloc(bytes);
}

extern "C" CILK_EXPORT
void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
{
    std::free(view);
}

/* No-op destroy function */
extern "C" CILK_EXPORT
void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
{
}
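
#if 0
// Illustrative sketch, compiled out: a minimal C monoid of the shape the
// lookup and merge paths above drive, assuming the callback typedefs and
// cilk_c_monoid field order declared in cilk/hyperobject_base.h. The
// my_sum_* names are hypothetical, not part of the runtime.
static void my_sum_identity(void* hb, void* view)
{
    *static_cast<long*>(view) = 0;  // fresh views start at the identity
}

static void my_sum_reduce(void* hb, void* left, void* right)
{
    // Fold the right (later) view into the left (earlier) one.
    *static_cast<long*>(left) += *static_cast<long*>(right);
}

// A hyperobject built on this monoid would have __cilkrts_hyper_lookup()
// create fresh views via allocate_fn + identity_fn, and have merge() fold
// stolen views together via reduce_fn:
//
//   cilk_c_monoid my_sum_monoid = {
//       my_sum_reduce,                      // reduce_fn
//       my_sum_identity,                    // identity_fn
//       __cilkrts_hyperobject_noop_destroy, // destroy_fn (POD view)
//       __cilkrts_hyperobject_alloc,        // allocate_fn
//       __cilkrts_hyperobject_dealloc       // deallocate_fn
//   };
#endif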

cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w)
{
    CILK_ASSERT(w);

    cilkred_map *h;
    size_t nbuckets = 1; /* default value */

    h = (cilkred_map *)__cilkrts_frame_malloc(w, sizeof(*h));
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=make_reducer_frame_malloc_reducer_map, h=%p]\n",
            w->self, h);
#endif

    h->g = w ? w->g : 0;
    h->make_buckets(w, nbuckets);
    h->merging = false;
    h->is_leftmost = false;

    return h;
}

/* Destroy a reducer map. The map must have been allocated
   from the worker's global context and should have been
   allocated from the same worker. */
void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
{
    CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
    verify_current_wkr(w);

    /* the reducer map is allowed to contain el->view == NULL here (and
       only here). We set el->view == NULL only when we know that the
       map will be destroyed immediately afterwards. */
    DBG h->check(/*allow_null_view=*/true);

    bucket *b;
    size_t i;

    for (i = 0; i < h->nbuckets; ++i) {
        b = h->buckets[i];
        if (b) {
            elem *el;
            for (el = b->el; el->key; ++el) {
                if (el->view)
                    el->destroy();
            }
        }
    }

    free_buckets(w, h->buckets, h->nbuckets);

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, destroy_red_map, freeing map h=%p, size=%zd\n",
            w->self, h, sizeof(*h));
#endif

    __cilkrts_frame_free(w, h, sizeof(*h));
}

/* Set the specified reducer map as the leftmost map if is_leftmost is true,
   otherwise, set it to not be the leftmost map. */
void __cilkrts_set_leftmost_reducer_map(cilkred_map *h, int is_leftmost)
{
    h->is_leftmost = is_leftmost;
}


__cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
                                     cilkred_map      *other_map,
                                     enum merge_kind   kind)
{
    // Disable Cilkscreen while we merge the maps. The destructor for
    // the guard class will re-enable Cilkscreen when it goes out of scope.
    // This will prevent Cilkscreen from reporting apparent races in between
    // the reduce function and the reducer operations. The Cilk runtime
    // guarantees that a pair of reducer maps will only be merged when no
    // other strand will access them.
    DisableCilkscreen guard;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=merge, this_map=%p, other_map=%p]\n",
            w->self,
            this, other_map);
#endif
    // Remember the current stack frame.
    __cilkrts_stack_frame *current_sf = w->current_stack_frame;
    merging = true;
    other_map->merging = true;

    // Merging to the leftmost view is a special case because every leftmost
    // element must be initialized before the merge.
    CILK_ASSERT(!other_map->is_leftmost /* || kind == MERGE_UNORDERED */);
    bool merge_to_leftmost = (this->is_leftmost
                              /* && !other_map->is_leftmost */);

    DBG check(/*allow_null_view=*/false);
    DBG other_map->check(/*allow_null_view=*/false);

    for (size_t i = 0; i < other_map->nbuckets; ++i) {
        bucket *b = other_map->buckets[i];
        if (b) {
            for (elem *other_el = b->el; other_el->key; ++other_el) {
                /* Steal the value from the other map, which will be
                   destroyed at the end of this operation. */
                void *other_view = other_el->view;
                CILK_ASSERT(other_view);

                void *key = other_el->key;
                __cilkrts_hyperobject_base *hb = other_el->hb;
                elem *this_el = lookup(key);

                if (this_el == 0 && merge_to_leftmost) {
                    /* Initialize leftmost view before merging. */
                    void* leftmost = get_leftmost_view(key);
                    // leftmost == other_view can be true if the initial view
                    // was created in other than the leftmost strand of the
                    // spawn tree, but then made visible to subsequent strands
                    // (e.g., the reducer was allocated on the heap and the
                    // pointer was returned to the caller). In such cases,
                    // parallel semantics says that syncing with earlier
                    // strands will always result in 'this_el' being null,
                    // thus propagating the initial view up the spawn tree
                    // until it reaches the leftmost strand. When syncing
                    // with the leftmost strand, leftmost == other_view will
                    // be true and we must avoid reducing the initial view
                    // with itself.
                    if (leftmost != other_view)
                        this_el = rehash_and_insert(w, key, hb, leftmost);
                }

                if (this_el == 0) {
                    /* move object from other map into this one */
                    rehash_and_insert(w, key, hb, other_view);
                    other_el->view = 0;
                    continue; /* No element-level merge necessary */
                }

                /* The same key is present in both maps with values
                   A and B. Three choices: fail, A OP B, B OP A. */
                switch (kind)
                {
                case MERGE_UNORDERED:
                    __cilkrts_bug("TLS Reducer race");
                    break;
                case MERGE_INTO_RIGHT:
                    /* Swap elements in order to preserve object
                       identity */
                    other_el->view = this_el->view;
                    this_el->view = other_view;
                    /* FALL THROUGH */
                case MERGE_INTO_LEFT: {
                    /* Stealing should be disabled during reduce
                       (even if force-reduce is enabled). */

#if DISABLE_PARALLEL_REDUCERS
                    __cilkrts_stack_frame * volatile *saved_protected_tail;
                    saved_protected_tail = __cilkrts_disallow_stealing(w, NULL);
#endif

                    {
                        CILK_ASSERT(current_sf->worker == w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);

                        /* TBD: if reduce throws an exception we need to
                           stop it here. */
                        hb->__c_monoid.reduce_fn((void*)hb,
                                                 this_el->view,
                                                 other_el->view);
                        // The reduce function may itself have been stolen
                        // from and resumed on a different worker; reload w
                        // from the stack frame.
                        w = current_sf->worker;

#if REDPAR_DEBUG >= 2
                        verify_current_wkr(w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);
#endif
                    }

#if DISABLE_PARALLEL_REDUCERS
                    /* Restore stealing */
                    __cilkrts_restore_stealing(w, saved_protected_tail);
#endif

                } break;
                }
            }
        }
    }
    this->is_leftmost = this->is_leftmost || other_map->is_leftmost;
    merging = false;
    other_map->merging = false;
    verify_current_wkr(w);
    __cilkrts_destroy_reducer_map(w, other_map);
    return w;
}

/**
 * Print routine for debugging the merging of reducer maps.
 * A no-op unless REDPAR_DEBUG is set high enough.
 */
static inline
void debug_map_merge(__cilkrts_worker *w,
                     cilkred_map *left_map,
                     cilkred_map *right_map,
                     __cilkrts_worker **final_wkr)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=finish_merge, left_map=%p, right_map=%p, w->reducer_map=%p, right_ans=%p, final_wkr=%d]\n",
            w->self, left_map, right_map, w->reducer_map, right_map, (*final_wkr)->self);
#endif
}

/**
 * Merge RIGHT into LEFT;
 * return whichever map allows for the faster merge, and destroy the other
 * one.
 *
 * *w_ptr should be the currently executing worker.
 * *w_ptr may change during execution if the reduction is parallel.
 */
cilkred_map*
merge_reducer_maps(__cilkrts_worker **w_ptr,
                   cilkred_map *left_map,
                   cilkred_map *right_map)
{
    __cilkrts_worker *w = *w_ptr;
    if (!left_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return right_map;
    }

    if (!right_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return left_map;
    }

    /* Special case: if left_map is leftmost, then always merge into it.
       For C reducers this forces lazy creation of the leftmost views. */
    if (left_map->is_leftmost || left_map->nelem > right_map->nelem) {
        *w_ptr = left_map->merge(w, right_map, cilkred_map::MERGE_INTO_LEFT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return left_map;
    } else {
        *w_ptr = right_map->merge(w, left_map, cilkred_map::MERGE_INTO_RIGHT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return right_map;
    }
}
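
// Why this choice is the faster merge: cilkred_map::merge() walks every
// element of the map passed as other_map, so folding the smaller map into
// the larger one costs one lookup (and possibly one reduce) per element of
// the smaller map only. The MERGE_INTO_RIGHT view swap in merge() keeps
// the left-to-right reduction order even when the right map survives.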

/**
 * Merges RIGHT into LEFT, and then repeatedly calls
 * merge_reducer_maps() until (*w_ptr)->reducer_map is NULL.
 *
 * *w_ptr may change as reductions execute.
 */
cilkred_map*
repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
                            cilkred_map *left_map,
                            cilkred_map *right_map)
{
    // Note: if right_map == NULL but w->reducer_map != NULL, then
    // this loop will reduce w->reducer_map into left_map.
    do {
        left_map = merge_reducer_maps(w_ptr, left_map, right_map);
        verify_current_wkr(*w_ptr);

        // Pull any newly created reducer map and loop around again.
        right_map = (*w_ptr)->reducer_map;
        (*w_ptr)->reducer_map = NULL;
    } while (right_map);
    return left_map;
}

/* End reducer_impl.cpp */