111
|
1 /* global_state.h -*-C++-*-
|
|
2 *
|
|
3 *************************************************************************
|
|
4 *
|
|
5 * Copyright (C) 2009-2016, Intel Corporation
|
|
6 * All rights reserved.
|
|
7 *
|
|
8 * Redistribution and use in source and binary forms, with or without
|
|
9 * modification, are permitted provided that the following conditions
|
|
10 * are met:
|
|
11 *
|
|
12 * * Redistributions of source code must retain the above copyright
|
|
13 * notice, this list of conditions and the following disclaimer.
|
|
14 * * Redistributions in binary form must reproduce the above copyright
|
|
15 * notice, this list of conditions and the following disclaimer in
|
|
16 * the documentation and/or other materials provided with the
|
|
17 * distribution.
|
|
18 * * Neither the name of Intel Corporation nor the names of its
|
|
19 * contributors may be used to endorse or promote products derived
|
|
20 * from this software without specific prior written permission.
|
|
21 *
|
|
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
26 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
29 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
|
32 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
33 * POSSIBILITY OF SUCH DAMAGE.
|
|
34 *
|
|
35 * *********************************************************************
|
|
36 *
|
|
37 * PLEASE NOTE: This file is a downstream copy of a file maintained in
|
|
38 * a repository at cilkplus.org. Changes made to this file that are not
|
|
39 * submitted through the contribution process detailed at
|
|
40 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
|
|
41 * time that a new version is released. Changes only submitted to the
|
|
42 * GNU compiler collection or posted to the git repository at
|
|
43 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
|
|
44 * not tracked.
|
|
45 *
|
|
46 * We welcome your contributions to this open source project. Thank you
|
|
47 * for your assistance in helping us improve Cilk Plus.
|
|
48 **************************************************************************/
|
|
49
|
|
50 /**
|
|
51 * @file global_state.h
|
|
52 *
|
|
53 * @brief The global_state_t structure contains most of the global context
|
|
54 * maintained by the Intel Cilk runtime.
|
|
55 */
|
|
56
|
|
57 #ifndef INCLUDED_GLOBAL_STATE_DOT_H
|
|
58 #define INCLUDED_GLOBAL_STATE_DOT_H
|
|
59
|
|
60 #include <cilk/common.h>
|
|
61
|
|
62 #include "frame_malloc.h"
|
|
63 #include "stats.h"
|
|
64 #include "bug.h"
|
|
65 #include "cilk_fiber.h"
|
|
66
|
|
67 __CILKRTS_BEGIN_EXTERN_C
|
|
68
|
|
/**
 * Non-null place-holder for a stack handle that has no meaningful value.
 *
 * The value is a sentinel address only; it is never meant to be
 * dereferenced.
 */
#define PLACEHOLDER_FIBER  ((cilk_fiber *) -2)
|
|
73
|
|
/**
 * States for record_or_replay
 */
enum record_replay_t {
    RECORD_REPLAY_NONE, /**< Not recording or replaying a log */
    RECORD_LOG,         /**< Recording a log for replay later */
    REPLAY_LOG          /**< Replay a log recorded earlier */
};
|
|
82
|
|
/**
 * @brief Global state structure version.
 *
 * Since the global state is exposed for debugger access, we need a version
 * number to let it know that the version of the structure is what it expects
 * to see.  If any of the fields marked as (fixed) below are changed, the
 * version number needs to be bumped.
 */
#define GLOBAL_STATE_VERSION 0
|
|
92
|
|
93 /**
|
|
94 * @brief The global state is a structure that is shared by all workers in
|
|
95 * Cilk.
|
|
96 *
|
|
97 * Make the structure ready for use by calling
|
|
98 * cilkg_init_global_state() and then cilkg_publish_global_state().
|
|
99 *
|
|
100 * The same global lock should be held while both of these methods are
|
|
101 * called. These methods are split because it is useful to execute
|
|
102 * other runtime initialization code in between.
|
|
103 *
|
|
104 * After cilkg_publish_global_state() has completed, Cilk runtime
|
|
105 * methods may call cilkg_get_global_state() to look at the published
|
|
106 * value without holding the global lock.
|
|
107 *
|
|
108 * Finally, clean up the global state by calling
|
|
109 * cilkg_deinit_global_state(). This method should be called only
|
|
110 * after all calls to cilkg_get_global_state() have completed, and
|
|
111 * while holding the global lock.
|
|
112 *
|
|
113 * Before initialization and after deinitialization, the fields in the
|
|
114 * global state have unspecified values, except for a few special
|
|
115 * fields labeled "USER SETTING", which can be read and written before
|
|
116 * initialization and after deinitialization.
|
|
117 */
|
|
118
|
|
119 struct global_state_t { /* COMMON_PORTABLE */
|
|
120
|
|
121 /* Fields described as "(fixed)" should not be changed after
|
|
122 * initialization.
|
|
123 */
|
|
124
|
|
125 /*************************************************************************
|
|
126 * Note that debugger integration must reach into the
|
|
127 * global state! The debugger integration is depending on the
|
|
128 * offsets of the addr_size, system_workers, total_workers,
|
|
129 * stealing_disabled, sysdep, and workers. If these offsets change, the
|
|
130 * debugger integration library will need to be changed to match!!!
|
|
131 *************************************************************************/
|
|
132
|
|
133 uint16_t addr_size; ///< Number of bytes for an address, used by debugger (fixed)
|
|
134
|
|
135 uint16_t version; ///< Version of this structure (fixed)
|
|
136
|
|
137 int system_workers; ///< Number of system workers (fixed)
|
|
138
|
|
139 /**
|
|
140 * @brief USER SETTING: Maximum number of user workers that can be
|
|
141 * bound to cilk workers.
|
|
142 *
|
|
143 * 0 unless set by user. Call cilkg_calc_max_user_workers to get
|
|
144 * the value.
|
|
145 */
|
|
146 int max_user_workers;
|
|
147
|
|
148 int total_workers; ///< Total number of worker threads allocated (fixed)
|
|
149
|
|
150 int workers_running; ///< True when system workers have beens started */
|
|
151
|
|
152 /// Set by debugger to disable stealing (fixed)
|
|
153 int stealing_disabled;
|
|
154
|
|
155 /// System-dependent part of the global state
|
|
156 struct global_sysdep_state *sysdep;
|
|
157
|
|
158 /// Array of worker structures.
|
|
159 __cilkrts_worker **workers;
|
|
160
|
|
161 /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
|
|
162
|
|
163 /// Number of frames in each worker's lazy task queue
|
|
164 __STDNS size_t ltqsize;
|
|
165
|
|
166 /**
|
|
167 * @brief USER SETTING: Force all possible reductions.
|
|
168 *
|
|
169 * TRUE if running a p-tool that requires reducers to call the reduce()
|
|
170 * method even if no actual stealing occurs.
|
|
171 *
|
|
172 * When set to TRUE, runtime will simulate steals, forcing calls to the
|
|
173 * the reduce() methods of reducers.
|
|
174 *
|
|
175 */
|
|
176 int force_reduce;
|
|
177
|
|
178 /// USER SETTING: Per-worker fiber pool size
|
|
179 int fiber_pool_size;
|
|
180
|
|
181 /// USER SETTING: Global fiber pool size
|
|
182 int global_fiber_pool_size;
|
|
183
|
|
184 /**
|
|
185 * @brief TRUE when workers should exit scheduling loop so we can
|
|
186 * shut down the runtime and free the global state.
|
|
187 *
|
|
188 * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
|
|
189 * by idle workers. We need to ensure that it's not in a cache line which
|
|
190 * may be invalidated by other cores. The surrounding fields are either
|
|
191 * constant after initialization or not used until shutdown (stats) so we
|
|
192 * should be OK.
|
|
193 */
|
|
194 volatile int work_done;
|
|
195
|
|
196 int under_ptool; ///< True when running under a serial PIN tool
|
|
197
|
|
198 statistics stats; ///< Statistics on use of runtime
|
|
199
|
|
200 /**
|
|
201 * @brief USER SETTING: Maximum number of stacks the runtime will
|
|
202 * allocate (apart from those created by the OS when worker
|
|
203 * threads are created).
|
|
204 *
|
|
205 * If max_stacks == 0,there is no pre-defined maximum.
|
|
206 */
|
|
207 unsigned max_stacks;
|
|
208
|
|
209 /// Size of each stack
|
|
210 size_t stack_size;
|
|
211
|
|
212 /// Global cache for per-worker memory
|
|
213 struct __cilkrts_frame_cache frame_malloc;
|
|
214
|
|
215 /// Global fiber pool
|
|
216 cilk_fiber_pool fiber_pool;
|
|
217
|
|
218 /**
|
|
219 * @brief Track whether the runtime has failed to allocate a
|
|
220 * stack.
|
|
221 *
|
|
222 * Setting this flag prevents multiple warnings from being
|
|
223 * issued.
|
|
224 */
|
|
225 int failure_to_allocate_stack;
|
|
226
|
|
227 /**
|
|
228 * @brief USER SETTING: indicate record or replay log.
|
|
229 * Set to NULL if not used in this run.
|
|
230 */
|
|
231 char *record_replay_file_name;
|
|
232
|
|
233 /**
|
|
234 * @brief Record/replay state.
|
|
235 * Valid states are:
|
|
236 * RECORD_REPLAY_NONE - Not recording or replaying a log
|
|
237 * RECORD_LOG - Recording a log for replay later
|
|
238 * REPLAY_LOG - Replay a log recorded earlier
|
|
239 */
|
|
240 enum record_replay_t record_or_replay;
|
|
241
|
|
242 /**
|
|
243 * @brief Buffer to force max_steal_failures to appear on a
|
|
244 * different cache line from the previous member variables.
|
|
245 *
|
|
246 * This padding is needed because max_steal_failures is read
|
|
247 * constantly and other modified values in the global state will
|
|
248 * cause thrashing.
|
|
249 */
|
|
250 char cache_buf[64];
|
|
251
|
|
252 /**
|
|
253 * @brief Maximum number of times a thread should fail to steal
|
|
254 * before checking if Cilk is shutting down.
|
|
255 */
|
|
256 unsigned int max_steal_failures;
|
|
257
|
|
258 /// Pointer to scheduler entry point
|
|
259 void (*scheduler)(__cilkrts_worker *w);
|
|
260
|
|
261 /**
|
|
262 * @brief Buffer to force P and Q to appear on a different cache
|
|
263 * line from the previous member variables.
|
|
264 */
|
|
265 char cache_buf_2[64];
|
|
266
|
|
267 int P; ///< USER SETTING: number of system workers + 1 (fixed)
|
|
268 int Q; ///< Number of user threads currently bound to workers
|
|
269 };
|
|
270
|
|
271 /**
|
|
272 * @brief Initialize the global state object. This method must both
|
|
273 * complete before referencing any fields in the global state, except
|
|
274 * those specified as "user-settable values".
|
|
275 */
|
|
276 global_state_t* cilkg_init_global_state();
|
|
277
|
|
278 /**
|
|
279 * @brief Publish the global state object, so that
|
|
280 * cilkg_is_published can return true.
|
|
281 *
|
|
282 * @param g - the global state created by cilkg_init_global_state() to
|
|
283 * publish.
|
|
284 *
|
|
285 * After the global state object has been published, a thread should
|
|
286 * not modify this state unless it has exclusive access (i.e., holds
|
|
287 * the global lock).
|
|
288 */
|
|
289 void cilkg_publish_global_state(global_state_t* g);
|
|
290
|
|
/**
 * @brief Return true if the global state has been fully initialized
 * and published, and has not been deinitialized.
 */
int cilkg_is_published(void);
|
|
296
|
|
/**
 * @brief De-initializes the global state object.  Must be called to free
 * resources when the global state is no longer needed.
 */
void cilkg_deinit_global_state(void);
|
|
302
|
|
303 /**
|
|
304 * @brief Returns the global state object. Result is valid only if the
|
|
305 * global state has been published (see cilkg_publish_global_state()).
|
|
306 */
|
|
307 static inline
|
|
308 global_state_t* cilkg_get_global_state(void)
|
|
309 {
|
|
310 // "private" extern declaration:
|
|
311 extern global_state_t *cilkg_singleton_ptr;
|
|
312
|
|
313 __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
|
|
314 return cilkg_singleton_ptr;
|
|
315 }
|
|
316
|
|
317
|
|
/**
 * @brief Implementation of __cilkrts_set_param.
 *
 * Set user controllable parameters
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 *
 * @attention The wide character version __cilkrts_set_param_w() is available
 * only on Windows.
 *
 * Allowable parameter names:
 *
 * - "nworkers" - number of processors that should run Cilk code.
 *   The value is a string of digits to be parsed by strtol.
 *
 * - "force reduce" - test reducer callbacks by allocating new views
 *   for every spawn within which a reducer is accessed.  This can
 *   significantly reduce performance.  The value is "1" or "true"
 *   to enable, "0" or "false" to disable.
 *   @warning Enabling "force reduce" when running with more than a single
 *   worker is currently broken.
 *
 * - "max user workers" - (Not publicly documented) Sets the number of slots
 *   allocated for user worker threads
 *
 * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
 *   the per-worker stack cache.  Range 1 .. 42.  See
 *   cilkg_init_global_state for details.
 *
 * - "shared stacks" - (Not publicly documented) Maximum number of stacks
 *   we'll hold in the global stack cache. Maximum value is 42.  See
 *   __cilkrts_make_global_state for details
 *
 * - "nstacks" - (Not publicly documented at this time, though it may be
 *   exposed in the future) Sets the maximum number of stacks permitted at one
 *   time.  If the runtime reaches this maximum, it will cease to allocate
 *   stacks and the app will lose parallelism.  0 means unlimited.  Default is
 *   unlimited.  Minimum is twice the number of worker threads, though that
 *   cannot be tested at this time.
 */
int cilkg_set_param(const char* param, const char* value);
|
|
#ifdef _WIN32
/**
 * @brief Implementation of __cilkrts_set_param for Unicode characters on
 * Windows.  See the documentation on @ref cilkg_set_param for more details.
 *
 * Set user controllable parameters
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 */
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
#endif
|
|
376
|
|
377 /**
|
|
378 * @brief implementation of __cilkrts_get_nworkers()
|
|
379 */
|
|
380 static inline
|
|
381 int cilkg_get_nworkers(void)
|
|
382 {
|
|
383 // "private" extern declaration
|
|
384 extern global_state_t* cilkg_get_user_settable_values(void);
|
|
385 return cilkg_get_user_settable_values()->P;
|
|
386 }
|
|
387
|
|
/**
 * @brief implementation of __cilkrts_get_total_workers()
 *
 * @return The value computed by cilkg_calc_total_workers().
 */
static inline
int cilkg_get_total_workers(void)
{
    // "private" extern declaration
    extern int cilkg_calc_total_workers(void);

    // This number can fluctuate until initialization so we
    // compute it from scratch
    return cilkg_calc_total_workers();
}
|
|
401
|
|
402 /**
|
|
403 * @brief implementation of __cilkrts_get_force_reduce()
|
|
404 */
|
|
405 static inline
|
|
406 int cilkg_get_force_reduce(void)
|
|
407 {
|
|
408 // "private" extern declaration
|
|
409 extern global_state_t* cilkg_get_user_settable_values(void);
|
|
410 return cilkg_get_user_settable_values()->force_reduce;
|
|
411 }
|
|
412
|
|
413 /**
|
|
414 * @brief implementation of __cilkrts_get_stack_size()
|
|
415 */
|
|
416 static inline
|
|
417 size_t cilkg_get_stack_size(void)
|
|
418 {
|
|
419 // "private" extern declaration
|
|
420 extern global_state_t* cilkg_get_user_settable_values(void);
|
|
421 return cilkg_get_user_settable_values()->stack_size;
|
|
422 }
|
|
423
|
|
424 /**
|
|
425 * @brief Run the scheduler function stored in the global_state
|
|
426 *
|
|
427 * Look up the scheduler function in global_state and run it. Report a fatal
|
|
428 * error if an exception escapes the scheduler function.
|
|
429 *
|
|
430 * @param w - Worker structure to associate with the current thread.
|
|
431 *
|
|
432 * @attention The scheduler field of the global state must be set before this
|
|
433 * function is called.
|
|
434 */
|
|
435 void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
|
|
436
|
|
437 __CILKRTS_END_EXTERN_C
|
|
438
|
|
439 #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
|