Mercurial > hg > CbC > CbC_gcc
comparison libgomp/testsuite/libgomp.c/sort-1.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Test and benchmark of a couple of parallel sorting algorithms. | |
2 Copyright (C) 2008 Free Software Foundation, Inc. | |
3 | |
4 GCC is free software; you can redistribute it and/or modify it under | |
5 the terms of the GNU General Public License as published by the Free | |
6 Software Foundation; either version 3, or (at your option) any later | |
7 version. | |
8 | |
9 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with GCC; see the file COPYING3. If not see | |
16 <http://www.gnu.org/licenses/>. */ | |
17 | |
18 #include <limits.h> | |
19 #include <omp.h> | |
20 #include <stdbool.h> | |
21 #include <stdio.h> | |
22 #include <stdlib.h> | |
23 #include <string.h> | |
24 | |
25 int failures; | |
26 | |
27 #define THRESHOLD 100 | |
28 | |
29 static void | |
30 verify (const char *name, double stime, int *array, int count) | |
31 { | |
32 int i; | |
33 double etime = omp_get_wtime (); | |
34 | |
35 printf ("%s: %g\n", name, etime - stime); | |
36 for (i = 1; i < count; i++) | |
37 if (array[i] < array[i - 1]) | |
38 { | |
39 printf ("%s: incorrectly sorted\n", name); | |
40 failures = 1; | |
41 } | |
42 } | |
43 | |
44 static void | |
45 insertsort (int *array, int s, int e) | |
46 { | |
47 int i, j, val; | |
48 for (i = s + 1; i <= e; i++) | |
49 { | |
50 val = array[i]; | |
51 j = i; | |
52 while (j-- > s && val < array[j]) | |
53 array[j + 1] = array[j]; | |
54 array[j + 1] = val; | |
55 } | |
56 } | |
57 | |
58 struct int_pair | |
59 { | |
60 int lo; | |
61 int hi; | |
62 }; | |
63 | |
64 struct int_pair_stack | |
65 { | |
66 struct int_pair *top; | |
67 #define STACK_SIZE 4 * CHAR_BIT * sizeof (int) | |
68 struct int_pair arr[STACK_SIZE]; | |
69 }; | |
70 | |
71 static inline void | |
72 init_int_pair_stack (struct int_pair_stack *stack) | |
73 { | |
74 stack->top = &stack->arr[0]; | |
75 } | |
76 | |
77 static inline void | |
78 push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi) | |
79 { | |
80 stack->top->lo = lo; | |
81 stack->top->hi = hi; | |
82 stack->top++; | |
83 } | |
84 | |
85 static inline void | |
86 pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi) | |
87 { | |
88 stack->top--; | |
89 *lo = stack->top->lo; | |
90 *hi = stack->top->hi; | |
91 } | |
92 | |
93 static inline int | |
94 size_int_pair_stack (struct int_pair_stack *stack) | |
95 { | |
96 return stack->top - &stack->arr[0]; | |
97 } | |
98 | |
99 static inline void | |
100 busy_wait (void) | |
101 { | |
102 #if defined __i386__ || defined __x86_64__ | |
103 __asm volatile ("rep; nop" : : : "memory"); | |
104 #elif defined __ia64__ | |
105 __asm volatile ("hint @pause" : : : "memory"); | |
106 #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__) | |
107 __asm volatile ("membar #LoadLoad" : : : "memory"); | |
108 #else | |
109 __asm volatile ("" : : : "memory"); | |
110 #endif | |
111 } | |
112 | |
113 static inline void | |
114 swap (int *array, int a, int b) | |
115 { | |
116 int val = array[a]; | |
117 array[a] = array[b]; | |
118 array[b] = val; | |
119 } | |
120 | |
121 static inline int | |
122 choose_pivot (int *array, int lo, int hi) | |
123 { | |
124 int mid = (lo + hi) / 2; | |
125 | |
126 if (array[mid] < array[lo]) | |
127 swap (array, lo, mid); | |
128 if (array[hi] < array[mid]) | |
129 { | |
130 swap (array, mid, hi); | |
131 if (array[mid] < array[lo]) | |
132 swap (array, lo, mid); | |
133 } | |
134 return array[mid]; | |
135 } | |
136 | |
137 static inline int | |
138 partition (int *array, int lo, int hi) | |
139 { | |
140 int pivot = choose_pivot (array, lo, hi); | |
141 int left = lo; | |
142 int right = hi; | |
143 | |
144 for (;;) | |
145 { | |
146 while (array[++left] < pivot); | |
147 while (array[--right] > pivot); | |
148 if (left >= right) | |
149 break; | |
150 swap (array, left, right); | |
151 } | |
152 return left; | |
153 } | |
154 | |
155 static void | |
156 sort1 (int *array, int count) | |
157 { | |
158 omp_lock_t lock; | |
159 struct int_pair_stack global_stack; | |
160 int busy = 1; | |
161 int num_threads; | |
162 | |
163 omp_init_lock (&lock); | |
164 init_int_pair_stack (&global_stack); | |
165 #pragma omp parallel firstprivate (array, count) | |
166 { | |
167 int lo = 0, hi = 0, mid, next_lo, next_hi; | |
168 bool idle = true; | |
169 struct int_pair_stack local_stack; | |
170 | |
171 init_int_pair_stack (&local_stack); | |
172 if (omp_get_thread_num () == 0) | |
173 { | |
174 num_threads = omp_get_num_threads (); | |
175 hi = count - 1; | |
176 idle = false; | |
177 } | |
178 | |
179 for (;;) | |
180 { | |
181 if (hi - lo < THRESHOLD) | |
182 { | |
183 insertsort (array, lo, hi); | |
184 lo = hi; | |
185 } | |
186 if (lo >= hi) | |
187 { | |
188 if (size_int_pair_stack (&local_stack) == 0) | |
189 { | |
190 again: | |
191 omp_set_lock (&lock); | |
192 if (size_int_pair_stack (&global_stack) == 0) | |
193 { | |
194 if (!idle) | |
195 busy--; | |
196 if (busy == 0) | |
197 { | |
198 omp_unset_lock (&lock); | |
199 break; | |
200 } | |
201 omp_unset_lock (&lock); | |
202 idle = true; | |
203 while (size_int_pair_stack (&global_stack) == 0 | |
204 && busy) | |
205 busy_wait (); | |
206 goto again; | |
207 } | |
208 if (idle) | |
209 busy++; | |
210 pop_int_pair_stack (&global_stack, &lo, &hi); | |
211 omp_unset_lock (&lock); | |
212 idle = false; | |
213 } | |
214 else | |
215 pop_int_pair_stack (&local_stack, &lo, &hi); | |
216 } | |
217 | |
218 mid = partition (array, lo, hi); | |
219 if (mid - lo < hi - mid) | |
220 { | |
221 next_lo = mid; | |
222 next_hi = hi; | |
223 hi = mid - 1; | |
224 } | |
225 else | |
226 { | |
227 next_lo = lo; | |
228 next_hi = mid - 1; | |
229 lo = mid; | |
230 } | |
231 | |
232 if (next_hi - next_lo < THRESHOLD) | |
233 insertsort (array, next_lo, next_hi); | |
234 else | |
235 { | |
236 if (size_int_pair_stack (&global_stack) < num_threads - 1) | |
237 { | |
238 int size; | |
239 | |
240 omp_set_lock (&lock); | |
241 size = size_int_pair_stack (&global_stack); | |
242 if (size < num_threads - 1 && size < STACK_SIZE) | |
243 push_int_pair_stack (&global_stack, next_lo, next_hi); | |
244 else | |
245 push_int_pair_stack (&local_stack, next_lo, next_hi); | |
246 omp_unset_lock (&lock); | |
247 } | |
248 else | |
249 push_int_pair_stack (&local_stack, next_lo, next_hi); | |
250 } | |
251 } | |
252 } | |
253 omp_destroy_lock (&lock); | |
254 } | |
255 | |
256 static void | |
257 sort2_1 (int *array, int lo, int hi, int num_threads, int *busy) | |
258 { | |
259 int mid; | |
260 | |
261 if (hi - lo < THRESHOLD) | |
262 { | |
263 insertsort (array, lo, hi); | |
264 return; | |
265 } | |
266 | |
267 mid = partition (array, lo, hi); | |
268 | |
269 if (*busy >= num_threads) | |
270 { | |
271 sort2_1 (array, lo, mid - 1, num_threads, busy); | |
272 sort2_1 (array, mid, hi, num_threads, busy); | |
273 return; | |
274 } | |
275 | |
276 #pragma omp atomic | |
277 *busy += 1; | |
278 | |
279 #pragma omp parallel num_threads (2) \ | |
280 firstprivate (array, lo, hi, mid, num_threads, busy) | |
281 { | |
282 if (omp_get_thread_num () == 0) | |
283 sort2_1 (array, lo, mid - 1, num_threads, busy); | |
284 else | |
285 { | |
286 sort2_1 (array, mid, hi, num_threads, busy); | |
287 #pragma omp atomic | |
288 *busy -= 1; | |
289 } | |
290 } | |
291 } | |
292 | |
293 static void | |
294 sort2 (int *array, int count) | |
295 { | |
296 int num_threads; | |
297 int busy = 1; | |
298 | |
299 #pragma omp parallel | |
300 #pragma omp single nowait | |
301 num_threads = omp_get_num_threads (); | |
302 | |
303 sort2_1 (array, 0, count - 1, num_threads, &busy); | |
304 } | |
305 | |
306 #if _OPENMP >= 200805 | |
307 static void | |
308 sort3_1 (int *array, int lo, int hi) | |
309 { | |
310 int mid; | |
311 | |
312 if (hi - lo < THRESHOLD) | |
313 { | |
314 insertsort (array, lo, hi); | |
315 return; | |
316 } | |
317 | |
318 mid = partition (array, lo, hi); | |
319 #pragma omp task | |
320 sort3_1 (array, lo, mid - 1); | |
321 sort3_1 (array, mid, hi); | |
322 } | |
323 | |
324 static void | |
325 sort3 (int *array, int count) | |
326 { | |
327 #pragma omp parallel | |
328 #pragma omp single | |
329 sort3_1 (array, 0, count - 1); | |
330 } | |
331 #endif | |
332 | |
333 int | |
334 main (int argc, char **argv) | |
335 { | |
336 int i, count = 1000000; | |
337 double stime; | |
338 int *unsorted, *sorted, num_threads; | |
339 if (argc >= 2) | |
340 count = strtoul (argv[1], NULL, 0); | |
341 | |
342 unsorted = malloc (count * sizeof (int)); | |
343 sorted = malloc (count * sizeof (int)); | |
344 if (unsorted == NULL || sorted == NULL) | |
345 { | |
346 puts ("allocation failure"); | |
347 exit (1); | |
348 } | |
349 | |
350 srand (0xdeadbeef); | |
351 for (i = 0; i < count; i++) | |
352 unsorted[i] = rand (); | |
353 | |
354 omp_set_nested (1); | |
355 omp_set_dynamic (0); | |
356 #pragma omp parallel | |
357 #pragma omp single nowait | |
358 num_threads = omp_get_num_threads (); | |
359 printf ("Threads: %d\n", num_threads); | |
360 | |
361 memcpy (sorted, unsorted, count * sizeof (int)); | |
362 stime = omp_get_wtime (); | |
363 sort1 (sorted, count); | |
364 verify ("sort1", stime, sorted, count); | |
365 | |
366 memcpy (sorted, unsorted, count * sizeof (int)); | |
367 stime = omp_get_wtime (); | |
368 sort2 (sorted, count); | |
369 verify ("sort2", stime, sorted, count); | |
370 | |
371 #if _OPENMP >= 200805 | |
372 memcpy (sorted, unsorted, count * sizeof (int)); | |
373 stime = omp_get_wtime (); | |
374 sort3 (sorted, count); | |
375 verify ("sort3", stime, sorted, count); | |
376 #endif | |
377 | |
378 return 0; | |
379 } |