131
|
1 /* Platform-independent deterministic sort function.
|
145
|
2 Copyright (C) 2018-2020 Free Software Foundation, Inc.
|
131
|
3 Contributed by Alexander Monakov.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 3, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 /* This implements a sort function suitable for GCC use cases:
|
|
22 - signature-compatible to C qsort, but relaxed contract:
|
|
23 - may apply the comparator to elements in a temporary buffer
|
|
24 - may abort on allocation failure
|
|
25 - deterministic (but not necessarily stable)
|
|
26 - fast, especially for common cases (0-5 elements of size 8 or 4)
|
|
27
|
|
28 The implementation uses a network sort for up to 5 elements and
|
|
29 a merge sort on top of that. Neither stage has branches depending on
|
|
30 comparator result, trading extra arithmetic for branch mispredictions. */
|
|
31
|
|
32 #ifdef GENERATOR_FILE
|
|
33 #include "bconfig.h"
|
|
34 #else
|
|
35 #include "config.h"
|
|
36 #endif
|
|
37
|
|
38 #include "system.h"
|
|
39
|
|
40 #define likely(cond) __builtin_expect ((cond), 1)
|
|
41
|
|
42 #ifdef __GNUC__
|
|
43 #define noinline __attribute__ ((__noinline__))
|
|
44 #else
|
|
45 #define noinline
|
|
46 #endif
|
|
47
|
|
48 /* C-style qsort comparator function type. */
|
|
49 typedef int cmp_fn (const void *, const void *);
|
|
50
|
|
51 /* Structure holding read-mostly (read-only in netsort) context. */
|
|
52 struct sort_ctx
|
|
53 {
|
|
54 cmp_fn *cmp; // pointer to comparator
|
|
55 char *out; // output buffer
|
|
56 size_t n; // number of elements
|
|
57 size_t size; // element size
|
|
58 size_t nlim; // limit for network sort
|
|
59 };
|
|
60
|
145
|
61 /* Like sort_ctx, but for use with qsort_r-style comparators. Several
|
|
62 functions in this file are templates that work with either context type. */
|
|
63 struct sort_r_ctx
|
|
64 {
|
|
65 void *data;
|
|
66 sort_r_cmp_fn *cmp_;
|
|
67 char *out;
|
|
68 size_t n;
|
|
69 size_t size;
|
|
70 size_t nlim;
|
|
71 int cmp (const void *a, const void *b)
|
|
72 {
|
|
73 return cmp_ (a, b, data);
|
|
74 }
|
|
75 };
|
|
76
|
131
|
77 /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
|
|
78 placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */
|
145
|
79 template<typename sort_ctx>
|
131
|
80 static void
|
|
81 reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
|
|
82 {
|
|
83 #define REORDER_23(TYPE, STRIDE, OFFSET) \
|
|
84 do { \
|
|
85 TYPE t0, t1; \
|
|
86 memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \
|
|
87 memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \
|
|
88 char *out = c->out + OFFSET; \
|
|
89 if (likely (c->n == 3)) \
|
|
90 memmove (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE));\
|
|
91 memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \
|
|
92 memcpy (out, &t1, sizeof (TYPE)); \
|
|
93 } while (0)
|
|
94
|
|
95 if (likely (c->size == sizeof (size_t)))
|
|
96 REORDER_23 (size_t, sizeof (size_t), 0);
|
|
97 else if (likely (c->size == sizeof (int)))
|
|
98 REORDER_23 (int, sizeof (int), 0);
|
|
99 else
|
|
100 {
|
|
101 size_t offset = 0, step = sizeof (size_t);
|
|
102 for (; offset + step <= c->size; offset += step)
|
|
103 REORDER_23 (size_t, c->size, offset);
|
|
104 for (; offset < c->size; offset++)
|
|
105 REORDER_23 (char, c->size, offset);
|
|
106 }
|
|
107 }
|
|
108
|
|
109 /* Like reorder23, but permute 4 or 5 elements. */
|
145
|
110 template<typename sort_ctx>
|
131
|
111 static void
|
|
112 reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)
|
|
113 {
|
|
114 #define REORDER_45(TYPE, STRIDE, OFFSET) \
|
|
115 do { \
|
|
116 TYPE t0, t1, t2, t3; \
|
|
117 memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \
|
|
118 memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \
|
|
119 memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \
|
|
120 memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \
|
|
121 char *out = c->out + OFFSET; \
|
|
122 if (likely (c->n == 5)) \
|
|
123 memmove (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE));\
|
|
124 memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \
|
|
125 memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \
|
|
126 memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \
|
|
127 memcpy (out, &t3, sizeof (TYPE)); \
|
|
128 } while (0)
|
|
129
|
|
130 if (likely (c->size == sizeof (size_t)))
|
|
131 REORDER_45 (size_t, sizeof (size_t), 0);
|
|
132 else if (likely(c->size == sizeof (int)))
|
|
133 REORDER_45 (int, sizeof (int), 0);
|
|
134 else
|
|
135 {
|
|
136 size_t offset = 0, step = sizeof (size_t);
|
|
137 for (; offset + step <= c->size; offset += step)
|
|
138 REORDER_45 (size_t, c->size, offset);
|
|
139 for (; offset < c->size; offset++)
|
|
140 REORDER_45 (char, c->size, offset);
|
|
141 }
|
|
142 }
|
|
143
|
|
144 /* Helper for netsort. Invoke comparator CMP on E0 and E1.
|
|
145 Return E0^E1 if E0 compares less than E1, zero otherwise.
|
|
146 This is noinline to avoid code growth and confine invocation
|
|
147 to a single call site, assisting indirect branch prediction. */
|
145
|
148 template<typename sort_ctx>
|
131
|
149 noinline static intptr_t
|
145
|
150 cmp1 (char *e0, char *e1, sort_ctx *c)
|
131
|
151 {
|
|
152 intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
|
145
|
153 return x & (c->cmp (e0, e1) >> 31);
|
131
|
154 }
|
|
155
|
|
156 /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
|
|
157 IN may be equal to C->OUT, in which case elements are sorted in place. */
|
145
|
158 template<typename sort_ctx>
|
131
|
159 static void
|
|
160 netsort (char *in, sort_ctx *c)
|
|
161 {
|
|
162 #define CMP(e0, e1) \
|
|
163 do { \
|
145
|
164 intptr_t x = cmp1 (e1, e0, c); \
|
131
|
165 e0 = (char *)((intptr_t)e0 ^ x); \
|
|
166 e1 = (char *)((intptr_t)e1 ^ x); \
|
|
167 } while (0)
|
|
168
|
|
169 char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size;
|
|
170 CMP (e0, e1);
|
|
171 if (likely (c->n == 3))
|
|
172 {
|
|
173 CMP (e1, e2);
|
|
174 CMP (e0, e1);
|
|
175 }
|
|
176 if (c->n <= 3)
|
|
177 return reorder23 (c, e0, e1, e2);
|
|
178 char *e3 = e2 + c->size, *e4 = e3 + c->size;
|
|
179 if (likely (c->n == 5))
|
|
180 {
|
|
181 CMP (e3, e4);
|
|
182 CMP (e2, e4);
|
|
183 }
|
|
184 CMP (e2, e3);
|
|
185 if (likely (c->n == 5))
|
|
186 {
|
|
187 CMP (e0, e3);
|
|
188 CMP (e1, e4);
|
|
189 }
|
|
190 CMP (e0, e2);
|
|
191 CMP (e1, e3);
|
|
192 CMP (e1, e2);
|
|
193 reorder45 (c, e0, e1, e2, e3, e4);
|
|
194 }
|
|
195
|
|
196 /* Execute merge sort on N elements from IN, placing them into OUT,
|
|
197 using TMP as temporary storage if IN is equal to OUT.
|
|
198 This is a stable sort if netsort is used only for 2 or 3 elements. */
|
145
|
199 template<typename sort_ctx>
|
131
|
200 static void
|
|
201 mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)
|
|
202 {
|
|
203 if (likely (n <= c->nlim))
|
|
204 {
|
|
205 c->out = out;
|
|
206 c->n = n;
|
|
207 return netsort (in, c);
|
|
208 }
|
|
209 size_t nl = n / 2, nr = n - nl, sz = nl * c->size;
|
|
210 char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in;
|
|
211 /* Sort the right half, outputting to right half of OUT. */
|
|
212 mergesort (mid, c, nr, r, tmp);
|
|
213 /* Sort the left half, leaving left half of OUT free. */
|
|
214 mergesort (in, c, nl, l, mid);
|
|
215 /* Merge sorted halves given by L, R to [OUT, END). */
|
|
216 #define MERGE_ELTSIZE(SIZE) \
|
|
217 do { \
|
|
218 intptr_t mr = c->cmp (r, l) >> 31; \
|
|
219 intptr_t lr = (intptr_t)l ^ (intptr_t)r; \
|
|
220 lr = (intptr_t)l ^ (lr & mr); \
|
|
221 out = (char *)memcpy (out, (char *)lr, SIZE); \
|
|
222 out += SIZE; \
|
|
223 r += mr & SIZE; \
|
|
224 if (r == out) return; \
|
|
225 l += ~mr & SIZE; \
|
|
226 } while (r != end)
|
|
227
|
|
228 if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
|
|
229 {
|
|
230 char *end = out + n * c->size;
|
|
231 if (sizeof (size_t) == 8 && likely (c->size == 8))
|
|
232 MERGE_ELTSIZE (8);
|
|
233 else if (likely (c->size == 4))
|
|
234 MERGE_ELTSIZE (4);
|
|
235 else
|
|
236 MERGE_ELTSIZE (c->size);
|
|
237 }
|
|
238 memcpy (out, l, r - out);
|
|
239 }
|
|
240
|
145
|
241 #if CHECKING_P
|
|
242 /* Adapter for using two-argument comparators in functions expecting the
|
|
243 three-argument sort_r_cmp_fn type. */
|
|
244 static int
|
|
245 cmp2to3 (const void *a, const void *b, void *c)
|
|
246 {
|
|
247 return ((cmp_fn *)c) (a, b);
|
|
248 }
|
|
249 #endif
|
|
250
|
|
251 /* Replacement for C qsort. */
|
131
|
252 void
|
|
253 gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
|
|
254 {
|
|
255 if (n < 2)
|
|
256 return;
|
|
257 size_t nlim = 5;
|
|
258 bool stable = (ssize_t) size < 0;
|
|
259 if (stable)
|
|
260 nlim = 3, size = ~size;
|
|
261 char *base = (char *)vbase;
|
|
262 sort_ctx c = {cmp, base, n, size, nlim};
|
|
263 long long scratch[32];
|
|
264 size_t bufsz = (n / 2) * size;
|
|
265 void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
|
|
266 mergesort (base, &c, n, base, (char *)buf);
|
|
267 if (buf != scratch)
|
|
268 free (buf);
|
|
269 #if CHECKING_P
|
145
|
270 qsort_chk (vbase, n, size, cmp2to3, (void*)cmp);
|
131
|
271 #endif
|
|
272 }
|
|
273
|
145
|
274 /* Substitute for Glibc qsort_r. */
|
|
275 void
|
|
276 gcc_sort_r (void *vbase, size_t n, size_t size, sort_r_cmp_fn *cmp, void *data)
|
|
277 {
|
|
278 if (n < 2)
|
|
279 return;
|
|
280 char *base = (char *)vbase;
|
|
281 sort_r_ctx c = {data, cmp, base, n, size, 5};
|
|
282 long long scratch[32];
|
|
283 size_t bufsz = (n / 2) * size;
|
|
284 void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
|
|
285 mergesort (base, &c, n, base, (char *)buf);
|
|
286 if (buf != scratch)
|
|
287 free (buf);
|
|
288 #if CHECKING_P
|
|
289 qsort_chk (vbase, n, size, cmp, data);
|
|
290 #endif
|
|
291 }
|
|
292
|
|
293 /* Stable sort, signature-compatible to C qsort. */
|
131
|
294 void
|
|
295 gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
|
|
296 {
|
|
297 gcc_qsort (vbase, n, ~size, cmp);
|
|
298 }
|