/* bitstring.c -- Builtins for HSAIL bitstring instructions.

   Copyright (C) 2015-2020 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   Permission is hereby granted, free of charge, to any person obtaining a
   copy of this software and associated documentation files
   (the "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
|
|
26
|
|
27 #include <stdint.h>
|
|
28 #include <limits.h>
|
|
29
|
|
/* Shared body of the __hsail_bitextract_* builtins.

   DEST_TYPE is the operand/result type and UDEST_TYPE its unsigned
   counterpart.  SRC0 is the value to extract from, SRC1 the bit offset of
   the field and SRC2 its width; offset and width are both reduced modulo
   the bit size of DEST_TYPE.  A zero width yields 0.  Otherwise the field
   is moved to the top of the word and shifted back down, so a signed
   DEST_TYPE gets the field sign-extended and an unsigned one gets it
   zero-extended.

   The left shift is performed on UDEST_TYPE because left-shifting a
   negative signed value is undefined behavior in C.  The right shift of a
   negative value relies on the compiler shifting arithmetically (GCC
   documents that it does).

   When the field would extend past the most significant bit, the plain
   "shift up, shift down" expression would use an out-of-range shift count,
   which is undefined behavior in C.  In that case the bits from OFFSET
   upwards are returned instead.  */
#define BITEXTRACT(DEST_TYPE, UDEST_TYPE, SRC0, SRC1, SRC2) \
  const uint32_t bits = sizeof (DEST_TYPE) * CHAR_BIT; \
  const uint32_t offset = (SRC1) & (bits - 1); \
  const uint32_t width = (SRC2) & (bits - 1); \
  if (width == 0) \
    return 0; \
  if (width > bits - offset) \
    return (SRC0) >> offset; \
  return (DEST_TYPE) ((UDEST_TYPE) (SRC0) << (bits - width - offset)) \
	 >> (bits - width)

/* Extract SRC2 bits of SRC0 starting at bit SRC1, zero-extended.  */
uint32_t
__hsail_bitextract_u32 (uint32_t src0, uint32_t src1, uint32_t src2)
{
  BITEXTRACT (uint32_t, uint32_t, src0, src1, src2);
}

/* Extract SRC2 bits of SRC0 starting at bit SRC1, sign-extended.  */
int32_t
__hsail_bitextract_s32 (int32_t src0, uint32_t src1, uint32_t src2)
{
  BITEXTRACT (int32_t, uint32_t, src0, src1, src2);
}

/* 64-bit variant of __hsail_bitextract_u32.  */
uint64_t
__hsail_bitextract_u64 (uint64_t src0, uint32_t src1, uint32_t src2)
{
  BITEXTRACT (uint64_t, uint64_t, src0, src1, src2);
}

/* 64-bit variant of __hsail_bitextract_s32.  */
int64_t
__hsail_bitextract_s64 (int64_t src0, uint32_t src1, uint32_t src2)
{
  BITEXTRACT (int64_t, uint64_t, src0, src1, src2);
}
|
|
62
|
|
/* Shared body of the __hsail_bitinsert_* builtins.

   Replaces a bit field of SRC0 with the low bits of SRC1 and returns the
   result.  SRC2 is the bit offset of the field and SRC3 its width; both
   are reduced modulo the bit size of DEST_TYPE.  A zero width produces an
   empty mask, so SRC0 is returned unchanged.  DEST_TYPE must be unsigned
   so that bits shifted past the most significant bit are simply
   discarded.  */
#define BITINSERT(DEST_TYPE, SRC0, SRC1, SRC2, SRC3) \
  const uint32_t bits = sizeof (DEST_TYPE) * CHAR_BIT; \
  const uint32_t offset = (SRC2) & (bits - 1); \
  const uint32_t width = (SRC3) & (bits - 1); \
  const DEST_TYPE mask = ((DEST_TYPE) 1 << width) - 1; \
  return ((SRC0) & ~(mask << offset)) | (((SRC1) & mask) << offset)

/* Insert the low SRC3 bits of SRC1 into SRC0 at bit offset SRC2.  */
uint32_t
__hsail_bitinsert_u32 (uint32_t src0, uint32_t src1, uint32_t src2,
		       uint32_t src3)
{
  BITINSERT (uint32_t, src0, src1, src2, src3);
}

/* 64-bit variant of __hsail_bitinsert_u32.
   NOTE(review): the return type is signed although the operation is
   unsigned; it is kept as is so the existing interface does not change.
   Confirm against the builtin's declaration before changing it.  */
int64_t
__hsail_bitinsert_u64 (uint64_t src0, uint64_t src1, uint32_t src2,
		       uint32_t src3)
{
  BITINSERT (uint64_t, src0, src1, src2, src3);
}
|
|
82
|
|
/* Shared body of the __hsail_bitmask_* builtins.

   Builds a mask of consecutive one bits.  SRC0 is the bit offset of the
   lowest set bit and SRC1 the number of set bits; both are reduced modulo
   the bit size of DEST_TYPE.  A zero width yields 0.  DEST_TYPE must be
   unsigned so that bits shifted past the most significant bit are simply
   discarded.  */
#define BITMASK(DEST_TYPE, SRC0, SRC1) \
  const uint32_t bits = sizeof (DEST_TYPE) * CHAR_BIT; \
  const uint32_t offset = (SRC0) & (bits - 1); \
  const uint32_t width = (SRC1) & (bits - 1); \
  const DEST_TYPE mask = ((DEST_TYPE) 1 << width) - 1; \
  return mask << offset

/* Return a 32-bit mask with SRC1 one bits starting at bit SRC0.  */
uint32_t
__hsail_bitmask_u32 (uint32_t src0, uint32_t src1)
{
  BITMASK (uint32_t, src0, src1);
}

/* Return a 64-bit mask with SRC1 one bits starting at bit SRC0.  */
uint64_t
__hsail_bitmask_u64 (uint32_t src0, uint32_t src1)
{
  BITMASK (uint64_t, src0, src1);
}
|
|
100
|
|
/* The dummy, but readable version from
   http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious
   This (also) often maps to a single instruction in DSPs.

   Shared body of the __hsail_bitrev_* builtins.  R starts as a copy of
   the input and, for every remaining bit of V, is shifted left once and
   receives that bit in its lowest position.  S counts the shifts that are
   still missing when V runs out of set bits early; the final shift by S
   brings the total to (bit size - 1) and moves the reversed value into
   place.  DEST_TYPE must be unsigned.  */
#define BITREV(DEST_TYPE, SRC) \
  DEST_TYPE v = (SRC); \
  DEST_TYPE r = v; \
  int s = sizeof (DEST_TYPE) * CHAR_BIT - 1; \
 \
  for (v >>= 1; v; v >>= 1) \
    { \
      r <<= 1; \
      r |= v & 1; \
      s--; \
    } \
  return r << s

/* Return SRC0 with the order of its 32 bits reversed.  */
uint32_t
__hsail_bitrev_u32 (uint32_t src0)
{
  BITREV (uint32_t, src0);
}

/* Return SRC0 with the order of its 64 bits reversed.  */
uint64_t
__hsail_bitrev_u64 (uint64_t src0)
{
  BITREV (uint64_t, src0);
}
|
|
129
|
|
/* Bitwise select: for every bit position, take the bit from SRC1 where
   the corresponding bit of SRC0 is set and from SRC2 where it is clear.  */
uint32_t
__hsail_bitselect_u32 (uint32_t src0, uint32_t src1, uint32_t src2)
{
  return (src1 & src0) | (src2 & ~src0);
}
|
|
135
|
|
/* Bitwise select on 64-bit operands: for every bit position, take the bit
   from SRC1 where the corresponding bit of SRC0 is set and from SRC2
   where it is clear.  */
uint64_t
__hsail_bitselect_u64 (uint64_t src0, uint64_t src1, uint64_t src2)
{
  return (src1 & src0) | (src2 & ~src0);
}
|
|
141
|
|
/* HSAIL defines the result for a zero input, so the gcc builtin
   __builtin_clz* () cannot be used directly: its result is undefined
   for 0.  __builtin_ffs () has defined behavior for 0, but returns 0
   while HSAIL requires to return -1.  */

/* Return the number of zero bits above the most significant set bit of
   SRC0, or -1 (all bits set in the unsigned result) if SRC0 is 0.  */
uint32_t
__hsail_firstbit_u32 (uint32_t src0)
{
  if (src0 == 0)
    return (uint32_t) -1;
  return __builtin_clz (src0);
}

/* Signed variant: a negative SRC0 is bit-complemented first, so the
   result is the position (counted from the most significant bit) of the
   first bit that differs from the sign bit.  Returns -1 for 0 and for
   -1, whose complement is 0.  */
uint32_t
__hsail_firstbit_s32 (int32_t src0)
{
  uint32_t converted = src0 >= 0 ? (uint32_t) src0 : ~(uint32_t) src0;
  return __hsail_firstbit_u32 (converted);
}
|
|
160
|
|
/* Return the number of zero bits above the most significant set bit of
   SRC0, or -1 (all bits set in the unsigned result) if SRC0 is 0.  The
   zero check is needed because the result of __builtin_clz* () is
   undefined for 0.  The "ll" builtin is used because unsigned long long
   is at least 64 bits wide on every target, whereas unsigned long is
   only 32 bits wide on some.  */
uint32_t
__hsail_firstbit_u64 (uint64_t src0)
{
  if (src0 == 0)
    return (uint32_t) -1;
  return __builtin_clzll (src0);
}

/* Signed variant: a negative SRC0 is bit-complemented first, so the
   result is the position (counted from the most significant bit) of the
   first bit that differs from the sign bit.  Returns -1 for 0 and for
   -1, whose complement is 0.  */
uint32_t
__hsail_firstbit_s64 (int64_t src0)
{
  uint64_t converted = src0 >= 0 ? (uint64_t) src0 : ~(uint64_t) src0;
  return __hsail_firstbit_u64 (converted);
}
|
|
175
|
|
/* Return the index of the least significant set bit of SRC0, or -1 (all
   bits set in the unsigned result) if SRC0 is 0.  The zero check is
   needed because the result of __builtin_ctz () is undefined for 0.  */
uint32_t
__hsail_lastbit_u32 (uint32_t src0)
{
  if (src0 == 0)
    return (uint32_t) -1;
  return __builtin_ctz (src0);
}
|
|
183
|
|
/* Return the index of the least significant set bit of SRC0, or -1 (all
   bits set in the unsigned result) if SRC0 is 0.  The zero check is
   needed because the result of __builtin_ctz* () is undefined for 0.
   The "ll" builtin is used because unsigned long long is at least 64
   bits wide on every target, whereas unsigned long is only 32 bits wide
   on some.  */
uint32_t
__hsail_lastbit_u64 (uint64_t src0)
{
  if (src0 == 0)
    return (uint32_t) -1;
  return __builtin_ctzll (src0);
}
|