Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/neon.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison legend: equal | deleted | inserted | replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 ;; ARM NEON coprocessor Machine Description | 1 ;; ARM NEON coprocessor Machine Description |
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc. | 2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc. |
3 ;; Written by CodeSourcery. | 3 ;; Written by CodeSourcery. |
4 ;; | 4 ;; |
5 ;; This file is part of GCC. | 5 ;; This file is part of GCC. |
6 ;; | 6 ;; |
7 ;; GCC is free software; you can redistribute it and/or modify it | 7 ;; GCC is free software; you can redistribute it and/or modify it |
23 ;; type attribute definitions. | 23 ;; type attribute definitions. |
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) | 24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) |
25 | 25 |
26 (define_insn "*neon_mov<mode>" | 26 (define_insn "*neon_mov<mode>" |
27 [(set (match_operand:VDX 0 "nonimmediate_operand" | 27 [(set (match_operand:VDX 0 "nonimmediate_operand" |
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us") | 28 "=w,Un,w, w, ?r,?w,?r, ?Us") |
29 (match_operand:VDX 1 "general_operand" | 29 (match_operand:VDX 1 "general_operand" |
30 " w,w, Dn,Uni, w, r, r, Usi,r"))] | 30 " w,w, Dn,Uni, w, r, Usi,r"))] |
31 "TARGET_NEON | 31 "TARGET_NEON |
32 && (register_operand (operands[0], <MODE>mode) | 32 && (register_operand (operands[0], <MODE>mode) |
33 || register_operand (operands[1], <MODE>mode))" | 33 || register_operand (operands[1], <MODE>mode))" |
34 { | 34 { |
35 if (which_alternative == 2) | 35 if (which_alternative == 2) |
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>"; | 59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>"; |
60 default: return output_move_double (operands, true, NULL); | 60 default: return output_move_double (operands, true, NULL); |
61 } | 61 } |
62 } | 62 } |
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\ | 63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\ |
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\ | 64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\ |
65 neon_load1_2reg, neon_store1_2reg") | 65 neon_load1_2reg, neon_store1_2reg") |
66 (set_attr "length" "4,4,4,4,4,4,8,8,8") | 66 (set_attr "length" "4,4,4,4,4,4,8,8") |
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") | 67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*") |
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") | 68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*") |
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")]) | 69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")]) |
70 | 70 |
71 (define_insn "*neon_mov<mode>" | 71 (define_insn "*neon_mov<mode>" |
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand" | 72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand" |
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us") | 73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us") |
74 (match_operand:VQXMOV 1 "general_operand" | 74 (match_operand:VQXMOV 1 "general_operand" |
111 (set_attr "length" "4,8,4,8,8,8,16,8,16") | 111 (set_attr "length" "4,8,4,8,8,8,16,8,16") |
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") | 112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") |
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") | 113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") |
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) | 114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) |
115 | 115 |
116 /* We define these mov expanders to match the standard mov$a optab to prevent | |
117 the mid-end from trying to do a subreg for these modes which is the most | |
118 inefficient way to expand the move. Also big-endian subreg's aren't | |
119 allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS. | |
120 Without these RTL generation patterns the mid-end would attempt to take a | |
121 sub-reg and may ICE if it can't. */ | |
122 | |
116 (define_expand "movti" | 123 (define_expand "movti" |
117 [(set (match_operand:TI 0 "nonimmediate_operand" "") | 124 [(set (match_operand:TI 0 "nonimmediate_operand" "") |
118 (match_operand:TI 1 "general_operand" ""))] | 125 (match_operand:TI 1 "general_operand" ""))] |
119 "TARGET_NEON" | 126 "TARGET_NEON" |
120 { | 127 { |
135 if (!REG_P (operands[0])) | 142 if (!REG_P (operands[0])) |
136 operands[1] = force_reg (<MODE>mode, operands[1]); | 143 operands[1] = force_reg (<MODE>mode, operands[1]); |
137 } | 144 } |
138 }) | 145 }) |
139 | 146 |
140 (define_expand "movv4hf" | 147 (define_expand "mov<mode>" |
141 [(set (match_operand:V4HF 0 "s_register_operand") | 148 [(set (match_operand:VH 0 "s_register_operand") |
142 (match_operand:V4HF 1 "s_register_operand"))] | 149 (match_operand:VH 1 "s_register_operand"))] |
143 "TARGET_NEON && TARGET_FP16" | 150 "TARGET_NEON" |
144 { | 151 { |
145 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS | |
146 causing an ICE on big-endian because it cannot extract subregs in | |
147 this case. */ | |
148 if (can_create_pseudo_p ()) | 152 if (can_create_pseudo_p ()) |
149 { | 153 { |
150 if (!REG_P (operands[0])) | 154 if (!REG_P (operands[0])) |
151 operands[1] = force_reg (V4HFmode, operands[1]); | 155 operands[1] = force_reg (<MODE>mode, operands[1]); |
152 } | |
153 }) | |
154 | |
155 (define_expand "movv8hf" | |
156 [(set (match_operand:V8HF 0 "") | |
157 (match_operand:V8HF 1 ""))] | |
158 "TARGET_NEON && TARGET_FP16" | |
159 { | |
160 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS | |
161 causing an ICE on big-endian because it cannot extract subregs in | |
162 this case. */ | |
163 if (can_create_pseudo_p ()) | |
164 { | |
165 if (!REG_P (operands[0])) | |
166 operands[1] = force_reg (V8HFmode, operands[1]); | |
167 } | 156 } |
168 }) | 157 }) |
169 | 158 |
170 (define_insn "*neon_mov<mode>" | 159 (define_insn "*neon_mov<mode>" |
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") | 160 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") |
1178 vshl.u64\t%P0, %P1, %P2" | 1167 vshl.u64\t%P0, %P1, %P2" |
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")] | 1168 [(set_attr "type" "neon_shift_imm, neon_shift_reg")] |
1180 ) | 1169 ) |
1181 | 1170 |
1182 (define_insn_and_split "ashldi3_neon" | 1171 (define_insn_and_split "ashldi3_neon" |
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") | 1172 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w") |
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") | 1173 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w") |
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) | 1174 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i"))) |
1186 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) | 1175 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X")) |
1187 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) | 1176 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) |
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) | 1177 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X")) |
1189 (clobber (reg:CC_C CC_REGNUM))] | 1178 (clobber (reg:CC_C CC_REGNUM))] |
1190 "TARGET_NEON" | 1179 "TARGET_NEON" |
1191 "#" | 1180 "#" |
1192 "TARGET_NEON && reload_completed" | 1181 "TARGET_NEON && reload_completed" |
1193 [(const_int 0)] | 1182 [(const_int 0)] |
1219 { | 1208 { |
1220 /* The shift expanders support either full overlap or no overlap. */ | 1209 /* The shift expanders support either full overlap or no overlap. */ |
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) | 1210 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) |
1222 || REGNO (operands[0]) == REGNO (operands[1])); | 1211 || REGNO (operands[0]) == REGNO (operands[1])); |
1223 | 1212 |
1224 if (operands[2] == CONST1_RTX (SImode)) | 1213 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], |
1225 /* This clobbers CC. */ | 1214 operands[2], operands[3], operands[4]); |
1226 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); | |
1227 else | |
1228 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], | |
1229 operands[2], operands[3], operands[4]); | |
1230 } | 1215 } |
1231 DONE; | 1216 DONE; |
1232 }" | 1217 }" |
1233 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") | 1218 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") |
1234 (set_attr "opt" "*,*,speed,speed,speed,*,*") | 1219 (set_attr "opt" "*,*,speed,speed,speed,*,*") |
1278 ) | 1263 ) |
1279 | 1264 |
1280 ;; ashrdi3_neon | 1265 ;; ashrdi3_neon |
1281 ;; lshrdi3_neon | 1266 ;; lshrdi3_neon |
1282 (define_insn_and_split "<shift>di3_neon" | 1267 (define_insn_and_split "<shift>di3_neon" |
1283 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") | 1268 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w") |
1284 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") | 1269 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") |
1285 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) | 1270 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) |
1286 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) | 1271 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) |
1287 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) | 1272 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) |
1288 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) | 1273 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) |
1323 { | 1308 { |
1324 /* The shift expanders support either full overlap or no overlap. */ | 1309 /* The shift expanders support either full overlap or no overlap. */ |
1325 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) | 1310 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) |
1326 || REGNO (operands[0]) == REGNO (operands[1])); | 1311 || REGNO (operands[0]) == REGNO (operands[1])); |
1327 | 1312 |
1328 if (operands[2] == CONST1_RTX (SImode)) | 1313 /* This clobbers CC (ASHIFTRT by register only). */ |
1329 /* This clobbers CC. */ | 1314 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1], |
1330 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); | 1315 operands[2], operands[3], operands[4]); |
1331 else | |
1332 /* This clobbers CC (ASHIFTRT by register only). */ | |
1333 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1], | |
1334 operands[2], operands[3], operands[4]); | |
1335 } | 1316 } |
1336 | 1317 |
1337 DONE; | 1318 DONE; |
1338 }" | 1319 }" |
1339 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") | 1320 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") |
2296 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], | 2277 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], |
2297 operands[1])); | 2278 operands[1])); |
2298 DONE; | 2279 DONE; |
2299 }) | 2280 }) |
2300 | 2281 |
2282 ;; The expand RTL structure here is not important. | |
2283 ;; We use the gen_* functions anyway. | |
2284 ;; We just need something to wrap the iterators around. | |
2285 | |
2286 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>" | |
2287 [(set (match_operand:VCVTF 0 "s_register_operand") | |
2288 (unspec:VCVTF | |
2289 [(match_operand:VCVTF 1 "s_register_operand") | |
2290 (PLUSMINUS:<VFML> | |
2291 (match_operand:<VFML> 2 "s_register_operand") | |
2292 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))] | |
2293 "TARGET_FP16FML" | |
2294 { | |
2295 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
2296 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0], | |
2297 operands[1], | |
2298 operands[2], | |
2299 operands[3], | |
2300 half, half)); | |
2301 DONE; | |
2302 }) | |
2303 | |
2304 (define_insn "vfmal_low<mode>_intrinsic" | |
2305 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2306 (fma:VCVTF | |
2307 (float_extend:VCVTF | |
2308 (vec_select:<VFMLSEL> | |
2309 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2310 (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
2311 (float_extend:VCVTF | |
2312 (vec_select:<VFMLSEL> | |
2313 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
2314 (match_operand:<VFML> 5 "vect_par_constant_low" ""))) | |
2315 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2316 "TARGET_FP16FML" | |
2317 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" | |
2318 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2319 ) | |
2320 | |
2321 (define_insn "vfmsl_high<mode>_intrinsic" | |
2322 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2323 (fma:VCVTF | |
2324 (float_extend:VCVTF | |
2325 (neg:<VFMLSEL> | |
2326 (vec_select:<VFMLSEL> | |
2327 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2328 (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2329 (float_extend:VCVTF | |
2330 (vec_select:<VFMLSEL> | |
2331 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
2332 (match_operand:<VFML> 5 "vect_par_constant_high" ""))) | |
2333 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2334 "TARGET_FP16FML" | |
2335 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" | |
2336 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2337 ) | |
2338 | |
2339 (define_insn "vfmal_high<mode>_intrinsic" | |
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2341 (fma:VCVTF | |
2342 (float_extend:VCVTF | |
2343 (vec_select:<VFMLSEL> | |
2344 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2345 (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2346 (float_extend:VCVTF | |
2347 (vec_select:<VFMLSEL> | |
2348 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
2349 (match_operand:<VFML> 5 "vect_par_constant_high" ""))) | |
2350 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2351 "TARGET_FP16FML" | |
2352 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" | |
2353 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2354 ) | |
2355 | |
2356 (define_insn "vfmsl_low<mode>_intrinsic" | |
2357 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2358 (fma:VCVTF | |
2359 (float_extend:VCVTF | |
2360 (neg:<VFMLSEL> | |
2361 (vec_select:<VFMLSEL> | |
2362 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2363 (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2364 (float_extend:VCVTF | |
2365 (vec_select:<VFMLSEL> | |
2366 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
2367 (match_operand:<VFML> 5 "vect_par_constant_low" ""))) | |
2368 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2369 "TARGET_FP16FML" | |
2370 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" | |
2371 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2372 ) | |
2373 | |
2374 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>" | |
2375 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") | |
2376 (unspec:VCVTF | |
2377 [(match_operand:VCVTF 1 "s_register_operand") | |
2378 (PLUSMINUS:<VFML> | |
2379 (match_operand:<VFML> 2 "s_register_operand") | |
2380 (match_operand:<VFML> 3 "s_register_operand")) | |
2381 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] | |
2382 "TARGET_FP16FML" | |
2383 { | |
2384 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4]))); | |
2385 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
2386 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic | |
2387 (operands[0], operands[1], | |
2388 operands[2], operands[3], | |
2389 half, lane)); | |
2390 DONE; | |
2391 }) | |
2392 | |
2393 (define_insn "vfmal_lane_low<mode>_intrinsic" | |
2394 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2395 (fma:VCVTF | |
2396 (float_extend:VCVTF | |
2397 (vec_select:<VFMLSEL> | |
2398 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2399 (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
2400 (float_extend:VCVTF | |
2401 (vec_duplicate:<VFMLSEL> | |
2402 (vec_select:HF | |
2403 (match_operand:<VFML> 3 "s_register_operand" "x") | |
2404 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2405 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2406 "TARGET_FP16FML" | |
2407 { | |
2408 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2409 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2410 { | |
2411 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2412 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; | |
2413 } | |
2414 else | |
2415 { | |
2416 operands[5] = GEN_INT (lane); | |
2417 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; | |
2418 } | |
2419 } | |
2420 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2421 ) | |
2422 | |
2423 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>" | |
2424 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") | |
2425 (unspec:VCVTF | |
2426 [(match_operand:VCVTF 1 "s_register_operand") | |
2427 (PLUSMINUS:<VFML> | |
2428 (match_operand:<VFML> 2 "s_register_operand") | |
2429 (match_operand:<VFMLSEL2> 3 "s_register_operand")) | |
2430 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] | |
2431 "TARGET_FP16FML" | |
2432 { | |
2433 rtx lane | |
2434 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4]))); | |
2435 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
2436 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic | |
2437 (operands[0], operands[1], operands[2], operands[3], | |
2438 half, lane)); | |
2439 DONE; | |
2440 }) | |
2441 | |
2442 ;; Used to implement the intrinsics: | |
2443 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) | |
2444 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
2445 ;; Needs a bit of care to get the modes of the different sub-expressions right | |
2446 ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2447 ;; S or D subregister to select the appropriate lane from. | |
2448 | |
2449 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic" | |
2450 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2451 (fma:VCVTF | |
2452 (float_extend:VCVTF | |
2453 (vec_select:<VFMLSEL> | |
2454 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2455 (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
2456 (float_extend:VCVTF | |
2457 (vec_duplicate:<VFMLSEL> | |
2458 (vec_select:HF | |
2459 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2460 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2461 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2462 "TARGET_FP16FML" | |
2463 { | |
2464 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2465 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2466 int new_lane = lane % elts_per_reg; | |
2467 int regdiff = lane / elts_per_reg; | |
2468 operands[5] = GEN_INT (new_lane); | |
2469 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
2470 because we want the print_operand code to print the appropriate | |
2471 S or D register prefix. */ | |
2472 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2473 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
2474 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
2475 } | |
2476 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2477 ) | |
2478 | |
2479 ;; Used to implement the intrinsics: | |
2480 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) | |
2481 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
2482 ;; Needs a bit of care to get the modes of the different sub-expressions right | |
2483 ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2484 ;; S or D subregister to select the appropriate lane from. | |
2485 | |
2486 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic" | |
2487 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2488 (fma:VCVTF | |
2489 (float_extend:VCVTF | |
2490 (vec_select:<VFMLSEL> | |
2491 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2492 (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2493 (float_extend:VCVTF | |
2494 (vec_duplicate:<VFMLSEL> | |
2495 (vec_select:HF | |
2496 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2497 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2498 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2499 "TARGET_FP16FML" | |
2500 { | |
2501 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2502 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2503 int new_lane = lane % elts_per_reg; | |
2504 int regdiff = lane / elts_per_reg; | |
2505 operands[5] = GEN_INT (new_lane); | |
2506 /* We re-create operands[3] in the halved VFMLSEL mode | |
2507 because we've calculated the correct half-width subreg to extract | |
2508 the lane from and we want to print *that* subreg instead. */ | |
2509 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2510 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2511 } | |
2512 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2513 ) | |
2514 | |
2515 (define_insn "vfmal_lane_high<mode>_intrinsic" | |
2516 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2517 (fma:VCVTF | |
2518 (float_extend:VCVTF | |
2519 (vec_select:<VFMLSEL> | |
2520 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2521 (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2522 (float_extend:VCVTF | |
2523 (vec_duplicate:<VFMLSEL> | |
2524 (vec_select:HF | |
2525 (match_operand:<VFML> 3 "s_register_operand" "x") | |
2526 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2527 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2528 "TARGET_FP16FML" | |
2529 { | |
2530 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2531 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2532 { | |
2533 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2534 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2535 } | |
2536 else | |
2537 { | |
2538 operands[5] = GEN_INT (lane); | |
2539 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2540 } | |
2541 } | |
2542 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2543 ) | |
2544 | |
2545 (define_insn "vfmsl_lane_low<mode>_intrinsic" | |
2546 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2547 (fma:VCVTF | |
2548 (float_extend:VCVTF | |
2549 (neg:<VFMLSEL> | |
2550 (vec_select:<VFMLSEL> | |
2551 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2552 (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2553 (float_extend:VCVTF | |
2554 (vec_duplicate:<VFMLSEL> | |
2555 (vec_select:HF | |
2556 (match_operand:<VFML> 3 "s_register_operand" "x") | |
2557 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2558 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2559 "TARGET_FP16FML" | |
2560 { | |
2561 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2562 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2563 { | |
2564 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2565 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; | |
2566 } | |
2567 else | |
2568 { | |
2569 operands[5] = GEN_INT (lane); | |
2570 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; | |
2571 } | |
2572 } | |
2573 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2574 ) | |
2575 | |
2576 ;; Used to implement the intrinsics: | |
2577 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) | |
2578 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
2579 ;; Needs a bit of care to get the modes of the different sub-expressions right | |
2580 ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2581 ;; S or D subregister to select the appropriate lane from. | |
2582 | |
2583 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic" | |
2584 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2585 (fma:VCVTF | |
2586 (float_extend:VCVTF | |
2587 (neg:<VFMLSEL> | |
2588 (vec_select:<VFMLSEL> | |
2589 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2590 (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2591 (float_extend:VCVTF | |
2592 (vec_duplicate:<VFMLSEL> | |
2593 (vec_select:HF | |
2594 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2595 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2596 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2597 "TARGET_FP16FML" | |
2598 { | |
2599 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2600 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2601 int new_lane = lane % elts_per_reg; | |
2602 int regdiff = lane / elts_per_reg; | |
2603 operands[5] = GEN_INT (new_lane); | |
2604 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
2605 because we want the print_operand code to print the appropriate | |
2606 S or D register prefix. */ | |
2607 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2608 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
2609 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
2610 } | |
2611 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2612 ) | |
2613 | |
2614 ;; Used to implement the intrinsics: | |
2615 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) | |
2616 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
2617 ;; Needs a bit of care to get the modes of the different sub-expressions right | |
2618 ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2619 ;; S or D subregister to select the appropriate lane from. | |
2620 | |
2621 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic" | |
2622 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2623 (fma:VCVTF | |
2624 (float_extend:VCVTF | |
2625 (neg:<VFMLSEL> | |
2626 (vec_select:<VFMLSEL> | |
2627 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2628 (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2629 (float_extend:VCVTF | |
2630 (vec_duplicate:<VFMLSEL> | |
2631 (vec_select:HF | |
2632 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2633 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2634 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2635 "TARGET_FP16FML" | |
2636 { | |
2637 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2638 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2639 int new_lane = lane % elts_per_reg; | |
2640 int regdiff = lane / elts_per_reg; | |
2641 operands[5] = GEN_INT (new_lane); | |
2642 /* We re-create operands[3] in the halved VFMLSEL mode | |
2643 because we've calculated the correct half-width subreg to extract | |
2644 the lane from and we want to print *that* subreg instead. */ | |
2645 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2646 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2647 } | |
2648 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2649 ) | |
2650 | |
2651 (define_insn "vfmsl_lane_high<mode>_intrinsic" | |
2652 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2653 (fma:VCVTF | |
2654 (float_extend:VCVTF | |
2655 (neg:<VFMLSEL> | |
2656 (vec_select:<VFMLSEL> | |
2657 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2658 (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2659 (float_extend:VCVTF | |
2660 (vec_duplicate:<VFMLSEL> | |
2661 (vec_select:HF | |
2662 (match_operand:<VFML> 3 "s_register_operand" "x") | |
2663 (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2664 (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2665 "TARGET_FP16FML" | |
2666 { | |
2667 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2668 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2669 { | |
2670 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2671 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2672 } | |
2673 else | |
2674 { | |
2675 operands[5] = GEN_INT (lane); | |
2676 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2677 } | |
2678 } | |
2679 [(set_attr "type" "neon_fp_mla_s<q>")] | |
2680 ) | |
2681 | |
2301 ; Used for intrinsics when flag_unsafe_math_optimizations is false. | 2682 ; Used for intrinsics when flag_unsafe_math_optimizations is false. |
2302 | 2683 |
2303 (define_insn "neon_vmla<mode>_unspec" | 2684 (define_insn "neon_vmla<mode>_unspec" |
2304 [(set (match_operand:VDQW 0 "s_register_operand" "=w") | 2685 [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
2305 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | 2686 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") |
3120 (match_operand:VCVTF 2 "register_operand")] | 3501 (match_operand:VCVTF 2 "register_operand")] |
3121 "TARGET_NEON" | 3502 "TARGET_NEON" |
3122 "{ | 3503 "{ |
3123 rtx v_bitmask_cast; | 3504 rtx v_bitmask_cast; |
3124 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); | 3505 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); |
3125 int i, n_elt = GET_MODE_NUNITS (<MODE>mode); | 3506 rtx c = GEN_INT (0x80000000); |
3126 rtvec v = rtvec_alloc (n_elt); | |
3127 | |
3128 /* Create bitmask for vector select. */ | |
3129 for (i = 0; i < n_elt; ++i) | |
3130 RTVEC_ELT (v, i) = GEN_INT (0x80000000); | |
3131 | 3507 |
3132 emit_move_insn (v_bitmask, | 3508 emit_move_insn (v_bitmask, |
3133 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v)); | 3509 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c)); |
3134 emit_move_insn (operands[0], operands[2]); | 3510 emit_move_insn (operands[0], operands[2]); |
3135 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, | 3511 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, |
3136 <VCVTF:V_cmp_result>mode, 0); | 3512 <VCVTF:V_cmp_result>mode, 0); |
3137 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], | 3513 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], |
3138 operands[1])); | 3514 operands[1])); |
6317 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); | 6693 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); |
6318 DONE; | 6694 DONE; |
6319 }) | 6695 }) |
6320 | 6696 |
6321 (define_insn "neon_vabd<mode>_2" | 6697 (define_insn "neon_vabd<mode>_2" |
6322 [(set (match_operand:VDQ 0 "s_register_operand" "=w") | 6698 [(set (match_operand:VF 0 "s_register_operand" "=w") |
6323 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | 6699 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w") |
6324 (match_operand:VDQ 2 "s_register_operand" "w"))))] | 6700 (match_operand:VF 2 "s_register_operand" "w"))))] |
6325 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | 6701 "TARGET_NEON && flag_unsafe_math_optimizations" |
6326 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" | 6702 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
6327 [(set (attr "type") | 6703 [(set_attr "type" "neon_fp_abd_s<q>")] |
6328 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) | |
6329 (const_string "neon_fp_abd_s<q>") | |
6330 (const_string "neon_abd<q>")))] | |
6331 ) | 6704 ) |
6332 | 6705 |
6333 (define_insn "neon_vabd<mode>_3" | 6706 (define_insn "neon_vabd<mode>_3" |
6334 [(set (match_operand:VDQ 0 "s_register_operand" "=w") | 6707 [(set (match_operand:VF 0 "s_register_operand" "=w") |
6335 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") | 6708 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") |
6336 (match_operand:VDQ 2 "s_register_operand" "w")] | 6709 (match_operand:VF 2 "s_register_operand" "w")] |
6337 UNSPEC_VSUB)))] | 6710 UNSPEC_VSUB)))] |
6338 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | 6711 "TARGET_NEON && flag_unsafe_math_optimizations" |
6339 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" | 6712 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
6340 [(set (attr "type") | 6713 [(set_attr "type" "neon_fp_abd_s<q>")] |
6341 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) | |
6342 (const_string "neon_fp_abd_s<q>") | |
6343 (const_string "neon_abd<q>")))] | |
6344 ) | 6714 ) |
6345 | 6715 |
6346 ;; Copy from core-to-neon regs, then extend, not vice-versa | 6716 ;; Copy from core-to-neon regs, then extend, not vice-versa |
6347 | 6717 |
6348 (define_split | 6718 (define_split |