comparison gcc/config/arm/neon.md @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
left column: 111:04ced10e8804    right column: 131:84e7813d76e9
1 ;; ARM NEON coprocessor Machine Description 1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc. 2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery. 3 ;; Written by CodeSourcery.
4 ;; 4 ;;
5 ;; This file is part of GCC. 5 ;; This file is part of GCC.
6 ;; 6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it 7 ;; GCC is free software; you can redistribute it and/or modify it
23 ;; type attribute definitions. 23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) 24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25 25
26 (define_insn "*neon_mov<mode>" 26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand" 27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us") 28 "=w,Un,w, w, ?r,?w,?r, ?Us")
29 (match_operand:VDX 1 "general_operand" 29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))] 30 " w,w, Dn,Uni, w, r, Usi,r"))]
31 "TARGET_NEON 31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode) 32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))" 33 || register_operand (operands[1], <MODE>mode))"
34 { 34 {
35 if (which_alternative == 2) 35 if (which_alternative == 2)
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>"; 59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL); 60 default: return output_move_double (operands, true, NULL);
61 } 61 }
62 } 62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\ 63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\ 64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
65 neon_load1_2reg, neon_store1_2reg") 65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8") 66 (set_attr "length" "4,4,4,4,4,4,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") 67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") 68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")]) 69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
70 70
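For orientation, a minimal C sketch (assuming arm_neon.h and NEON enabled) of the 64-bit D-register values this VDX move pattern handles; a plain register-to-register copy of any of these types goes through *neon_mov<mode>:

    #include <arm_neon.h>

    /* Simple D-register moves matched by *neon_mov<mode> above.  */
    int64x1_t copy_di   (int64x1_t x) { return x; }
    int32x2_t copy_v2si (int32x2_t x) { return x; }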
71 (define_insn "*neon_mov<mode>" 71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand" 72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us") 73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand" 74 (match_operand:VQXMOV 1 "general_operand"
111 (set_attr "length" "4,8,4,8,8,8,16,8,16") 111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") 112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") 113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) 114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115 115
116 /* We define these mov expanders to match the standard mov$a optab to prevent
117 the mid-end from trying to do a subreg for these modes, which is the most
118 inefficient way to expand the move. Also, big-endian subregs aren't
119 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
120 Without these RTL generation patterns, the mid-end would attempt to take a
121 subreg and may ICE if it can't. */
122
116 (define_expand "movti" 123 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "") 124 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))] 125 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON" 126 "TARGET_NEON"
120 { 127 {
135 if (!REG_P (operands[0])) 142 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]); 143 operands[1] = force_reg (<MODE>mode, operands[1]);
137 } 144 }
138 }) 145 })
139 146
140 (define_expand "movv4hf" 147 (define_expand "mov<mode>"
141 [(set (match_operand:V4HF 0 "s_register_operand") 148 [(set (match_operand:VH 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))] 149 (match_operand:VH 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16" 150 "TARGET_NEON"
144 { 151 {
145 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
147 this case. */
148 if (can_create_pseudo_p ()) 152 if (can_create_pseudo_p ())
149 { 153 {
150 if (!REG_P (operands[0])) 154 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]); 155 operands[1] = force_reg (<MODE>mode, operands[1]);
152 }
153 })
154
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
159 {
160 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
162 this case. */
163 if (can_create_pseudo_p ())
164 {
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
167 } 156 }
168 }) 157 })
169 158
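A minimal C sketch of code that exercises the unified mov<mode> expander for the VH modes (V4HF and V8HF), assuming arm_neon.h and a NEON FP16 target such as -mfpu=neon-fp16:

    #include <arm_neon.h>

    /* Plain vector copies; force_reg in the expander keeps the
       mid-end from taking a subreg on big-endian.  */
    float16x4_t copy_v4hf (float16x4_t x) { return x; }
    float16x8_t copy_v8hf (float16x8_t x) { return x; }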
170 (define_insn "*neon_mov<mode>" 159 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") 160 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
1178 vshl.u64\t%P0, %P1, %P2" 1167 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")] 1168 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1180 ) 1169 )
1181 1170
1182 (define_insn_and_split "ashldi3_neon" 1171 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") 1172 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") 1173 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) 1174 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1186 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) 1175 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1187 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) 1176 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) 1177 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1189 (clobber (reg:CC_C CC_REGNUM))] 1178 (clobber (reg:CC_C CC_REGNUM))]
1190 "TARGET_NEON" 1179 "TARGET_NEON"
1191 "#" 1180 "#"
1192 "TARGET_NEON && reload_completed" 1181 "TARGET_NEON && reload_completed"
1193 [(const_int 0)] 1182 [(const_int 0)]
1219 { 1208 {
1220 /* The shift expanders support either full overlap or no overlap. */ 1209 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) 1210 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1])); 1211 || REGNO (operands[0]) == REGNO (operands[1]));
1223 1212
1224 if (operands[2] == CONST1_RTX (SImode)) 1213 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1225 /* This clobbers CC. */ 1214 operands[2], operands[3], operands[4]);
1226 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1227 else
1228 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1229 operands[2], operands[3], operands[4]);
1230 } 1215 }
1231 DONE; 1216 DONE;
1232 }" 1217 }"
1233 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") 1218 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1234 (set_attr "opt" "*,*,speed,speed,speed,*,*") 1219 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1278 ) 1263 )
1279 1264
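As background for the core-register alternatives, a rough C sketch of the two-word sequence arm_emit_coreregs_64bit_shift expands to for a variable 64-bit left shift (illustrative only; the real expansion is RTL and does its own shift-count handling):

    #include <stdint.h>

    uint64_t shl64 (uint64_t x, unsigned n)  /* n assumed in 0..63 */
    {
      uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
      uint32_t out_lo, out_hi;
      if (n < 32)
        {
          out_hi = (hi << n) | (n ? lo >> (32 - n) : 0);
          out_lo = lo << n;
        }
      else
        {
          out_hi = lo << (n - 32);
          out_lo = 0;
        }
      return ((uint64_t) out_hi << 32) | out_lo;
    }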
1280 ;; ashrdi3_neon 1265 ;; ashrdi3_neon
1281 ;; lshrdi3_neon 1266 ;; lshrdi3_neon
1282 (define_insn_and_split "<shift>di3_neon" 1267 (define_insn_and_split "<shift>di3_neon"
1283 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") 1268 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1284 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") 1269 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1285 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) 1270 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1286 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) 1271 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1287 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) 1272 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1288 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) 1273 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1323 { 1308 {
1324 /* The shift expanders support either full overlap or no overlap. */ 1309 /* The shift expanders support either full overlap or no overlap. */
1325 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) 1310 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1326 || REGNO (operands[0]) == REGNO (operands[1])); 1311 || REGNO (operands[0]) == REGNO (operands[1]));
1327 1312
1328 if (operands[2] == CONST1_RTX (SImode)) 1313 /* This clobbers CC (ASHIFTRT by register only). */
1329 /* This clobbers CC. */ 1314 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1330 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); 1315 operands[2], operands[3], operands[4]);
1331 else
1332 /* This clobbers CC (ASHIFTRT by register only). */
1333 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1334 operands[2], operands[3], operands[4]);
1335 } 1316 }
1336 1317
1337 DONE; 1318 DONE;
1338 }" 1319 }"
1339 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") 1320 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
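The right-shift counterpart, again only a sketch of the core-register path (LSHIFTRT shown; ASHIFTRT differs in sign-extending the high word, and its by-register form clobbers CC as noted above):

    #include <stdint.h>

    uint64_t lshr64 (uint64_t x, unsigned n)  /* n assumed in 0..63 */
    {
      uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
      uint32_t out_lo, out_hi;
      if (n < 32)
        {
          out_lo = (lo >> n) | (n ? hi << (32 - n) : 0);
          out_hi = hi >> n;
        }
      else
        {
          out_lo = hi >> (n - 32);
          out_hi = 0;
        }
      return ((uint64_t) out_hi << 32) | out_lo;
    }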
2296 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], 2277 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2297 operands[1])); 2278 operands[1]));
2298 DONE; 2279 DONE;
2299 }) 2280 })
2300 2281
2282 ;; The expand RTL structure here is not important.
2283 ;; We use the gen_* functions anyway.
2284 ;; We just need something to wrap the iterators around.
2285
2286 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2287 [(set (match_operand:VCVTF 0 "s_register_operand")
2288 (unspec:VCVTF
2289 [(match_operand:VCVTF 1 "s_register_operand")
2290 (PLUSMINUS:<VFML>
2291 (match_operand:<VFML> 2 "s_register_operand")
2292 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2293 "TARGET_FP16FML"
2294 {
2295 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2296 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2297 operands[1],
2298 operands[2],
2299 operands[3],
2300 half, half));
2301 DONE;
2302 })
2303
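A hedged usage sketch of the intrinsics these expanders back, using the early-ACLE _u32 spellings that appear in the comments further down this file (the exact non-lane name is assumed by analogy; requires a target with +fp16fml):

    #include <arm_neon.h>

    /* Low-half widening multiply-accumulate: each f32 lane of r
       accumulates the product of the widened low-half f16 lanes.  */
    float32x2_t mla_low (float32x2_t r, float16x4_t a, float16x4_t b)
    {
      return vfmlal_low_u32 (r, a, b);
    }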
2304 (define_insn "vfmal_low<mode>_intrinsic"
2305 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2306 (fma:VCVTF
2307 (float_extend:VCVTF
2308 (vec_select:<VFMLSEL>
2309 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2310 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2311 (float_extend:VCVTF
2312 (vec_select:<VFMLSEL>
2313 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2314 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2315 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2316 "TARGET_FP16FML"
2317 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2318 [(set_attr "type" "neon_fp_mla_s<q>")]
2319 )
2320
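Lane-wise, the fma/float_extend/vec_select RTL above corresponds to this scalar reference (Q-register case, V8HF inputs into a V4SF accumulator; written unfused here for clarity, assuming _Float16 support):

    void vfmlal_low_ref (float r[4], const _Float16 a[8],
                         const _Float16 b[8])
    {
      for (int i = 0; i < 4; i++)       /* low half: lanes 0..3 */
        r[i] += (float) a[i] * (float) b[i];
    }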
2321 (define_insn "vfmsl_high<mode>_intrinsic"
2322 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2323 (fma:VCVTF
2324 (float_extend:VCVTF
2325 (neg:<VFMLSEL>
2326 (vec_select:<VFMLSEL>
2327 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2328 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2329 (float_extend:VCVTF
2330 (vec_select:<VFMLSEL>
2331 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2332 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2333 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2334 "TARGET_FP16FML"
2335 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2336 [(set_attr "type" "neon_fp_mla_s<q>")]
2337 )
2338
2339 (define_insn "vfmal_high<mode>_intrinsic"
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2341 (fma:VCVTF
2342 (float_extend:VCVTF
2343 (vec_select:<VFMLSEL>
2344 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2345 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2346 (float_extend:VCVTF
2347 (vec_select:<VFMLSEL>
2348 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2349 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2350 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2351 "TARGET_FP16FML"
2352 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2353 [(set_attr "type" "neon_fp_mla_s<q>")]
2354 )
2355
2356 (define_insn "vfmsl_low<mode>_intrinsic"
2357 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2358 (fma:VCVTF
2359 (float_extend:VCVTF
2360 (neg:<VFMLSEL>
2361 (vec_select:<VFMLSEL>
2362 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2363 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2364 (float_extend:VCVTF
2365 (vec_select:<VFMLSEL>
2366 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2367 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2368 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2369 "TARGET_FP16FML"
2370 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2371 [(set_attr "type" "neon_fp_mla_s<q>")]
2372 )
2373
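The vfmsl forms differ only in the (neg ...) wrapped around the first multiplicand; the matching scalar sketch is:

    void vfmlsl_low_ref (float r[4], const _Float16 a[8],
                         const _Float16 b[8])
    {
      for (int i = 0; i < 4; i++)
        r[i] += (float) -a[i] * (float) b[i];  /* fused in hardware */
    }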
2374 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2375 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2376 (unspec:VCVTF
2377 [(match_operand:VCVTF 1 "s_register_operand")
2378 (PLUSMINUS:<VFML>
2379 (match_operand:<VFML> 2 "s_register_operand")
2380 (match_operand:<VFML> 3 "s_register_operand"))
2381 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2382 "TARGET_FP16FML"
2383 {
2384 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2385 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2386 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2387 (operands[0], operands[1],
2388 operands[2], operands[3],
2389 half, lane));
2390 DONE;
2391 })
2392
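NEON_ENDIAN_LANE_N remaps the user-visible lane number on big-endian targets, where lanes are numbered from the other end of the register. A self-contained sketch of the mapping (see the macro in the ARM backend for the authoritative definition):

    /* Illustrative only; 'big_endian' stands in for BYTES_BIG_ENDIAN.  */
    static inline int endian_lane_n (int nunits, int n, int big_endian)
    {
      return big_endian ? nunits - 1 - n : n;
    }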
2393 (define_insn "vfmal_lane_low<mode>_intrinsic"
2394 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2395 (fma:VCVTF
2396 (float_extend:VCVTF
2397 (vec_select:<VFMLSEL>
2398 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2399 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2400 (float_extend:VCVTF
2401 (vec_duplicate:<VFMLSEL>
2402 (vec_select:HF
2403 (match_operand:<VFML> 3 "s_register_operand" "x")
2404 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2405 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2406 "TARGET_FP16FML"
2407 {
2408 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2409 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2410 {
2411 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2412 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2413 }
2414 else
2415 {
2416 operands[5] = GEN_INT (lane);
2417 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2418 }
2419 }
2420 [(set_attr "type" "neon_fp_mla_s<q>")]
2421 )
2422
2423 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2424 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2425 (unspec:VCVTF
2426 [(match_operand:VCVTF 1 "s_register_operand")
2427 (PLUSMINUS:<VFML>
2428 (match_operand:<VFML> 2 "s_register_operand")
2429 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2430 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2431 "TARGET_FP16FML"
2432 {
2433 rtx lane
2434 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2435 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2436 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2437 (operands[0], operands[1], operands[2], operands[3],
2438 half, lane));
2439 DONE;
2440 })
2441
2442 ;; Used to implement the intrinsics:
2443 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2444 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2445 ;; This needs a bit of care to get the modes of the different sub-expressions
2446 ;; right, since 'a' and 'b' have different sizes, and to make sure we use the
2447 ;; right S or D subregister to select the appropriate lane from.
2448
2449 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2450 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2451 (fma:VCVTF
2452 (float_extend:VCVTF
2453 (vec_select:<VFMLSEL>
2454 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2455 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2456 (float_extend:VCVTF
2457 (vec_duplicate:<VFMLSEL>
2458 (vec_select:HF
2459 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2460 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2461 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2462 "TARGET_FP16FML"
2463 {
2464 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2465 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2466 int new_lane = lane % elts_per_reg;
2467 int regdiff = lane / elts_per_reg;
2468 operands[5] = GEN_INT (new_lane);
2469 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2470 because we want the print_operand code to print the appropriate
2471 S or D register prefix. */
2472 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2473 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2474 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2475 }
2476 [(set_attr "type" "neon_fp_mla_s<q>")]
2477 )
2478
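A worked sketch of the subregister arithmetic above for the laneq case (a float16x8_t operand 3, with 4 HF elements per D register):

    #include <stdio.h>

    int main (void)
    {
      const int elts_per_reg = 4;            /* HF lanes per D register */
      for (int lane = 0; lane < 8; lane++)   /* Q-register lane numbers */
        printf ("lane %d -> D register +%d, lane %d\n",
                lane, lane / elts_per_reg, lane % elts_per_reg);
      return 0;
    }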
2479 ;; Used to implement the intrinsics:
2480 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2481 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2482 ;; This needs a bit of care to get the modes of the different sub-expressions
2483 ;; right, since 'a' and 'b' have different sizes, and to make sure we use the
2484 ;; right S or D subregister to select the appropriate lane from.
2485
2486 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2487 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2488 (fma:VCVTF
2489 (float_extend:VCVTF
2490 (vec_select:<VFMLSEL>
2491 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2492 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2493 (float_extend:VCVTF
2494 (vec_duplicate:<VFMLSEL>
2495 (vec_select:HF
2496 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2497 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2498 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2499 "TARGET_FP16FML"
2500 {
2501 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2502 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2503 int new_lane = lane % elts_per_reg;
2504 int regdiff = lane / elts_per_reg;
2505 operands[5] = GEN_INT (new_lane);
2506 /* We re-create operands[3] in the halved VFMLSEL mode
2507 because we've calculated the correct half-width subreg to extract
2508 the lane from and we want to print *that* subreg instead. */
2509 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2510 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2511 }
2512 [(set_attr "type" "neon_fp_mla_s<q>")]
2513 )
2514
2515 (define_insn "vfmal_lane_high<mode>_intrinsic"
2516 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2517 (fma:VCVTF
2518 (float_extend:VCVTF
2519 (vec_select:<VFMLSEL>
2520 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2521 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2522 (float_extend:VCVTF
2523 (vec_duplicate:<VFMLSEL>
2524 (vec_select:HF
2525 (match_operand:<VFML> 3 "s_register_operand" "x")
2526 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2527 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2528 "TARGET_FP16FML"
2529 {
2530 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2531 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2532 {
2533 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2534 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2535 }
2536 else
2537 {
2538 operands[5] = GEN_INT (lane);
2539 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2540 }
2541 }
2542 [(set_attr "type" "neon_fp_mla_s<q>")]
2543 )
2544
2545 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2546 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2547 (fma:VCVTF
2548 (float_extend:VCVTF
2549 (neg:<VFMLSEL>
2550 (vec_select:<VFMLSEL>
2551 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2552 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2553 (float_extend:VCVTF
2554 (vec_duplicate:<VFMLSEL>
2555 (vec_select:HF
2556 (match_operand:<VFML> 3 "s_register_operand" "x")
2557 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2558 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2559 "TARGET_FP16FML"
2560 {
2561 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2562 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2563 {
2564 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2565 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2566 }
2567 else
2568 {
2569 operands[5] = GEN_INT (lane);
2570 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2571 }
2572 }
2573 [(set_attr "type" "neon_fp_mla_s<q>")]
2574 )
2575
2576 ;; Used to implement the intrinsics:
2577 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2578 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2579 ;; This needs a bit of care to get the modes of the different sub-expressions
2580 ;; right, since 'a' and 'b' have different sizes, and to make sure we use the
2581 ;; right S or D subregister to select the appropriate lane from.
2582
2583 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2584 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2585 (fma:VCVTF
2586 (float_extend:VCVTF
2587 (neg:<VFMLSEL>
2588 (vec_select:<VFMLSEL>
2589 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2590 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2591 (float_extend:VCVTF
2592 (vec_duplicate:<VFMLSEL>
2593 (vec_select:HF
2594 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2595 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2596 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2597 "TARGET_FP16FML"
2598 {
2599 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2600 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2601 int new_lane = lane % elts_per_reg;
2602 int regdiff = lane / elts_per_reg;
2603 operands[5] = GEN_INT (new_lane);
2604 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2605 because we want the print_operand code to print the appropriate
2606 S or D register prefix. */
2607 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2608 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2609 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2610 }
2611 [(set_attr "type" "neon_fp_mla_s<q>")]
2612 )
2613
2614 ;; Used to implement the intrinsics:
2615 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2616 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2617 ;; This needs a bit of care to get the modes of the different sub-expressions
2618 ;; right, since 'a' and 'b' have different sizes, and to make sure we use the
2619 ;; right S or D subregister to select the appropriate lane from.
2620
2621 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2622 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2623 (fma:VCVTF
2624 (float_extend:VCVTF
2625 (neg:<VFMLSEL>
2626 (vec_select:<VFMLSEL>
2627 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2628 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2629 (float_extend:VCVTF
2630 (vec_duplicate:<VFMLSEL>
2631 (vec_select:HF
2632 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2633 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2634 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2635 "TARGET_FP16FML"
2636 {
2637 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2638 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2639 int new_lane = lane % elts_per_reg;
2640 int regdiff = lane / elts_per_reg;
2641 operands[5] = GEN_INT (new_lane);
2642 /* We re-create operands[3] in the halved VFMLSEL mode
2643 because we've calculated the correct half-width subreg to extract
2644 the lane from and we want to print *that* subreg instead. */
2645 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2646 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2647 }
2648 [(set_attr "type" "neon_fp_mla_s<q>")]
2649 )
2650
2651 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2652 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2653 (fma:VCVTF
2654 (float_extend:VCVTF
2655 (neg:<VFMLSEL>
2656 (vec_select:<VFMLSEL>
2657 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2658 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2659 (float_extend:VCVTF
2660 (vec_duplicate:<VFMLSEL>
2661 (vec_select:HF
2662 (match_operand:<VFML> 3 "s_register_operand" "x")
2663 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2664 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2665 "TARGET_FP16FML"
2666 {
2667 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2668 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2669 {
2670 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2671 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2672 }
2673 else
2674 {
2675 operands[5] = GEN_INT (lane);
2676 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2677 }
2678 }
2679 [(set_attr "type" "neon_fp_mla_s<q>")]
2680 )
2681
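A usage sketch for one of the lane intrinsics listed in the comments above, with the signature taken from those comments (the lane must be a compile-time constant; requires +fp16fml):

    #include <arm_neon.h>

    float32x4_t mla_lane_low (float32x4_t r, float16x8_t a, float16x4_t b)
    {
      return vfmlalq_lane_low_u32 (r, a, b, 2);
    }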
2301 ; Used for intrinsics when flag_unsafe_math_optimizations is false. 2682 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2302 2683
2303 (define_insn "neon_vmla<mode>_unspec" 2684 (define_insn "neon_vmla<mode>_unspec"
2304 [(set (match_operand:VDQW 0 "s_register_operand" "=w") 2685 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2305 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") 2686 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
3120 (match_operand:VCVTF 2 "register_operand")] 3501 (match_operand:VCVTF 2 "register_operand")]
3121 "TARGET_NEON" 3502 "TARGET_NEON"
3122 "{ 3503 "{
3123 rtx v_bitmask_cast; 3504 rtx v_bitmask_cast;
3124 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); 3505 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3125 int i, n_elt = GET_MODE_NUNITS (<MODE>mode); 3506 rtx c = GEN_INT (0x80000000);
3126 rtvec v = rtvec_alloc (n_elt);
3127
3128 /* Create bitmask for vector select. */
3129 for (i = 0; i < n_elt; ++i)
3130 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
3131 3507
3132 emit_move_insn (v_bitmask, 3508 emit_move_insn (v_bitmask,
3133 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v)); 3509 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3134 emit_move_insn (operands[0], operands[2]); 3510 emit_move_insn (operands[0], operands[2]);
3135 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, 3511 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3136 <VCVTF:V_cmp_result>mode, 0); 3512 <VCVTF:V_cmp_result>mode, 0);
3137 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], 3513 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3138 operands[1])); 3514 operands[1]));
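Per lane, this bitmask-plus-vbsl sequence implements a copysign: keep the magnitude bits of operand 1 and the sign bit of operand 2. A scalar sketch of the same bit manipulation:

    #include <stdint.h>
    #include <string.h>

    float copysign_lane (float mag, float sgn)
    {
      uint32_t m, s, r;
      memcpy (&m, &mag, 4);
      memcpy (&s, &sgn, 4);
      r = (m & 0x7fffffffu) | (s & 0x80000000u);
      float out;
      memcpy (&out, &r, 4);
      return out;
    }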
6317 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); 6693 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6318 DONE; 6694 DONE;
6319 }) 6695 })
6320 6696
6321 (define_insn "neon_vabd<mode>_2" 6697 (define_insn "neon_vabd<mode>_2"
6322 [(set (match_operand:VDQ 0 "s_register_operand" "=w") 6698 [(set (match_operand:VF 0 "s_register_operand" "=w")
6323 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") 6699 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6324 (match_operand:VDQ 2 "s_register_operand" "w"))))] 6700 (match_operand:VF 2 "s_register_operand" "w"))))]
6325 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" 6701 "TARGET_NEON && flag_unsafe_math_optimizations"
6326 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6702 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6327 [(set (attr "type") 6703 [(set_attr "type" "neon_fp_abd_s<q>")]
6328 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6329 (const_string "neon_fp_abd_s<q>")
6330 (const_string "neon_abd<q>")))]
6331 ) 6704 )
6332 6705
6333 (define_insn "neon_vabd<mode>_3" 6706 (define_insn "neon_vabd<mode>_3"
6334 [(set (match_operand:VDQ 0 "s_register_operand" "=w") 6707 [(set (match_operand:VF 0 "s_register_operand" "=w")
6335 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") 6708 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6336 (match_operand:VDQ 2 "s_register_operand" "w")] 6709 (match_operand:VF 2 "s_register_operand" "w")]
6337 UNSPEC_VSUB)))] 6710 UNSPEC_VSUB)))]
6338 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" 6711 "TARGET_NEON && flag_unsafe_math_optimizations"
6339 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6712 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6340 [(set (attr "type") 6713 [(set_attr "type" "neon_fp_abd_s<q>")]
6341 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6342 (const_string "neon_fp_abd_s<q>")
6343 (const_string "neon_abd<q>")))]
6344 ) 6714 )
6345 6715
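With the patterns now restricted to the float modes (VF) and gated on -funsafe-math-optimizations, a scalar sketch of the source shape they let GCC fold into a single vabd.f32:

    #include <math.h>

    float vabd_ref (float a, float b)
    {
      return fabsf (a - b);   /* abs (minus ...) as in neon_vabd<mode>_2 */
    }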
6346 ;; Copy from core-to-neon regs, then extend, not vice-versa 6716 ;; Copy from core-to-neon regs, then extend, not vice-versa
6347 6717
6348 (define_split 6718 (define_split