comparison lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @ 97:b0dd3743370f

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Wed, 14 Oct 2015 19:39:58 +0900
parents d52ff4b80465 afa8332a0e37
children 57be027de0f4
comparison
equal deleted inserted replaced
94:d52ff4b80465 97:b0dd3743370f
20 #include "llvm/Analysis/AliasAnalysis.h" 20 #include "llvm/Analysis/AliasAnalysis.h"
21 #include "llvm/Analysis/BranchProbabilityInfo.h" 21 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/Analysis/ConstantFolding.h" 22 #include "llvm/Analysis/ConstantFolding.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h" 23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h" 24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/CodeGen/Analysis.h" 25 #include "llvm/Analysis/VectorUtils.h"
26 #include "llvm/CodeGen/FastISel.h" 26 #include "llvm/CodeGen/FastISel.h"
27 #include "llvm/CodeGen/FunctionLoweringInfo.h" 27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
28 #include "llvm/CodeGen/GCMetadata.h" 28 #include "llvm/CodeGen/GCMetadata.h"
29 #include "llvm/CodeGen/GCStrategy.h" 29 #include "llvm/CodeGen/GCStrategy.h"
30 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineJumpTableInfo.h" 33 #include "llvm/CodeGen/MachineJumpTableInfo.h"
34 #include "llvm/CodeGen/MachineModuleInfo.h" 34 #include "llvm/CodeGen/MachineModuleInfo.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/SelectionDAG.h" 36 #include "llvm/CodeGen/SelectionDAG.h"
37 #include "llvm/CodeGen/StackMaps.h" 37 #include "llvm/CodeGen/StackMaps.h"
38 #include "llvm/CodeGen/WinEHFuncInfo.h"
38 #include "llvm/IR/CallingConv.h" 39 #include "llvm/IR/CallingConv.h"
39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DataLayout.h" 41 #include "llvm/IR/DataLayout.h"
41 #include "llvm/IR/DebugInfo.h" 42 #include "llvm/IR/DebugInfo.h"
42 #include "llvm/IR/DerivedTypes.h" 43 #include "llvm/IR/DerivedTypes.h"
61 #include "llvm/Target/TargetLowering.h" 62 #include "llvm/Target/TargetLowering.h"
62 #include "llvm/Target/TargetOptions.h" 63 #include "llvm/Target/TargetOptions.h"
63 #include "llvm/Target/TargetSelectionDAGInfo.h" 64 #include "llvm/Target/TargetSelectionDAGInfo.h"
64 #include "llvm/Target/TargetSubtargetInfo.h" 65 #include "llvm/Target/TargetSubtargetInfo.h"
65 #include <algorithm> 66 #include <algorithm>
67 #include <utility>
66 using namespace llvm; 68 using namespace llvm;
67 69
68 #define DEBUG_TYPE "isel" 70 #define DEBUG_TYPE "isel"
69 71
70 /// LimitFloatPrecision - Generate low-precision inline sequences for 72 /// LimitFloatPrecision - Generate low-precision inline sequences for
76 cl::desc("Generate low-precision inline sequences " 78 cl::desc("Generate low-precision inline sequences "
77 "for some float libcalls"), 79 "for some float libcalls"),
78 cl::location(LimitFloatPrecision), 80 cl::location(LimitFloatPrecision),
79 cl::init(0)); 81 cl::init(0));
80 82
83 static cl::opt<bool>
84 EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
85 cl::desc("Enable fast-math-flags for DAG nodes"));
86
81 // Limit the width of DAG chains. This is important in general to prevent 87 // Limit the width of DAG chains. This is important in general to prevent
82 // prevent DAG-based analysis from blowing up. For example, alias analysis and 88 // DAG-based analysis from blowing up. For example, alias analysis and
83 // load clustering may not complete in reasonable time. It is difficult to 89 // load clustering may not complete in reasonable time. It is difficult to
84 // recognize and avoid this situation within each individual analysis, and 90 // recognize and avoid this situation within each individual analysis, and
85 // future analyses are likely to have the same behavior. Limiting DAG width is 91 // future analyses are likely to have the same behavior. Limiting DAG width is
86 // the safe approach, and will be especially important with global DAGs. 92 // the safe approach and will be especially important with global DAGs.
87 // 93 //
88 // MaxParallelChains default is arbitrarily high to avoid affecting 94 // MaxParallelChains default is arbitrarily high to avoid affecting
89 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st 95 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st
90 // sequence over this should have been converted to llvm.memcpy by the 96 // sequence over this should have been converted to llvm.memcpy by the
91 // frontend. It easy to induce this behavior with .ll code such as: 97 // frontend. It easy to induce this behavior with .ll code such as:
140 } else { 146 } else {
141 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); 147 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
142 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); 148 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
143 } 149 }
144 150
145 if (TLI.isBigEndian()) 151 if (DAG.getDataLayout().isBigEndian())
146 std::swap(Lo, Hi); 152 std::swap(Lo, Hi);
147 153
148 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); 154 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
149 155
150 if (RoundParts < NumParts) { 156 if (RoundParts < NumParts) {
154 Hi = getCopyFromParts(DAG, DL, 160 Hi = getCopyFromParts(DAG, DL,
155 Parts + RoundParts, OddParts, PartVT, OddVT, V); 161 Parts + RoundParts, OddParts, PartVT, OddVT, V);
156 162
157 // Combine the round and odd parts. 163 // Combine the round and odd parts.
158 Lo = Val; 164 Lo = Val;
159 if (TLI.isBigEndian()) 165 if (DAG.getDataLayout().isBigEndian())
160 std::swap(Lo, Hi); 166 std::swap(Lo, Hi);
161 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 167 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
162 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); 168 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
163 Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, 169 Hi =
164 DAG.getConstant(Lo.getValueType().getSizeInBits(), 170 DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
165 TLI.getPointerTy())); 171 DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
172 TLI.getPointerTy(DAG.getDataLayout())));
166 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); 173 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
167 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); 174 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
168 } 175 }
169 } else if (PartVT.isFloatingPoint()) { 176 } else if (PartVT.isFloatingPoint()) {
170 // FP split into multiple FP parts (for ppcf128) 177 // FP split into multiple FP parts (for ppcf128)
171 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && 178 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
172 "Unexpected split"); 179 "Unexpected split");
173 SDValue Lo, Hi; 180 SDValue Lo, Hi;
174 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); 181 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
175 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); 182 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
176 if (TLI.hasBigEndianPartOrdering(ValueVT)) 183 if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
177 std::swap(Lo, Hi); 184 std::swap(Lo, Hi);
178 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); 185 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
179 } else { 186 } else {
180 // FP split into integer parts (soft fp) 187 // FP split into integer parts (soft fp)
181 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && 188 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
205 } 212 }
206 213
207 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 214 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
208 // FP_ROUND's are always exact here. 215 // FP_ROUND's are always exact here.
209 if (ValueVT.bitsLT(Val.getValueType())) 216 if (ValueVT.bitsLT(Val.getValueType()))
210 return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, 217 return DAG.getNode(
211 DAG.getTargetConstant(1, TLI.getPointerTy())); 218 ISD::FP_ROUND, DL, ValueVT, Val,
219 DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
212 220
213 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); 221 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
214 } 222 }
215 223
216 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) 224 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
255 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 263 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
256 NumIntermediates, RegisterVT); 264 NumIntermediates, RegisterVT);
257 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 265 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
258 NumParts = NumRegs; // Silence a compiler warning. 266 NumParts = NumRegs; // Silence a compiler warning.
259 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 267 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
260 assert(RegisterVT == Parts[0].getSimpleValueType() && 268 assert(RegisterVT.getSizeInBits() ==
261 "Part type doesn't match part!"); 269 Parts[0].getSimpleValueType().getSizeInBits() &&
270 "Part type sizes don't match!");
262 271
263 // Assemble the parts into intermediate operands. 272 // Assemble the parts into intermediate operands.
264 SmallVector<SDValue, 8> Ops(NumIntermediates); 273 SmallVector<SDValue, 8> Ops(NumIntermediates);
265 if (NumIntermediates == NumParts) { 274 if (NumIntermediates == NumParts) {
266 // If the register was not expanded, truncate or copy the value, 275 // If the register was not expanded, truncate or copy the value,
298 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the 307 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
299 // elements we want. 308 // elements we want.
300 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { 309 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
301 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && 310 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
302 "Cannot narrow, it would be a lossy transformation"); 311 "Cannot narrow, it would be a lossy transformation");
303 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, 312 return DAG.getNode(
304 DAG.getConstant(0, TLI.getVectorIdxTy())); 313 ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
314 DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
305 } 315 }
306 316
307 // Vector/Vector bitcast. 317 // Vector/Vector bitcast.
308 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) 318 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
309 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 319 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
310 320
311 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && 321 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
312 "Cannot handle this kind of promotion"); 322 "Cannot handle this kind of promotion");
313 // Promoted vector extract 323 // Promoted vector extract
314 bool Smaller = ValueVT.bitsLE(PartEVT); 324 return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
315 return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
316 DL, ValueVT, Val);
317 325
318 } 326 }
319 327
320 // Trivial bitcast if the types are the same size and the destination 328 // Trivial bitcast if the types are the same size and the destination
321 // vector type is legal. 329 // vector type is legal.
329 "non-trivial scalar-to-vector conversion"); 337 "non-trivial scalar-to-vector conversion");
330 return DAG.getUNDEF(ValueVT); 338 return DAG.getUNDEF(ValueVT);
331 } 339 }
332 340
333 if (ValueVT.getVectorNumElements() == 1 && 341 if (ValueVT.getVectorNumElements() == 1 &&
334 ValueVT.getVectorElementType() != PartEVT) { 342 ValueVT.getVectorElementType() != PartEVT)
335 bool Smaller = ValueVT.bitsLE(PartEVT); 343 Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
336 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
337 DL, ValueVT.getScalarType(), Val);
338 }
339 344
340 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); 345 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
341 } 346 }
342 347
343 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, 348 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl,
355 360
356 // Handle the vector case separately. 361 // Handle the vector case separately.
357 if (ValueVT.isVector()) 362 if (ValueVT.isVector())
358 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); 363 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
359 364
360 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
361 unsigned PartBits = PartVT.getSizeInBits(); 365 unsigned PartBits = PartVT.getSizeInBits();
362 unsigned OrigNumParts = NumParts; 366 unsigned OrigNumParts = NumParts;
363 assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); 367 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
368 "Copying to an illegal type!");
364 369
365 if (NumParts == 0) 370 if (NumParts == 0)
366 return; 371 return;
367 372
368 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); 373 assert(!ValueVT.isVector() && "Vector case handled elsewhere");
423 "Do not know what to expand to!"); 428 "Do not know what to expand to!");
424 unsigned RoundParts = 1 << Log2_32(NumParts); 429 unsigned RoundParts = 1 << Log2_32(NumParts);
425 unsigned RoundBits = RoundParts * PartBits; 430 unsigned RoundBits = RoundParts * PartBits;
426 unsigned OddParts = NumParts - RoundParts; 431 unsigned OddParts = NumParts - RoundParts;
427 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, 432 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
428 DAG.getIntPtrConstant(RoundBits)); 433 DAG.getIntPtrConstant(RoundBits, DL));
429 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); 434 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
430 435
431 if (TLI.isBigEndian()) 436 if (DAG.getDataLayout().isBigEndian())
432 // The odd parts were reversed by getCopyToParts - unreverse them. 437 // The odd parts were reversed by getCopyToParts - unreverse them.
433 std::reverse(Parts + RoundParts, Parts + NumParts); 438 std::reverse(Parts + RoundParts, Parts + NumParts);
434 439
435 NumParts = RoundParts; 440 NumParts = RoundParts;
436 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 441 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
450 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); 455 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
451 SDValue &Part0 = Parts[i]; 456 SDValue &Part0 = Parts[i];
452 SDValue &Part1 = Parts[i+StepSize/2]; 457 SDValue &Part1 = Parts[i+StepSize/2];
453 458
454 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 459 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
455 ThisVT, Part0, DAG.getIntPtrConstant(1)); 460 ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
456 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 461 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
457 ThisVT, Part0, DAG.getIntPtrConstant(0)); 462 ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
458 463
459 if (ThisBits == PartBits && ThisVT != PartVT) { 464 if (ThisBits == PartBits && ThisVT != PartVT) {
460 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); 465 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
461 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); 466 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
462 } 467 }
463 } 468 }
464 } 469 }
465 470
466 if (TLI.isBigEndian()) 471 if (DAG.getDataLayout().isBigEndian())
467 std::reverse(Parts, Parts + OrigNumParts); 472 std::reverse(Parts, Parts + OrigNumParts);
468 } 473 }
469 474
470 475
471 /// getCopyToPartsVector - Create a series of nodes that contain the specified 476 /// getCopyToPartsVector - Create a series of nodes that contain the specified
490 EVT ElementVT = PartVT.getVectorElementType(); 495 EVT ElementVT = PartVT.getVectorElementType();
491 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in 496 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
492 // undef elements. 497 // undef elements.
493 SmallVector<SDValue, 16> Ops; 498 SmallVector<SDValue, 16> Ops;
494 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) 499 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
495 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 500 Ops.push_back(DAG.getNode(
496 ElementVT, Val, DAG.getConstant(i, 501 ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
497 TLI.getVectorIdxTy()))); 502 DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
498 503
499 for (unsigned i = ValueVT.getVectorNumElements(), 504 for (unsigned i = ValueVT.getVectorNumElements(),
500 e = PartVT.getVectorNumElements(); i != e; ++i) 505 e = PartVT.getVectorNumElements(); i != e; ++i)
501 Ops.push_back(DAG.getUNDEF(ElementVT)); 506 Ops.push_back(DAG.getUNDEF(ElementVT));
502 507
510 PartEVT.getVectorElementType().bitsGE( 515 PartEVT.getVectorElementType().bitsGE(
511 ValueVT.getVectorElementType()) && 516 ValueVT.getVectorElementType()) &&
512 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { 517 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
513 518
514 // Promoted vector extract 519 // Promoted vector extract
515 bool Smaller = PartEVT.bitsLE(ValueVT); 520 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
516 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
517 DL, PartVT, Val);
518 } else{ 521 } else{
519 // Vector -> scalar conversion. 522 // Vector -> scalar conversion.
520 assert(ValueVT.getVectorNumElements() == 1 && 523 assert(ValueVT.getVectorNumElements() == 1 &&
521 "Only trivial vector-to-scalar conversions should get here!"); 524 "Only trivial vector-to-scalar conversions should get here!");
522 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 525 Val = DAG.getNode(
523 PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); 526 ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
524 527 DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
525 bool Smaller = ValueVT.bitsLE(PartVT); 528
526 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 529 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
527 DL, PartVT, Val);
528 } 530 }
529 531
530 Parts[0] = Val; 532 Parts[0] = Val;
531 return; 533 return;
532 } 534 }
546 548
547 // Split the vector into intermediate operands. 549 // Split the vector into intermediate operands.
548 SmallVector<SDValue, 8> Ops(NumIntermediates); 550 SmallVector<SDValue, 8> Ops(NumIntermediates);
549 for (unsigned i = 0; i != NumIntermediates; ++i) { 551 for (unsigned i = 0; i != NumIntermediates; ++i) {
550 if (IntermediateVT.isVector()) 552 if (IntermediateVT.isVector())
551 Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 553 Ops[i] =
552 IntermediateVT, Val, 554 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
553 DAG.getConstant(i * (NumElements / NumIntermediates), 555 DAG.getConstant(i * (NumElements / NumIntermediates), DL,
554 TLI.getVectorIdxTy())); 556 TLI.getVectorIdxTy(DAG.getDataLayout())));
555 else 557 else
556 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 558 Ops[i] = DAG.getNode(
557 IntermediateVT, Val, 559 ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
558 DAG.getConstant(i, TLI.getVectorIdxTy())); 560 DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
559 } 561 }
560 562
561 // Split the intermediate operands into legal parts. 563 // Split the intermediate operands into legal parts.
562 if (NumParts == NumIntermediates) { 564 if (NumParts == NumIntermediates) {
563 // If the register was not expanded, promote or copy the value, 565 // If the register was not expanded, promote or copy the value,
574 for (unsigned i = 0; i != NumIntermediates; ++i) 576 for (unsigned i = 0; i != NumIntermediates; ++i)
575 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); 577 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
576 } 578 }
577 } 579 }
578 580
579 namespace { 581 RegsForValue::RegsForValue() {}
580 /// RegsForValue - This struct represents the registers (physical or virtual) 582
581 /// that a particular set of values is assigned, and the type information 583 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
582 /// about the value. The most common situation is to represent one value at a 584 EVT valuevt)
583 /// time, but struct or array values are handled element-wise as multiple 585 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
584 /// values. The splitting of aggregates is performed recursively, so that we 586
585 /// never have aggregate-typed registers. The values at this point do not 587 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
586 /// necessarily have legal types, so each value may require one or more 588 const DataLayout &DL, unsigned Reg, Type *Ty) {
587 /// registers of some legal type. 589 ComputeValueVTs(TLI, DL, Ty, ValueVTs);
588 /// 590
589 struct RegsForValue { 591 for (EVT ValueVT : ValueVTs) {
590 /// ValueVTs - The value types of the values, which may not be legal, and 592 unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
591 /// may need be promoted or synthesized from one or more registers. 593 MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
592 /// 594 for (unsigned i = 0; i != NumRegs; ++i)
593 SmallVector<EVT, 4> ValueVTs; 595 Regs.push_back(Reg + i);
594 596 RegVTs.push_back(RegisterVT);
595 /// RegVTs - The value types of the registers. This is the same size as 597 Reg += NumRegs;
596 /// ValueVTs and it records, for each value, what the type of the assigned 598 }
597 /// register or registers are. (Individual values are never synthesized
598 /// from more than one type of register.)
599 ///
600 /// With virtual registers, the contents of RegVTs is redundant with TLI's
601 /// getRegisterType member function, however when with physical registers
602 /// it is necessary to have a separate record of the types.
603 ///
604 SmallVector<MVT, 4> RegVTs;
605
606 /// Regs - This list holds the registers assigned to the values.
607 /// Each legal or promoted value requires one register, and each
608 /// expanded value requires multiple registers.
609 ///
610 SmallVector<unsigned, 4> Regs;
611
612 RegsForValue() {}
613
614 RegsForValue(const SmallVector<unsigned, 4> &regs,
615 MVT regvt, EVT valuevt)
616 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
617
618 RegsForValue(LLVMContext &Context, const TargetLowering &tli,
619 unsigned Reg, Type *Ty) {
620 ComputeValueVTs(tli, Ty, ValueVTs);
621
622 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
623 EVT ValueVT = ValueVTs[Value];
624 unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
625 MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
626 for (unsigned i = 0; i != NumRegs; ++i)
627 Regs.push_back(Reg + i);
628 RegVTs.push_back(RegisterVT);
629 Reg += NumRegs;
630 }
631 }
632
633 /// append - Add the specified values to this one.
634 void append(const RegsForValue &RHS) {
635 ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
636 RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
637 Regs.append(RHS.Regs.begin(), RHS.Regs.end());
638 }
639
640 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
641 /// this value and returns the result as a ValueVTs value. This uses
642 /// Chain/Flag as the input and updates them for the output Chain/Flag.
643 /// If the Flag pointer is NULL, no flag is used.
644 SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
645 SDLoc dl,
646 SDValue &Chain, SDValue *Flag,
647 const Value *V = nullptr) const;
648
649 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
650 /// specified value into the registers specified by this object. This uses
651 /// Chain/Flag as the input and updates them for the output Chain/Flag.
652 /// If the Flag pointer is NULL, no flag is used.
653 void
654 getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
655 SDValue *Flag, const Value *V,
656 ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
657
658 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
659 /// operand list. This adds the code marker, matching input operand index
660 /// (if applicable), and includes the number of values added into it.
661 void AddInlineAsmOperands(unsigned Kind,
662 bool HasMatching, unsigned MatchingIdx,
663 SelectionDAG &DAG,
664 std::vector<SDValue> &Ops) const;
665 };
666 } 599 }
667 600
668 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 601 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
669 /// this value and returns the result as a ValueVT value. This uses 602 /// this value and returns the result as a ValueVT value. This uses
670 /// Chain/Flag as the input and updates them for the output Chain/Flag. 603 /// Chain/Flag as the input and updates them for the output Chain/Flag.
719 652
720 if (NumZeroBits == RegSize) { 653 if (NumZeroBits == RegSize) {
721 // The current value is a zero. 654 // The current value is a zero.
722 // Explicitly express that as it would be easier for 655 // Explicitly express that as it would be easier for
723 // optimizations to kick in. 656 // optimizations to kick in.
724 Parts[i] = DAG.getConstant(0, RegisterVT); 657 Parts[i] = DAG.getConstant(0, dl, RegisterVT);
725 continue; 658 continue;
726 } 659 }
727 660
728 // FIXME: We capture more information than the dag can represent. For 661 // FIXME: We capture more information than the dag can represent. For
729 // now, just use the tightest assertzext/assertsext possible. 662 // now, just use the tightest assertzext/assertsext possible.
821 754
822 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 755 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
823 /// operand list. This adds the code marker and includes the number of 756 /// operand list. This adds the code marker and includes the number of
824 /// values added into it. 757 /// values added into it.
825 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, 758 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
826 unsigned MatchingIdx, 759 unsigned MatchingIdx, SDLoc dl,
827 SelectionDAG &DAG, 760 SelectionDAG &DAG,
828 std::vector<SDValue> &Ops) const { 761 std::vector<SDValue> &Ops) const {
829 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 762 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
830 763
831 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); 764 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
841 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 774 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
842 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); 775 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
843 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); 776 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
844 } 777 }
845 778
846 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); 779 SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
847 Ops.push_back(Res); 780 Ops.push_back(Res);
848 781
849 unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); 782 unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
850 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { 783 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
851 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); 784 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
856 Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); 789 Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
857 790
858 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { 791 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
859 // If we clobbered the stack pointer, MFI should know about it. 792 // If we clobbered the stack pointer, MFI should know about it.
860 assert(DAG.getMachineFunction().getFrameInfo()-> 793 assert(DAG.getMachineFunction().getFrameInfo()->
861 hasInlineAsmWithSPAdjust()); 794 hasOpaqueSPAdjustment());
862 } 795 }
863 } 796 }
864 } 797 }
865 } 798 }
866 799
867 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, 800 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
868 const TargetLibraryInfo *li) { 801 const TargetLibraryInfo *li) {
869 AA = &aa; 802 AA = &aa;
870 GFI = gfi; 803 GFI = gfi;
871 LibInfo = li; 804 LibInfo = li;
872 DL = DAG.getTarget().getDataLayout(); 805 DL = &DAG.getDataLayout();
873 Context = DAG.getContext(); 806 Context = DAG.getContext();
874 LPadToCallSiteMap.clear(); 807 LPadToCallSiteMap.clear();
875 } 808 }
876 809
877 /// clear - Clear out the current SelectionDAG and the associated 810 /// clear - Clear out the current SelectionDAG and the associated
995 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; 928 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
996 if (DDI.getDI()) { 929 if (DDI.getDI()) {
997 const DbgValueInst *DI = DDI.getDI(); 930 const DbgValueInst *DI = DDI.getDI();
998 DebugLoc dl = DDI.getdl(); 931 DebugLoc dl = DDI.getdl();
999 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); 932 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1000 MDNode *Variable = DI->getVariable(); 933 DILocalVariable *Variable = DI->getVariable();
1001 MDNode *Expr = DI->getExpression(); 934 DIExpression *Expr = DI->getExpression();
935 assert(Variable->isValidLocationForIntrinsic(dl) &&
936 "Expected inlined-at fields to agree");
1002 uint64_t Offset = DI->getOffset(); 937 uint64_t Offset = DI->getOffset();
1003 // A dbg.value for an alloca is always indirect. 938 // A dbg.value for an alloca is always indirect.
1004 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 939 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
1005 SDDbgValue *SDV; 940 SDDbgValue *SDV;
1006 if (Val.getNode()) { 941 if (Val.getNode()) {
1007 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, 942 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
1008 Val)) { 943 Val)) {
1009 SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), 944 SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
1010 IsIndirect, Offset, dl, DbgSDNodeOrder); 945 IsIndirect, Offset, dl, DbgSDNodeOrder);
1011 DAG.AddDbgValue(SDV, Val.getNode(), false); 946 DAG.AddDbgValue(SDV, Val.getNode(), false);
1012 } 947 }
1014 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); 949 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1015 DanglingDebugInfoMap[V] = DanglingDebugInfo(); 950 DanglingDebugInfoMap[V] = DanglingDebugInfo();
1016 } 951 }
1017 } 952 }
1018 953
954 /// getCopyFromRegs - If there was virtual register allocated for the value V
955 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
956 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
957 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
958 SDValue Result;
959
960 if (It != FuncInfo.ValueMap.end()) {
961 unsigned InReg = It->second;
962 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
963 DAG.getDataLayout(), InReg, Ty);
964 SDValue Chain = DAG.getEntryNode();
965 Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
966 resolveDanglingDebugInfo(V, Result);
967 }
968
969 return Result;
970 }
971
1019 /// getValue - Return an SDValue for the given Value. 972 /// getValue - Return an SDValue for the given Value.
1020 SDValue SelectionDAGBuilder::getValue(const Value *V) { 973 SDValue SelectionDAGBuilder::getValue(const Value *V) {
1021 // If we already have an SDValue for this value, use it. It's important 974 // If we already have an SDValue for this value, use it. It's important
1022 // to do this first, so that we don't create a CopyFromReg if we already 975 // to do this first, so that we don't create a CopyFromReg if we already
1023 // have a regular SDValue. 976 // have a regular SDValue.
1024 SDValue &N = NodeMap[V]; 977 SDValue &N = NodeMap[V];
1025 if (N.getNode()) return N; 978 if (N.getNode()) return N;
1026 979
1027 // If there's a virtual register allocated and initialized for this 980 // If there's a virtual register allocated and initialized for this
1028 // value, use it. 981 // value, use it.
1029 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); 982 SDValue copyFromReg = getCopyFromRegs(V, V->getType());
1030 if (It != FuncInfo.ValueMap.end()) { 983 if (copyFromReg.getNode()) {
1031 unsigned InReg = It->second; 984 return copyFromReg;
1032 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
1033 V->getType());
1034 SDValue Chain = DAG.getEntryNode();
1035 N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1036 resolveDanglingDebugInfo(V, N);
1037 return N;
1038 } 985 }
1039 986
1040 // Otherwise create a new SDValue and remember it. 987 // Otherwise create a new SDValue and remember it.
1041 SDValue Val = getValueImpl(V); 988 SDValue Val = getValueImpl(V);
1042 NodeMap[V] = Val; 989 NodeMap[V] = Val;
1043 resolveDanglingDebugInfo(V, Val); 990 resolveDanglingDebugInfo(V, Val);
1044 return Val; 991 return Val;
1045 } 992 }
1046 993
994 // Return true if SDValue exists for the given Value
995 bool SelectionDAGBuilder::findValue(const Value *V) const {
996 return (NodeMap.find(V) != NodeMap.end()) ||
997 (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
998 }
999
1047 /// getNonRegisterValue - Return an SDValue for the given Value, but 1000 /// getNonRegisterValue - Return an SDValue for the given Value, but
1048 /// don't look in FuncInfo.ValueMap for a virtual register. 1001 /// don't look in FuncInfo.ValueMap for a virtual register.
1049 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { 1002 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1050 // If we already have an SDValue for this value, use it. 1003 // If we already have an SDValue for this value, use it.
1051 SDValue &N = NodeMap[V]; 1004 SDValue &N = NodeMap[V];
1052 if (N.getNode()) return N; 1005 if (N.getNode()) {
1006 if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1007 // Remove the debug location from the node as the node is about to be used
1008 // in a location which may differ from the original debug location. This
1009 // is relevant to Constant and ConstantFP nodes because they can appear
1010 // as constant expressions inside PHI nodes.
1011 N->setDebugLoc(DebugLoc());
1012 }
1013 return N;
1014 }
1053 1015
1054 // Otherwise create a new SDValue and remember it. 1016 // Otherwise create a new SDValue and remember it.
1055 SDValue Val = getValueImpl(V); 1017 SDValue Val = getValueImpl(V);
1056 NodeMap[V] = Val; 1018 NodeMap[V] = Val;
1057 resolveDanglingDebugInfo(V, Val); 1019 resolveDanglingDebugInfo(V, Val);
1062 /// Create an SDValue for the given value. 1024 /// Create an SDValue for the given value.
1063 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { 1025 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1064 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1026 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1065 1027
1066 if (const Constant *C = dyn_cast<Constant>(V)) { 1028 if (const Constant *C = dyn_cast<Constant>(V)) {
1067 EVT VT = TLI.getValueType(V->getType(), true); 1029 EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1068 1030
1069 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) 1031 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1070 return DAG.getConstant(*CI, VT); 1032 return DAG.getConstant(*CI, getCurSDLoc(), VT);
1071 1033
1072 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 1034 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1073 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); 1035 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1074 1036
1075 if (isa<ConstantPointerNull>(C)) { 1037 if (isa<ConstantPointerNull>(C)) {
1076 unsigned AS = V->getType()->getPointerAddressSpace(); 1038 unsigned AS = V->getType()->getPointerAddressSpace();
1077 return DAG.getConstant(0, TLI.getPointerTy(AS)); 1039 return DAG.getConstant(0, getCurSDLoc(),
1040 TLI.getPointerTy(DAG.getDataLayout(), AS));
1078 } 1041 }
1079 1042
1080 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 1043 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1081 return DAG.getConstantFP(*CFP, VT); 1044 return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1082 1045
1083 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) 1046 if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1084 return DAG.getUNDEF(VT); 1047 return DAG.getUNDEF(VT);
1085 1048
1086 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 1049 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1126 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { 1089 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1127 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && 1090 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1128 "Unknown struct or array constant!"); 1091 "Unknown struct or array constant!");
1129 1092
1130 SmallVector<EVT, 4> ValueVTs; 1093 SmallVector<EVT, 4> ValueVTs;
1131 ComputeValueVTs(TLI, C->getType(), ValueVTs); 1094 ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1132 unsigned NumElts = ValueVTs.size(); 1095 unsigned NumElts = ValueVTs.size();
1133 if (NumElts == 0) 1096 if (NumElts == 0)
1134 return SDValue(); // empty struct 1097 return SDValue(); // empty struct
1135 SmallVector<SDValue, 4> Constants(NumElts); 1098 SmallVector<SDValue, 4> Constants(NumElts);
1136 for (unsigned i = 0; i != NumElts; ++i) { 1099 for (unsigned i = 0; i != NumElts; ++i) {
1137 EVT EltVT = ValueVTs[i]; 1100 EVT EltVT = ValueVTs[i];
1138 if (isa<UndefValue>(C)) 1101 if (isa<UndefValue>(C))
1139 Constants[i] = DAG.getUNDEF(EltVT); 1102 Constants[i] = DAG.getUNDEF(EltVT);
1140 else if (EltVT.isFloatingPoint()) 1103 else if (EltVT.isFloatingPoint())
1141 Constants[i] = DAG.getConstantFP(0, EltVT); 1104 Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1142 else 1105 else
1143 Constants[i] = DAG.getConstant(0, EltVT); 1106 Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1144 } 1107 }
1145 1108
1146 return DAG.getMergeValues(Constants, getCurSDLoc()); 1109 return DAG.getMergeValues(Constants, getCurSDLoc());
1147 } 1110 }
1148 1111
1158 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { 1121 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1159 for (unsigned i = 0; i != NumElements; ++i) 1122 for (unsigned i = 0; i != NumElements; ++i)
1160 Ops.push_back(getValue(CV->getOperand(i))); 1123 Ops.push_back(getValue(CV->getOperand(i)));
1161 } else { 1124 } else {
1162 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); 1125 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1163 EVT EltVT = TLI.getValueType(VecTy->getElementType()); 1126 EVT EltVT =
1127 TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1164 1128
1165 SDValue Op; 1129 SDValue Op;
1166 if (EltVT.isFloatingPoint()) 1130 if (EltVT.isFloatingPoint())
1167 Op = DAG.getConstantFP(0, EltVT); 1131 Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1168 else 1132 else
1169 Op = DAG.getConstant(0, EltVT); 1133 Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1170 Ops.assign(NumElements, Op); 1134 Ops.assign(NumElements, Op);
1171 } 1135 }
1172 1136
1173 // Create a BUILD_VECTOR node. 1137 // Create a BUILD_VECTOR node.
1174 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); 1138 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops);
1178 // computation. 1142 // computation.
1179 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 1143 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1180 DenseMap<const AllocaInst*, int>::iterator SI = 1144 DenseMap<const AllocaInst*, int>::iterator SI =
1181 FuncInfo.StaticAllocaMap.find(AI); 1145 FuncInfo.StaticAllocaMap.find(AI);
1182 if (SI != FuncInfo.StaticAllocaMap.end()) 1146 if (SI != FuncInfo.StaticAllocaMap.end())
1183 return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); 1147 return DAG.getFrameIndex(SI->second,
1148 TLI.getPointerTy(DAG.getDataLayout()));
1184 } 1149 }
1185 1150
1186 // If this is an instruction which fast-isel has deferred, select it now. 1151 // If this is an instruction which fast-isel has deferred, select it now.
1187 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 1152 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1188 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); 1153 unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1189 RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); 1154 RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1155 Inst->getType());
1190 SDValue Chain = DAG.getEntryNode(); 1156 SDValue Chain = DAG.getEntryNode();
1191 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); 1157 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1192 } 1158 }
1193 1159
1194 llvm_unreachable("Can't get register for value!"); 1160 llvm_unreachable("Can't get register for value!");
1195 } 1161 }
1196 1162
1163 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1164 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1165 bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1166 bool IsSEH = isAsynchronousEHPersonality(Pers);
1167 bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1168 MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1169 // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1170 if (IsMSVCCXX || IsCoreCLR)
1171 CatchPadMBB->setIsEHFuncletEntry();
1172
1173 MachineBasicBlock *NormalDestMBB = FuncInfo.MBBMap[I.getNormalDest()];
1174
1175 // Update machine-CFG edge.
1176 FuncInfo.MBB->addSuccessor(NormalDestMBB);
1177
1178 // CatchPads in SEH are not funclets, they are merely markers which indicate
1179 // where to insert register restoration code.
1180 if (IsSEH) {
1181 DAG.setRoot(DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1182 getControlRoot(), DAG.getBasicBlock(NormalDestMBB),
1183 DAG.getBasicBlock(FuncInfo.MF->begin())));
1184 return;
1185 }
1186
1187 // If this is not a fall-through branch or optimizations are switched off,
1188 // emit the branch.
1189 if (NormalDestMBB != NextBlock(CatchPadMBB) ||
1190 TM.getOptLevel() == CodeGenOpt::None)
1191 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1192 getControlRoot(),
1193 DAG.getBasicBlock(NormalDestMBB)));
1194 }
1195
1196 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1197 // Update machine-CFG edge.
1198 MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1199 FuncInfo.MBB->addSuccessor(TargetMBB);
1200
1201 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1202 bool IsSEH = isAsynchronousEHPersonality(Pers);
1203 if (IsSEH) {
1204 // If this is not a fall-through branch or optimizations are switched off,
1205 // emit the branch.
1206 if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1207 TM.getOptLevel() == CodeGenOpt::None)
1208 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1209 getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1210 return;
1211 }
1212
1213 // Figure out the funclet membership for the catchret's successor.
1214 // This will be used by the FuncletLayout pass to determine how to order the
1215 // BB's.
1216 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
1217 WinEHFuncInfo &EHInfo =
1218 MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
1219 const BasicBlock *SuccessorColor = EHInfo.CatchRetSuccessorColorMap[&I];
1220 assert(SuccessorColor && "No parent funclet for catchret!");
1221 MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1222 assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1223
1224 // Create the terminator node.
1225 SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1226 getControlRoot(), DAG.getBasicBlock(TargetMBB),
1227 DAG.getBasicBlock(SuccessorColorMBB));
1228 DAG.setRoot(Ret);
1229 }
1230
1231 void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) {
1232 llvm_unreachable("should never codegen catchendpads");
1233 }
1234
1235 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1236 // Don't emit any special code for the cleanuppad instruction. It just marks
1237 // the start of a funclet.
1238 FuncInfo.MBB->setIsEHFuncletEntry();
1239 FuncInfo.MBB->setIsCleanupFuncletEntry();
1240 }
1241
1242 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1243 /// many places it could ultimately go. In the IR, we have a single unwind
1244 /// destination, but in the machine CFG, we enumerate all the possible blocks.
1245 /// This function skips over imaginary basic blocks that hold catchpad,
1246 /// terminatepad, or catchendpad instructions, and finds all the "real" machine
1247 /// basic block destinations. As those destinations may not be successors of
1248 /// EHPadBB, here we also calculate the edge weight to those destinations. The
1249 /// passed-in Weight is the edge weight to EHPadBB.
1250 static void findUnwindDestinations(
1251 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, uint32_t Weight,
1252 SmallVectorImpl<std::pair<MachineBasicBlock *, uint32_t>> &UnwindDests) {
1253 EHPersonality Personality =
1254 classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1255 bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1256 bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1257
1258 while (EHPadBB) {
1259 const Instruction *Pad = EHPadBB->getFirstNonPHI();
1260 BasicBlock *NewEHPadBB = nullptr;
1261 if (isa<LandingPadInst>(Pad)) {
1262 // Stop on landingpads. They are not funclets.
1263 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight);
1264 break;
1265 } else if (isa<CleanupPadInst>(Pad)) {
1266 // Stop on cleanup pads. Cleanups are always funclet entries for all known
1267 // personalities.
1268 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight);
1269 UnwindDests.back().first->setIsEHFuncletEntry();
1270 break;
1271 } else if (const auto *CPI = dyn_cast<CatchPadInst>(Pad)) {
1272 // Add the catchpad handler to the possible destinations.
1273 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight);
1274 // In MSVC C++, catchblocks are funclets and need prologues.
1275 if (IsMSVCCXX || IsCoreCLR)
1276 UnwindDests.back().first->setIsEHFuncletEntry();
1277 NewEHPadBB = CPI->getUnwindDest();
1278 } else if (const auto *CEPI = dyn_cast<CatchEndPadInst>(Pad))
1279 NewEHPadBB = CEPI->getUnwindDest();
1280 else if (const auto *CEPI = dyn_cast<CleanupEndPadInst>(Pad))
1281 NewEHPadBB = CEPI->getUnwindDest();
1282 else
1283 continue;
1284
1285 BranchProbabilityInfo *BPI = FuncInfo.BPI;
1286 if (BPI && NewEHPadBB) {
1287 // When BPI is available, the calculated weight cannot be zero as zero
1288 // will be turned to a default weight in MachineBlockFrequencyInfo.
1289 Weight = std::max<uint32_t>(
1290 BPI->getEdgeProbability(EHPadBB, NewEHPadBB).scale(Weight), 1);
1291 }
1292 EHPadBB = NewEHPadBB;
1293 }
1294 }
1295
1296 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1297 // Update successor info.
1298 SmallVector<std::pair<MachineBasicBlock *, uint32_t>, 1> UnwindDests;
1299 auto UnwindDest = I.getUnwindDest();
1300 BranchProbabilityInfo *BPI = FuncInfo.BPI;
1301 uint32_t UnwindDestWeight =
1302 BPI ? BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), UnwindDest) : 0;
1303 findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestWeight, UnwindDests);
1304 for (auto &UnwindDest : UnwindDests) {
1305 UnwindDest.first->setIsEHPad();
1306 addSuccessorWithWeight(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1307 }
1308
1309 // Create the terminator node.
1310 SDValue Ret =
1311 DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1312 DAG.setRoot(Ret);
1313 }
1314
1315 void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) {
1316 report_fatal_error("visitCleanupEndPad not yet implemented!");
1317 }
1318
1319 void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) {
1320 report_fatal_error("visitTerminatePad not yet implemented!");
1321 }
1322
1197 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 1323 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1198 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1324 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1325 auto &DL = DAG.getDataLayout();
1199 SDValue Chain = getControlRoot(); 1326 SDValue Chain = getControlRoot();
1200 SmallVector<ISD::OutputArg, 8> Outs; 1327 SmallVector<ISD::OutputArg, 8> Outs;
1201 SmallVector<SDValue, 8> OutVals; 1328 SmallVector<SDValue, 8> OutVals;
1202 1329
1203 if (!FuncInfo.CanLowerReturn) { 1330 if (!FuncInfo.CanLowerReturn) {
1206 1333
1207 // Emit a store of the return value through the virtual register. 1334 // Emit a store of the return value through the virtual register.
1208 // Leave Outs empty so that LowerReturn won't try to load return 1335 // Leave Outs empty so that LowerReturn won't try to load return
1209 // registers the usual way. 1336 // registers the usual way.
1210 SmallVector<EVT, 1> PtrValueVTs; 1337 SmallVector<EVT, 1> PtrValueVTs;
1211 ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 1338 ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
1212 PtrValueVTs); 1339 PtrValueVTs);
1213 1340
1214 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); 1341 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
1215 SDValue RetOp = getValue(I.getOperand(0)); 1342 SDValue RetOp = getValue(I.getOperand(0));
1216 1343
1217 SmallVector<EVT, 4> ValueVTs; 1344 SmallVector<EVT, 4> ValueVTs;
1218 SmallVector<uint64_t, 4> Offsets; 1345 SmallVector<uint64_t, 4> Offsets;
1219 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); 1346 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1220 unsigned NumValues = ValueVTs.size(); 1347 unsigned NumValues = ValueVTs.size();
1221 1348
1222 SmallVector<SDValue, 4> Chains(NumValues); 1349 SmallVector<SDValue, 4> Chains(NumValues);
1223 for (unsigned i = 0; i != NumValues; ++i) { 1350 for (unsigned i = 0; i != NumValues; ++i) {
1224 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), 1351 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
1225 RetPtr.getValueType(), RetPtr, 1352 RetPtr.getValueType(), RetPtr,
1226 DAG.getIntPtrConstant(Offsets[i])); 1353 DAG.getIntPtrConstant(Offsets[i],
1354 getCurSDLoc()));
1227 Chains[i] = 1355 Chains[i] =
1228 DAG.getStore(Chain, getCurSDLoc(), 1356 DAG.getStore(Chain, getCurSDLoc(),
1229 SDValue(RetOp.getNode(), RetOp.getResNo() + i), 1357 SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1230 // FIXME: better loc info would be nice. 1358 // FIXME: better loc info would be nice.
1231 Add, MachinePointerInfo(), false, false, 0); 1359 Add, MachinePointerInfo(), false, false, 0);
1233 1361
1234 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), 1362 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1235 MVT::Other, Chains); 1363 MVT::Other, Chains);
1236 } else if (I.getNumOperands() != 0) { 1364 } else if (I.getNumOperands() != 0) {
1237 SmallVector<EVT, 4> ValueVTs; 1365 SmallVector<EVT, 4> ValueVTs;
1238 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); 1366 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1239 unsigned NumValues = ValueVTs.size(); 1367 unsigned NumValues = ValueVTs.size();
1240 if (NumValues) { 1368 if (NumValues) {
1241 SDValue RetOp = getValue(I.getOperand(0)); 1369 SDValue RetOp = getValue(I.getOperand(0));
1242 1370
1243 const Function *F = I.getParent()->getParent(); 1371 const Function *F = I.getParent()->getParent();
1407 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && 1535 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1408 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { 1536 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1409 ISD::CondCode Condition; 1537 ISD::CondCode Condition;
1410 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { 1538 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1411 Condition = getICmpCondCode(IC->getPredicate()); 1539 Condition = getICmpCondCode(IC->getPredicate());
1412 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { 1540 } else {
1541 const FCmpInst *FC = cast<FCmpInst>(Cond);
1413 Condition = getFCmpCondCode(FC->getPredicate()); 1542 Condition = getFCmpCondCode(FC->getPredicate());
1414 if (TM.Options.NoNaNsFPMath) 1543 if (TM.Options.NoNaNsFPMath)
1415 Condition = getFCmpCodeWithoutNaN(Condition); 1544 Condition = getFCmpCodeWithoutNaN(Condition);
1416 } else {
1417 (void)Condition; // silence warning.
1418 llvm_unreachable("Unknown compare instruction");
1419 } 1545 }
1420 1546
1421 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, 1547 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1422 TBB, FBB, CurBB, TWeight, FWeight); 1548 TBB, FBB, CurBB, TWeight, FWeight);
1423 SwitchCases.push_back(CB); 1549 SwitchCases.push_back(CB);
1443 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, 1569 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1444 MachineBasicBlock *TBB, 1570 MachineBasicBlock *TBB,
1445 MachineBasicBlock *FBB, 1571 MachineBasicBlock *FBB,
1446 MachineBasicBlock *CurBB, 1572 MachineBasicBlock *CurBB,
1447 MachineBasicBlock *SwitchBB, 1573 MachineBasicBlock *SwitchBB,
1448 unsigned Opc, uint32_t TWeight, 1574 Instruction::BinaryOps Opc,
1575 uint32_t TWeight,
1449 uint32_t FWeight) { 1576 uint32_t FWeight) {
1450 // If this node is not part of the or/and tree, emit it as a branch. 1577 // If this node is not part of the or/and tree, emit it as a branch.
1451 const Instruction *BOp = dyn_cast<Instruction>(Cond); 1578 const Instruction *BOp = dyn_cast<Instruction>(Cond);
1452 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 1579 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1453 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || 1580 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1476 // 1603 //
1477 1604
1478 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 1605 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1479 // The requirement is that 1606 // The requirement is that
1480 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) 1607 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1481 // = TrueProb for orignal BB. 1608 // = TrueProb for original BB.
1482 // Assuming the orignal weights are A and B, one choice is to set BB1's 1609 // Assuming the original weights are A and B, one choice is to set BB1's
1483 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice 1610 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
1484 // assumes that 1611 // assumes that
1485 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 1612 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1486 // Another choice is to assume TrueProb for BB1 equals to TrueProb for 1613 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1487 // TmpBB, but the math is more complicated. 1614 // TmpBB, but the math is more complicated.
1512 // This requires creation of TmpBB after CurBB. 1639 // This requires creation of TmpBB after CurBB.
1513 1640
1514 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 1641 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1515 // The requirement is that 1642 // The requirement is that
1516 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) 1643 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1517 // = FalseProb for orignal BB. 1644 // = FalseProb for original BB.
1518 // Assuming the orignal weights are A and B, one choice is to set BB1's 1645 // Assuming the original weights are A and B, one choice is to set BB1's
1519 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice 1646 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
1520 // assumes that 1647 // assumes that
1521 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. 1648 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
1522 1649
1523 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; 1650 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
1571 MachineBasicBlock *BrMBB = FuncInfo.MBB; 1698 MachineBasicBlock *BrMBB = FuncInfo.MBB;
1572 1699
1573 // Update machine-CFG edges. 1700 // Update machine-CFG edges.
1574 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; 1701 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1575 1702
1576 // Figure out which block is immediately after the current one.
1577 MachineBasicBlock *NextBlock = nullptr;
1578 MachineFunction::iterator BBI = BrMBB;
1579 if (++BBI != FuncInfo.MF->end())
1580 NextBlock = BBI;
1581
1582 if (I.isUnconditional()) { 1703 if (I.isUnconditional()) {
1583 // Update machine-CFG edges. 1704 // Update machine-CFG edges.
1584 BrMBB->addSuccessor(Succ0MBB); 1705 BrMBB->addSuccessor(Succ0MBB);
1585 1706
1586 // If this is not a fall-through branch or optimizations are switched off, 1707 // If this is not a fall-through branch or optimizations are switched off,
1587 // emit the branch. 1708 // emit the branch.
1588 if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) 1709 if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1589 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 1710 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1590 MVT::Other, getControlRoot(), 1711 MVT::Other, getControlRoot(),
1591 DAG.getBasicBlock(Succ0MBB))); 1712 DAG.getBasicBlock(Succ0MBB)));
1592 1713
1593 return; 1714 return;
1613 // je foo 1734 // je foo
1614 // cmp D, E 1735 // cmp D, E
1615 // jle foo 1736 // jle foo
1616 // 1737 //
1617 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { 1738 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1618 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && 1739 Instruction::BinaryOps Opcode = BOp->getOpcode();
1619 BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || 1740 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1620 BOp->getOpcode() == Instruction::Or)) { 1741 !I.getMetadata(LLVMContext::MD_unpredictable) &&
1742 (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1621 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, 1743 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1622 BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), 1744 Opcode, getEdgeWeight(BrMBB, Succ0MBB),
1623 getEdgeWeight(BrMBB, Succ1MBB)); 1745 getEdgeWeight(BrMBB, Succ1MBB));
1624 // If the compares in later blocks need to use values not currently 1746 // If the compares in later blocks need to use values not currently
1625 // exported from this block, export them now. This block should always 1747 // exported from this block, export them now. This block should always
1626 // be the first entry. 1748 // be the first entry.
1627 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); 1749 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1672 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && 1794 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1673 CB.CC == ISD::SETEQ) 1795 CB.CC == ISD::SETEQ)
1674 Cond = CondLHS; 1796 Cond = CondLHS;
1675 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && 1797 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1676 CB.CC == ISD::SETEQ) { 1798 CB.CC == ISD::SETEQ) {
1677 SDValue True = DAG.getConstant(1, CondLHS.getValueType()); 1799 SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1678 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); 1800 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1679 } else 1801 } else
1680 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); 1802 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1681 } else { 1803 } else {
1682 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); 1804 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1683 1805
1684 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); 1806 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1685 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); 1807 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
1686 1808
1687 SDValue CmpOp = getValue(CB.CmpMHS); 1809 SDValue CmpOp = getValue(CB.CmpMHS);
1688 EVT VT = CmpOp.getValueType(); 1810 EVT VT = CmpOp.getValueType();
1689 1811
1690 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { 1812 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
1691 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), 1813 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
1692 ISD::SETLE); 1814 ISD::SETLE);
1693 } else { 1815 } else {
1694 SDValue SUB = DAG.getNode(ISD::SUB, dl, 1816 SDValue SUB = DAG.getNode(ISD::SUB, dl,
1695 VT, CmpOp, DAG.getConstant(Low, VT)); 1817 VT, CmpOp, DAG.getConstant(Low, dl, VT));
1696 Cond = DAG.getSetCC(dl, MVT::i1, SUB, 1818 Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1697 DAG.getConstant(High-Low, VT), ISD::SETULE); 1819 DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
1698 } 1820 }
1699 } 1821 }
1700 1822
1701 // Update successor info 1823 // Update successor info
1702 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); 1824 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
1703 // TrueBB and FalseBB are always different unless the incoming IR is 1825 // TrueBB and FalseBB are always different unless the incoming IR is
1704 // degenerate. This only happens when running llc on weird IR. 1826 // degenerate. This only happens when running llc on weird IR.
1705 if (CB.TrueBB != CB.FalseBB) 1827 if (CB.TrueBB != CB.FalseBB)
1706 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); 1828 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
1707 1829
1708 // Set NextBlock to be the MBB immediately after the current one, if any.
1709 // This is used to avoid emitting unnecessary branches to the next block.
1710 MachineBasicBlock *NextBlock = nullptr;
1711 MachineFunction::iterator BBI = SwitchBB;
1712 if (++BBI != FuncInfo.MF->end())
1713 NextBlock = BBI;
1714
1715 // If the lhs block is the next block, invert the condition so that we can 1830 // If the lhs block is the next block, invert the condition so that we can
1716 // fall through to the lhs instead of the rhs block. 1831 // fall through to the lhs instead of the rhs block.
1717 if (CB.TrueBB == NextBlock) { 1832 if (CB.TrueBB == NextBlock(SwitchBB)) {
1718 std::swap(CB.TrueBB, CB.FalseBB); 1833 std::swap(CB.TrueBB, CB.FalseBB);
1719 SDValue True = DAG.getConstant(1, Cond.getValueType()); 1834 SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
1720 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); 1835 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1721 } 1836 }
1722 1837
1723 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, 1838 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1724 MVT::Other, getControlRoot(), Cond, 1839 MVT::Other, getControlRoot(), Cond,
1735 1850
1736 /// visitJumpTable - Emit JumpTable node in the current MBB 1851 /// visitJumpTable - Emit JumpTable node in the current MBB
1737 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { 1852 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1738 // Emit the code for the jump table 1853 // Emit the code for the jump table
1739 assert(JT.Reg != -1U && "Should lower JT Header first!"); 1854 assert(JT.Reg != -1U && "Should lower JT Header first!");
1740 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); 1855 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1741 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 1856 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
1742 JT.Reg, PTy); 1857 JT.Reg, PTy);
1743 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); 1858 SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1744 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), 1859 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
1745 MVT::Other, Index.getValue(1), 1860 MVT::Other, Index.getValue(1),
1750 /// visitJumpTableHeader - This function emits necessary code to produce index 1865 /// visitJumpTableHeader - This function emits necessary code to produce index
1751 /// in the JumpTable from switch case. 1866 /// in the JumpTable from switch case.
1752 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, 1867 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1753 JumpTableHeader &JTH, 1868 JumpTableHeader &JTH,
1754 MachineBasicBlock *SwitchBB) { 1869 MachineBasicBlock *SwitchBB) {
1870 SDLoc dl = getCurSDLoc();
1871
1755 // Subtract the lowest switch case value from the value being switched on and 1872 // Subtract the lowest switch case value from the value being switched on and
1756 // conditional branch to default mbb if the result is greater than the 1873 // conditional branch to default mbb if the result is greater than the
1757 // difference between smallest and largest cases. 1874 // difference between smallest and largest cases.
1758 SDValue SwitchOp = getValue(JTH.SValue); 1875 SDValue SwitchOp = getValue(JTH.SValue);
1759 EVT VT = SwitchOp.getValueType(); 1876 EVT VT = SwitchOp.getValueType();
1760 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 1877 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
1761 DAG.getConstant(JTH.First, VT)); 1878 DAG.getConstant(JTH.First, dl, VT));
1762 1879
1763 // The SDNode we just created, which holds the value being switched on minus 1880 // The SDNode we just created, which holds the value being switched on minus
1764 // the smallest case value, needs to be copied to a virtual register so it 1881 // the smallest case value, needs to be copied to a virtual register so it
1765 // can be used as an index into the jump table in a subsequent basic block. 1882 // can be used as an index into the jump table in a subsequent basic block.
1766 // This value may be smaller or larger than the target's pointer type, and 1883 // This value may be smaller or larger than the target's pointer type, and
1767 // therefore require extension or truncating. 1884 // therefore require extension or truncating.
1768 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1885 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1769 SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); 1886 SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
1770 1887
1771 unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); 1888 unsigned JumpTableReg =
1772 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 1889 FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
1890 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
1773 JumpTableReg, SwitchOp); 1891 JumpTableReg, SwitchOp);
1774 JT.Reg = JumpTableReg; 1892 JT.Reg = JumpTableReg;
1775 1893
1776 // Emit the range check for the jump table, and branch to the default block 1894 // Emit the range check for the jump table, and branch to the default block
1777 // for the switch statement if the value being switched on exceeds the largest 1895 // for the switch statement if the value being switched on exceeds the largest
1778 // case in the switch. 1896 // case in the switch.
1779 SDValue CMP = 1897 SDValue CMP = DAG.getSetCC(
1780 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 1898 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1781 Sub.getValueType()), 1899 Sub.getValueType()),
1782 Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); 1900 Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
1783 1901
1784 // Set NextBlock to be the MBB immediately after the current one, if any. 1902 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1785 // This is used to avoid emitting unnecessary branches to the next block.
1786 MachineBasicBlock *NextBlock = nullptr;
1787 MachineFunction::iterator BBI = SwitchBB;
1788
1789 if (++BBI != FuncInfo.MF->end())
1790 NextBlock = BBI;
1791
1792 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
1793 MVT::Other, CopyTo, CMP, 1903 MVT::Other, CopyTo, CMP,
1794 DAG.getBasicBlock(JT.Default)); 1904 DAG.getBasicBlock(JT.Default));
1795 1905
1796 if (JT.MBB != NextBlock) 1906 // Avoid emitting unnecessary branches to the next block.
1797 BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, 1907 if (JT.MBB != NextBlock(SwitchBB))
1908 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1798 DAG.getBasicBlock(JT.MBB)); 1909 DAG.getBasicBlock(JT.MBB));
1799 1910
1800 DAG.setRoot(BrCond); 1911 DAG.setRoot(BrCond);
1801 } 1912 }
1802 1913
1809 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, 1920 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
1810 MachineBasicBlock *ParentBB) { 1921 MachineBasicBlock *ParentBB) {
1811 1922
1812 // First create the loads to the guard/stack slot for the comparison. 1923 // First create the loads to the guard/stack slot for the comparison.
1813 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1924 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1814 EVT PtrTy = TLI.getPointerTy(); 1925 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
1815 1926
1816 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); 1927 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
1817 int FI = MFI->getStackProtectorIndex(); 1928 int FI = MFI->getStackProtectorIndex();
1818 1929
1819 const Value *IRGuard = SPD.getGuard(); 1930 const Value *IRGuard = SPD.getGuard();
1820 SDValue GuardPtr = getValue(IRGuard); 1931 SDValue GuardPtr = getValue(IRGuard);
1821 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); 1932 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
1822 1933
1823 unsigned Align = 1934 unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType());
1824 TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
1825 1935
1826 SDValue Guard; 1936 SDValue Guard;
1937 SDLoc dl = getCurSDLoc();
1827 1938
1828 // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the 1939 // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
1829 // guard value from the virtual register holding the value. Otherwise, emit a 1940 // guard value from the virtual register holding the value. Otherwise, emit a
1830 // volatile load to retrieve the stack guard value. 1941 // volatile load to retrieve the stack guard value.
1831 unsigned GuardReg = SPD.getGuardReg(); 1942 unsigned GuardReg = SPD.getGuardReg();
1832 1943
1833 if (GuardReg && TLI.useLoadStackGuardNode()) 1944 if (GuardReg && TLI.useLoadStackGuardNode())
1834 Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, 1945 Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg,
1835 PtrTy); 1946 PtrTy);
1836 else 1947 else
1837 Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1948 Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
1838 GuardPtr, MachinePointerInfo(IRGuard, 0), 1949 GuardPtr, MachinePointerInfo(IRGuard, 0),
1839 true, false, false, Align); 1950 true, false, false, Align);
1840 1951
1841 SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1952 SDValue StackSlot = DAG.getLoad(
1842 StackSlotPtr, 1953 PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
1843 MachinePointerInfo::getFixedStack(FI), 1954 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
1844 true, false, false, Align); 1955 false, false, Align);
1845 1956
1846 // Perform the comparison via a subtract/getsetcc. 1957 // Perform the comparison via a subtract/getsetcc.
1847 EVT VT = Guard.getValueType(); 1958 EVT VT = Guard.getValueType();
1848 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); 1959 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
1849 1960
1850 SDValue Cmp = 1961 SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
1851 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 1962 *DAG.getContext(),
1852 Sub.getValueType()), 1963 Sub.getValueType()),
1853 Sub, DAG.getConstant(0, VT), ISD::SETNE); 1964 Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
1854 1965
1855 // If the sub is not 0, then we know the guard/stackslot do not equal, so 1966 // If the sub is not 0, then we know the guard/stackslot do not equal, so
1856 // branch to failure MBB. 1967 // branch to failure MBB.
1857 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1968 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1858 MVT::Other, StackSlot.getOperand(0), 1969 MVT::Other, StackSlot.getOperand(0),
1859 Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); 1970 Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
1860 // Otherwise branch to success MBB. 1971 // Otherwise branch to success MBB.
1861 SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), 1972 SDValue Br = DAG.getNode(ISD::BR, dl,
1862 MVT::Other, BrCond, 1973 MVT::Other, BrCond,
1863 DAG.getBasicBlock(SPD.getSuccessMBB())); 1974 DAG.getBasicBlock(SPD.getSuccessMBB()));
1864 1975
1865 DAG.setRoot(Br); 1976 DAG.setRoot(Br);
1866 } 1977 }
1884 1995
1885 /// visitBitTestHeader - This function emits necessary code to produce value 1996 /// visitBitTestHeader - This function emits necessary code to produce value
1886 /// suitable for "bit tests" 1997 /// suitable for "bit tests"
1887 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, 1998 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
1888 MachineBasicBlock *SwitchBB) { 1999 MachineBasicBlock *SwitchBB) {
2000 SDLoc dl = getCurSDLoc();
2001
1889 // Subtract the minimum value 2002 // Subtract the minimum value
1890 SDValue SwitchOp = getValue(B.SValue); 2003 SDValue SwitchOp = getValue(B.SValue);
1891 EVT VT = SwitchOp.getValueType(); 2004 EVT VT = SwitchOp.getValueType();
1892 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 2005 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
1893 DAG.getConstant(B.First, VT)); 2006 DAG.getConstant(B.First, dl, VT));
1894 2007
1895 // Check range 2008 // Check range
1896 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2009 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1897 SDValue RangeCmp = 2010 SDValue RangeCmp = DAG.getSetCC(
1898 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 2011 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1899 Sub.getValueType()), 2012 Sub.getValueType()),
1900 Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); 2013 Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
1901 2014
1902 // Determine the type of the test operands. 2015 // Determine the type of the test operands.
1903 bool UsePtrType = false; 2016 bool UsePtrType = false;
1904 if (!TLI.isTypeLegal(VT)) 2017 if (!TLI.isTypeLegal(VT))
1905 UsePtrType = true; 2018 UsePtrType = true;
1911 UsePtrType = true; 2024 UsePtrType = true;
1912 break; 2025 break;
1913 } 2026 }
1914 } 2027 }
1915 if (UsePtrType) { 2028 if (UsePtrType) {
1916 VT = TLI.getPointerTy(); 2029 VT = TLI.getPointerTy(DAG.getDataLayout());
1917 Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); 2030 Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
1918 } 2031 }
1919 2032
1920 B.RegVT = VT.getSimpleVT(); 2033 B.RegVT = VT.getSimpleVT();
1921 B.Reg = FuncInfo.CreateReg(B.RegVT); 2034 B.Reg = FuncInfo.CreateReg(B.RegVT);
1922 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 2035 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
1923 B.Reg, Sub);
1924
1925 // Set NextBlock to be the MBB immediately after the current one, if any.
1926 // This is used to avoid emitting unnecessary branches to the next block.
1927 MachineBasicBlock *NextBlock = nullptr;
1928 MachineFunction::iterator BBI = SwitchBB;
1929 if (++BBI != FuncInfo.MF->end())
1930 NextBlock = BBI;
1931 2036
1932 MachineBasicBlock* MBB = B.Cases[0].ThisBB; 2037 MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1933 2038
1934 addSuccessorWithWeight(SwitchBB, B.Default); 2039 addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight);
1935 addSuccessorWithWeight(SwitchBB, MBB); 2040 addSuccessorWithWeight(SwitchBB, MBB, B.Weight);
1936 2041
1937 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 2042 SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
1938 MVT::Other, CopyTo, RangeCmp, 2043 MVT::Other, CopyTo, RangeCmp,
1939 DAG.getBasicBlock(B.Default)); 2044 DAG.getBasicBlock(B.Default));
1940 2045
1941 if (MBB != NextBlock) 2046 // Avoid emitting unnecessary branches to the next block.
1942 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, 2047 if (MBB != NextBlock(SwitchBB))
2048 BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
1943 DAG.getBasicBlock(MBB)); 2049 DAG.getBasicBlock(MBB));
1944 2050
1945 DAG.setRoot(BrRange); 2051 DAG.setRoot(BrRange);
1946 } 2052 }
1947 2053
1950 MachineBasicBlock* NextMBB, 2056 MachineBasicBlock* NextMBB,
1951 uint32_t BranchWeightToNext, 2057 uint32_t BranchWeightToNext,
1952 unsigned Reg, 2058 unsigned Reg,
1953 BitTestCase &B, 2059 BitTestCase &B,
1954 MachineBasicBlock *SwitchBB) { 2060 MachineBasicBlock *SwitchBB) {
2061 SDLoc dl = getCurSDLoc();
1955 MVT VT = BB.RegVT; 2062 MVT VT = BB.RegVT;
1956 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 2063 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
1957 Reg, VT);
1958 SDValue Cmp; 2064 SDValue Cmp;
1959 unsigned PopCount = countPopulation(B.Mask); 2065 unsigned PopCount = countPopulation(B.Mask);
1960 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2066 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1961 if (PopCount == 1) { 2067 if (PopCount == 1) {
1962 // Testing for a single bit; just compare the shift count with what it 2068 // Testing for a single bit; just compare the shift count with what it
1963 // would need to be to shift a 1 bit in that position. 2069 // would need to be to shift a 1 bit in that position.
1964 Cmp = DAG.getSetCC( 2070 Cmp = DAG.getSetCC(
1965 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, 2071 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
1966 DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); 2072 ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2073 ISD::SETEQ);
1967 } else if (PopCount == BB.Range) { 2074 } else if (PopCount == BB.Range) {
1968 // There is only one zero bit in the range, test for it directly. 2075 // There is only one zero bit in the range, test for it directly.
1969 Cmp = DAG.getSetCC( 2076 Cmp = DAG.getSetCC(
1970 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, 2077 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
1971 DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); 2078 ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2079 ISD::SETNE);
1972 } else { 2080 } else {
1973 // Make desired shift 2081 // Make desired shift
1974 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, 2082 SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
1975 DAG.getConstant(1, VT), ShiftOp); 2083 DAG.getConstant(1, dl, VT), ShiftOp);
1976 2084
1977 // Emit bit tests and jumps 2085 // Emit bit tests and jumps
1978 SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), 2086 SDValue AndOp = DAG.getNode(ISD::AND, dl,
1979 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); 2087 VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
1980 Cmp = DAG.getSetCC(getCurSDLoc(), 2088 Cmp = DAG.getSetCC(
1981 TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, 2089 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
1982 DAG.getConstant(0, VT), ISD::SETNE); 2090 AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
1983 } 2091 }
1984 2092
1985 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 2093 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
1986 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); 2094 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
1987 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 2095 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
1988 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); 2096 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
1989 2097
1990 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 2098 SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
1991 MVT::Other, getControlRoot(), 2099 MVT::Other, getControlRoot(),
1992 Cmp, DAG.getBasicBlock(B.TargetBB)); 2100 Cmp, DAG.getBasicBlock(B.TargetBB));
1993 2101
1994 // Set NextBlock to be the MBB immediately after the current one, if any. 2102 // Avoid emitting unnecessary branches to the next block.
1995 // This is used to avoid emitting unnecessary branches to the next block. 2103 if (NextMBB != NextBlock(SwitchBB))
1996 MachineBasicBlock *NextBlock = nullptr; 2104 BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
1997 MachineFunction::iterator BBI = SwitchBB;
1998 if (++BBI != FuncInfo.MF->end())
1999 NextBlock = BBI;
2000
2001 if (NextMBB != NextBlock)
2002 BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd,
2003 DAG.getBasicBlock(NextMBB)); 2105 DAG.getBasicBlock(NextMBB));
2004 2106
2005 DAG.setRoot(BrAnd); 2107 DAG.setRoot(BrAnd);
2006 } 2108 }
2007 2109
2008 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { 2110 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2009 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; 2111 MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2010 2112
2011 // Retrieve successors. 2113 // Retrieve successors. Look through artificial IR level blocks like catchpads
2114 // and catchendpads for successors.
2012 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; 2115 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2013 MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; 2116 const BasicBlock *EHPadBB = I.getSuccessor(1);
2014 2117
2015 const Value *Callee(I.getCalledValue()); 2118 const Value *Callee(I.getCalledValue());
2016 const Function *Fn = dyn_cast<Function>(Callee); 2119 const Function *Fn = dyn_cast<Function>(Callee);
2017 if (isa<InlineAsm>(Callee)) 2120 if (isa<InlineAsm>(Callee))
2018 visitInlineAsm(&I); 2121 visitInlineAsm(&I);
2023 case Intrinsic::donothing: 2126 case Intrinsic::donothing:
2024 // Ignore invokes to @llvm.donothing: jump directly to the next BB. 2127 // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2025 break; 2128 break;
2026 case Intrinsic::experimental_patchpoint_void: 2129 case Intrinsic::experimental_patchpoint_void:
2027 case Intrinsic::experimental_patchpoint_i64: 2130 case Intrinsic::experimental_patchpoint_i64:
2028 visitPatchpoint(&I, LandingPad); 2131 visitPatchpoint(&I, EHPadBB);
2029 break; 2132 break;
2133 case Intrinsic::experimental_gc_statepoint:
2134 LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2135 break;
2030 } 2136 }
2031 } else 2137 } else
2032 LowerCallTo(&I, getValue(Callee), false, LandingPad); 2138 LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2033 2139
2034 // If the value of the invoke is used outside of its defining block, make it 2140 // If the value of the invoke is used outside of its defining block, make it
2035 // available as a virtual register. 2141 // available as a virtual register.
2036 CopyToExportRegsIfNeeded(&I); 2142 // We already took care of the exported value for the statepoint instruction
2037 2143 // during call to the LowerStatepoint.
2038 // Update successor info 2144 if (!isStatepoint(I)) {
2145 CopyToExportRegsIfNeeded(&I);
2146 }
2147
2148 SmallVector<std::pair<MachineBasicBlock *, uint32_t>, 1> UnwindDests;
2149 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2150 uint32_t EHPadBBWeight =
2151 BPI ? BPI->getEdgeWeight(InvokeMBB->getBasicBlock(), EHPadBB) : 0;
2152 findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBWeight, UnwindDests);
2153
2154 // Update successor info.
2039 addSuccessorWithWeight(InvokeMBB, Return); 2155 addSuccessorWithWeight(InvokeMBB, Return);
2040 addSuccessorWithWeight(InvokeMBB, LandingPad); 2156 for (auto &UnwindDest : UnwindDests) {
2157 UnwindDest.first->setIsEHPad();
2158 addSuccessorWithWeight(InvokeMBB, UnwindDest.first, UnwindDest.second);
2159 }
2041 2160
2042 // Drop into normal successor. 2161 // Drop into normal successor.
2043 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 2162 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2044 MVT::Other, getControlRoot(), 2163 MVT::Other, getControlRoot(),
2045 DAG.getBasicBlock(Return))); 2164 DAG.getBasicBlock(Return)));
2048 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { 2167 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2049 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); 2168 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2050 } 2169 }
2051 2170
2052 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { 2171 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2053 assert(FuncInfo.MBB->isLandingPad() && 2172 assert(FuncInfo.MBB->isEHPad() &&
2054 "Call to landingpad not in landing pad!"); 2173 "Call to landingpad not in landing pad!");
2055 2174
2056 MachineBasicBlock *MBB = FuncInfo.MBB; 2175 MachineBasicBlock *MBB = FuncInfo.MBB;
2057 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 2176 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
2058 AddLandingPadInfo(LP, MMI, MBB); 2177 AddLandingPadInfo(LP, MMI, MBB);
2063 if (TLI.getExceptionPointerRegister() == 0 && 2182 if (TLI.getExceptionPointerRegister() == 0 &&
2064 TLI.getExceptionSelectorRegister() == 0) 2183 TLI.getExceptionSelectorRegister() == 0)
2065 return; 2184 return;
2066 2185
2067 SmallVector<EVT, 2> ValueVTs; 2186 SmallVector<EVT, 2> ValueVTs;
2068 ComputeValueVTs(TLI, LP.getType(), ValueVTs); 2187 SDLoc dl = getCurSDLoc();
2188 ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2069 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); 2189 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2070 2190
2071 // Get the two live-in registers as SDValues. The physregs have already been 2191 // Get the two live-in registers as SDValues. The physregs have already been
2072 // copied into virtual registers. 2192 // copied into virtual registers.
2073 SDValue Ops[2]; 2193 SDValue Ops[2];
2074 if (FuncInfo.ExceptionPointerVirtReg) { 2194 if (FuncInfo.ExceptionPointerVirtReg) {
2075 Ops[0] = DAG.getZExtOrTrunc( 2195 Ops[0] = DAG.getZExtOrTrunc(
2076 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2196 DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2077 FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), 2197 FuncInfo.ExceptionPointerVirtReg,
2078 getCurSDLoc(), ValueVTs[0]); 2198 TLI.getPointerTy(DAG.getDataLayout())),
2199 dl, ValueVTs[0]);
2079 } else { 2200 } else {
2080 Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); 2201 Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2081 } 2202 }
2082 Ops[1] = DAG.getZExtOrTrunc( 2203 Ops[1] = DAG.getZExtOrTrunc(
2083 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2204 DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2084 FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), 2205 FuncInfo.ExceptionSelectorVirtReg,
2085 getCurSDLoc(), ValueVTs[1]); 2206 TLI.getPointerTy(DAG.getDataLayout())),
2207 dl, ValueVTs[1]);
2086 2208
2087 // Merge into one. 2209 // Merge into one.
2088 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2210 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2089 DAG.getVTList(ValueVTs), Ops); 2211 DAG.getVTList(ValueVTs), Ops);
2090 setValue(&LP, Res); 2212 setValue(&LP, Res);
2091 } 2213 }
2092 2214
2093 unsigned 2215 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2094 SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, 2216 #ifndef NDEBUG
2095 MachineBasicBlock *LPadBB) { 2217 for (const CaseCluster &CC : Clusters)
2096 SDValue Chain = getControlRoot(); 2218 assert(CC.Low == CC.High && "Input clusters must be single-case");
2097 2219 #endif
2098 // Get the typeid that we will dispatch on later. 2220
2099 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2221 std::sort(Clusters.begin(), Clusters.end(),
2100 const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); 2222 [](const CaseCluster &a, const CaseCluster &b) {
2101 unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); 2223 return a.Low->getValue().slt(b.Low->getValue());
2102 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); 2224 });
2103 SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); 2225
2104 Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); 2226 // Merge adjacent clusters with the same destination.
2105 2227 const unsigned N = Clusters.size();
2106 // Branch to the main landing pad block. 2228 unsigned DstIndex = 0;
2107 MachineBasicBlock *ClauseMBB = FuncInfo.MBB; 2229 for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2108 ClauseMBB->addSuccessor(LPadBB); 2230 CaseCluster &CC = Clusters[SrcIndex];
2109 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, 2231 const ConstantInt *CaseVal = CC.Low;
2110 DAG.getBasicBlock(LPadBB))); 2232 MachineBasicBlock *Succ = CC.MBB;
2111 return VReg; 2233
2112 } 2234 if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2113 2235 (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2114 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for 2236 // If this case has the same successor and is a neighbour, merge it into
2115 /// small case ranges). 2237 // the previous cluster.
2116 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, 2238 Clusters[DstIndex - 1].High = CaseVal;
2117 CaseRecVector& WorkList, 2239 Clusters[DstIndex - 1].Weight += CC.Weight;
2118 const Value* SV, 2240 assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!");
2119 MachineBasicBlock *Default,
2120 MachineBasicBlock *SwitchBB) {
2121 // Size is the number of Cases represented by this range.
2122 size_t Size = CR.Range.second - CR.Range.first;
2123 if (Size > 3)
2124 return false;
2125
2126 // Get the MachineFunction which holds the current MBB. This is used when
2127 // inserting any additional MBBs necessary to represent the switch.
2128 MachineFunction *CurMF = FuncInfo.MF;
2129
2130 // Figure out which block is immediately after the current one.
2131 MachineBasicBlock *NextBlock = nullptr;
2132 MachineFunction::iterator BBI = CR.CaseBB;
2133
2134 if (++BBI != FuncInfo.MF->end())
2135 NextBlock = BBI;
2136
2137 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2138 // If any two of the cases has the same destination, and if one value
2139 // is the same as the other, but has one bit unset that the other has set,
2140 // use bit manipulation to do two compares at once. For example:
2141 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
2142 // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
2143 // TODO: Handle cases where CR.CaseBB != SwitchBB.
2144 if (Size == 2 && CR.CaseBB == SwitchBB) {
2145 Case &Small = *CR.Range.first;
2146 Case &Big = *(CR.Range.second-1);
2147
2148 if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
2149 const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
2150 const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
2151
2152 // Check that there is only one bit different.
2153 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
2154 (SmallValue | BigValue) == BigValue) {
2155 // Isolate the common bit.
2156 APInt CommonBit = BigValue & ~SmallValue;
2157 assert((SmallValue | CommonBit) == BigValue &&
2158 CommonBit.countPopulation() == 1 && "Not a common bit?");
2159
2160 SDValue CondLHS = getValue(SV);
2161 EVT VT = CondLHS.getValueType();
2162 SDLoc DL = getCurSDLoc();
2163
2164 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
2165 DAG.getConstant(CommonBit, VT));
2166 SDValue Cond = DAG.getSetCC(DL, MVT::i1,
2167 Or, DAG.getConstant(BigValue, VT),
2168 ISD::SETEQ);
2169
2170 // Update successor info.
2171 // Both Small and Big will jump to Small.BB, so we sum up the weights.
2172 addSuccessorWithWeight(SwitchBB, Small.BB,
2173 Small.ExtraWeight + Big.ExtraWeight);
2174 addSuccessorWithWeight(SwitchBB, Default,
2175 // The default destination is the first successor in IR.
2176 BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
2177
2178 // Insert the true branch.
2179 SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
2180 getControlRoot(), Cond,
2181 DAG.getBasicBlock(Small.BB));
2182
2183 // Insert the false branch.
2184 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
2185 DAG.getBasicBlock(Default));
2186
2187 DAG.setRoot(BrCond);
2188 return true;
2189 }
2190 }
2191 }
2192
2193 // Order cases by weight so the most likely case will be checked first.
2194 uint32_t UnhandledWeights = 0;
2195 if (BPI) {
2196 for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
2197 uint32_t IWeight = I->ExtraWeight;
2198 UnhandledWeights += IWeight;
2199 for (CaseItr J = CR.Range.first; J < I; ++J) {
2200 uint32_t JWeight = J->ExtraWeight;
2201 if (IWeight > JWeight)
2202 std::swap(*I, *J);
2203 }
2204 }
2205 }
2206 // Rearrange the case blocks so that the last one falls through if possible.
2207 Case &BackCase = *(CR.Range.second-1);
2208 if (Size > 1 &&
2209 NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
2210 // The last case block won't fall through into 'NextBlock' if we emit the
2211 // branches in this order. See if rearranging a case value would help.
2212 // We start at the bottom as it's the case with the least weight.
2213 for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
2214 if (I->BB == NextBlock) {
2215 std::swap(*I, BackCase);
2216 break;
2217 }
2218 }
2219
2220 // Create a CaseBlock record representing a conditional branch to
2221 // the Case's target mbb if the value being switched on SV is equal
2222 // to C.
2223 MachineBasicBlock *CurBlock = CR.CaseBB;
2224 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2225 MachineBasicBlock *FallThrough;
2226 if (I != E-1) {
2227 FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
2228 CurMF->insert(BBI, FallThrough);
2229
2230 // Put SV in a virtual register to make it available from the new blocks.
2231 ExportFromCurrentBlock(SV);
2232 } else { 2241 } else {
2233 // If the last case doesn't match, go to the default block. 2242 std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2234 FallThrough = Default; 2243 sizeof(Clusters[SrcIndex]));
2235 } 2244 }
2236 2245 }
2237 const Value *RHS, *LHS, *MHS; 2246 Clusters.resize(DstIndex);
2238 ISD::CondCode CC;
2239 if (I->High == I->Low) {
2240 // This is just small small case range :) containing exactly 1 case
2241 CC = ISD::SETEQ;
2242 LHS = SV; RHS = I->High; MHS = nullptr;
2243 } else {
2244 CC = ISD::SETLE;
2245 LHS = I->Low; MHS = SV; RHS = I->High;
2246 }
2247
2248 // The false weight should be sum of all un-handled cases.
2249 UnhandledWeights -= I->ExtraWeight;
2250 CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
2251 /* me */ CurBlock,
2252 /* trueweight */ I->ExtraWeight,
2253 /* falseweight */ UnhandledWeights);
2254
2255 // If emitting the first comparison, just call visitSwitchCase to emit the
2256 // code into the current block. Otherwise, push the CaseBlock onto the
2257 // vector to be later processed by SDISel, and insert the node's MBB
2258 // before the next MBB.
2259 if (CurBlock == SwitchBB)
2260 visitSwitchCase(CB, SwitchBB);
2261 else
2262 SwitchCases.push_back(CB);
2263
2264 CurBlock = FallThrough;
2265 }
2266
2267 return true;
2268 }
2269
2270 static inline bool areJTsAllowed(const TargetLowering &TLI) {
2271 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
2272 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
2273 }
2274
2275 static APInt ComputeRange(const APInt &First, const APInt &Last) {
2276 uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
2277 APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
2278 return (LastExt - FirstExt + 1ULL);
2279 }
2280
2281 /// handleJTSwitchCase - Emit jumptable for current switch case range
2282 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
2283 CaseRecVector &WorkList,
2284 const Value *SV,
2285 MachineBasicBlock *Default,
2286 MachineBasicBlock *SwitchBB) {
2287 Case& FrontCase = *CR.Range.first;
2288 Case& BackCase = *(CR.Range.second-1);
2289
2290 const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
2291 const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
2292
2293 APInt TSize(First.getBitWidth(), 0);
2294 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2295 TSize += I->size();
2296
2297 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2298 if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
2299 return false;
2300
2301 APInt Range = ComputeRange(First, Last);
2302 // The density is TSize / Range. Require at least 40%.
2303 // It should not be possible for IntTSize to saturate for sane code, but make
2304 // sure we handle Range saturation correctly.
2305 uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
2306 uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
2307 if (IntTSize * 10 < IntRange * 4)
2308 return false;
2309
2310 DEBUG(dbgs() << "Lowering jump table\n"
2311 << "First entry: " << First << ". Last entry: " << Last << '\n'
2312 << "Range: " << Range << ". Size: " << TSize << ".\n\n");
2313
2314 // Get the MachineFunction which holds the current MBB. This is used when
2315 // inserting any additional MBBs necessary to represent the switch.
2316 MachineFunction *CurMF = FuncInfo.MF;
2317
2318 // Figure out which block is immediately after the current one.
2319 MachineFunction::iterator BBI = CR.CaseBB;
2320 ++BBI;
2321
2322 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2323
2324 // Create a new basic block to hold the code for loading the address
2325 // of the jump table, and jumping to it. Update successor information;
2326 // we will either branch to the default case for the switch, or the jump
2327 // table.
2328 MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2329 CurMF->insert(BBI, JumpTableBB);
2330
2331 addSuccessorWithWeight(CR.CaseBB, Default);
2332 addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
2333
2334 // Build a vector of destination BBs, corresponding to each target
2335 // of the jump table. If the value of the jump table slot corresponds to
2336 // a case statement, push the case's BB onto the vector, otherwise, push
2337 // the default BB.
2338 std::vector<MachineBasicBlock*> DestBBs;
2339 APInt TEI = First;
2340 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
2341 const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
2342 const APInt &High = cast<ConstantInt>(I->High)->getValue();
2343
2344 if (Low.sle(TEI) && TEI.sle(High)) {
2345 DestBBs.push_back(I->BB);
2346 if (TEI==High)
2347 ++I;
2348 } else {
2349 DestBBs.push_back(Default);
2350 }
2351 }
2352
2353 // Calculate weight for each unique destination in CR.
2354 DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
2355 if (FuncInfo.BPI)
2356 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2357 DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
2358 DestWeights.find(I->BB);
2359 if (Itr != DestWeights.end())
2360 Itr->second += I->ExtraWeight;
2361 else
2362 DestWeights[I->BB] = I->ExtraWeight;
2363 }
2364
2365 // Update successor info. Add one edge to each unique successor.
2366 BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
2367 for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
2368 E = DestBBs.end(); I != E; ++I) {
2369 if (!SuccsHandled[(*I)->getNumber()]) {
2370 SuccsHandled[(*I)->getNumber()] = true;
2371 DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
2372 DestWeights.find(*I);
2373 addSuccessorWithWeight(JumpTableBB, *I,
2374 Itr != DestWeights.end() ? Itr->second : 0);
2375 }
2376 }
2377
2378 // Create a jump table index for this jump table.
2379 unsigned JTEncoding = TLI.getJumpTableEncoding();
2380 unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
2381 ->createJumpTableIndex(DestBBs);
2382
2383 // Set the jump table information so that we can codegen it as a second
2384 // MachineBasicBlock
2385 JumpTable JT(-1U, JTI, JumpTableBB, Default);
2386 JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
2387 if (CR.CaseBB == SwitchBB)
2388 visitJumpTableHeader(JT, JTH, SwitchBB);
2389
2390 JTCases.push_back(JumpTableBlock(JTH, JT));
2391 return true;
2392 }
2393
2394 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
2395 /// 2 subtrees.
2396 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
2397 CaseRecVector& WorkList,
2398 const Value* SV,
2399 MachineBasicBlock* SwitchBB) {
2400 Case& FrontCase = *CR.Range.first;
2401 Case& BackCase = *(CR.Range.second-1);
2402
2403 // Size is the number of Cases represented by this range.
2404 unsigned Size = CR.Range.second - CR.Range.first;
2405
2406 const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
2407 const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
2408 double FMetric = 0;
2409 CaseItr Pivot = CR.Range.first + Size/2;
2410
2411 // Select optimal pivot, maximizing sum density of LHS and RHS. This will
2412 // (heuristically) allow us to emit JumpTable's later.
2413 APInt TSize(First.getBitWidth(), 0);
2414 for (CaseItr I = CR.Range.first, E = CR.Range.second;
2415 I!=E; ++I)
2416 TSize += I->size();
2417
2418 APInt LSize = FrontCase.size();
2419 APInt RSize = TSize-LSize;
2420 DEBUG(dbgs() << "Selecting best pivot: \n"
2421 << "First: " << First << ", Last: " << Last <<'\n'
2422 << "LSize: " << LSize << ", RSize: " << RSize << '\n');
2423 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2424 for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
2425 J!=E; ++I, ++J) {
2426 const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
2427 const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
2428 APInt Range = ComputeRange(LEnd, RBegin);
2429 assert((Range - 2ULL).isNonNegative() &&
2430 "Invalid case distance");
2431 // Use volatile double here to avoid excess precision issues on some hosts,
2432 // e.g. that use 80-bit X87 registers.
2433 // Only consider the density of sub-ranges that actually have sufficient
2434 // entries to be lowered as a jump table.
2435 volatile double LDensity =
2436 LSize.ult(TLI.getMinimumJumpTableEntries())
2437 ? 0.0
2438 : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble();
2439 volatile double RDensity =
2440 RSize.ult(TLI.getMinimumJumpTableEntries())
2441 ? 0.0
2442 : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble();
2443 volatile double Metric = Range.logBase2() * (LDensity + RDensity);
2444 // Should always split in some non-trivial place
2445 DEBUG(dbgs() <<"=>Step\n"
2446 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
2447 << "LDensity: " << LDensity
2448 << ", RDensity: " << RDensity << '\n'
2449 << "Metric: " << Metric << '\n');
2450 if (FMetric < Metric) {
2451 Pivot = J;
2452 FMetric = Metric;
2453 DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
2454 }
2455
2456 LSize += J->size();
2457 RSize -= J->size();
2458 }
2459
2460 if (FMetric == 0 || !areJTsAllowed(TLI))
2461 Pivot = CR.Range.first + Size/2;
2462 splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB);
2463 return true;
2464 }
2465
2466 void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
2467 CaseRecVector &WorkList,
2468 const Value *SV,
2469 MachineBasicBlock *SwitchBB) {
2470 // Get the MachineFunction which holds the current MBB. This is used when
2471 // inserting any additional MBBs necessary to represent the switch.
2472 MachineFunction *CurMF = FuncInfo.MF;
2473
2474 // Figure out which block is immediately after the current one.
2475 MachineFunction::iterator BBI = CR.CaseBB;
2476 ++BBI;
2477
2478 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2479
2480 CaseRange LHSR(CR.Range.first, Pivot);
2481 CaseRange RHSR(Pivot, CR.Range.second);
2482 const Constant *C = Pivot->Low;
2483 MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
2484
2485 // We know that we branch to the LHS if the Value being switched on is
2486 // less than the Pivot value, C. We use this to optimize our binary
2487 // tree a bit, by recognizing that if SV is greater than or equal to the
2488 // LHS's Case Value, and that Case Value is exactly one less than the
2489 // Pivot's Value, then we can branch directly to the LHS's Target,
2490 // rather than creating a leaf node for it.
2491 if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
2492 cast<ConstantInt>(C)->getValue() ==
2493 (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
2494 TrueBB = LHSR.first->BB;
2495 } else {
2496 TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2497 CurMF->insert(BBI, TrueBB);
2498 WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
2499
2500 // Put SV in a virtual register to make it available from the new blocks.
2501 ExportFromCurrentBlock(SV);
2502 }
2503
2504 // Similar to the optimization above, if the Value being switched on is
2505 // known to be less than the Constant CR.LT, and the current Case Value
2506 // is CR.LT - 1, then we can branch directly to the target block for
2507 // the current Case Value, rather than emitting a RHS leaf node for it.
2508 if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
2509 cast<ConstantInt>(RHSR.first->Low)->getValue() ==
2510 (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
2511 FalseBB = RHSR.first->BB;
2512 } else {
2513 FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2514 CurMF->insert(BBI, FalseBB);
2515 WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
2516
2517 // Put SV in a virtual register to make it available from the new blocks.
2518 ExportFromCurrentBlock(SV);
2519 }
2520
2521 // Create a CaseBlock record representing a conditional branch to
2522 // the LHS node if the value being switched on SV is less than C.
2523 // Otherwise, branch to LHS.
2524 CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);
2525
2526 if (CR.CaseBB == SwitchBB)
2527 visitSwitchCase(CB, SwitchBB);
2528 else
2529 SwitchCases.push_back(CB);
2530 }
2531
2532 /// handleBitTestsSwitchCase - if current case range has few destination and
2533 /// range span less, than machine word bitwidth, encode case range into series
2534 /// of masks and emit bit tests with these masks.
2535 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
2536 CaseRecVector& WorkList,
2537 const Value* SV,
2538 MachineBasicBlock* Default,
2539 MachineBasicBlock* SwitchBB) {
2540 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2541 EVT PTy = TLI.getPointerTy();
2542 unsigned IntPtrBits = PTy.getSizeInBits();
2543
2544 Case& FrontCase = *CR.Range.first;
2545 Case& BackCase = *(CR.Range.second-1);
2546
2547 // Get the MachineFunction which holds the current MBB. This is used when
2548 // inserting any additional MBBs necessary to represent the switch.
2549 MachineFunction *CurMF = FuncInfo.MF;
2550
2551 // If target does not have legal shift left, do not emit bit tests at all.
2552 if (!TLI.isOperationLegal(ISD::SHL, PTy))
2553 return false;
2554
2555 size_t numCmps = 0;
2556 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2557 // Single case counts one, case range - two.
2558 numCmps += (I->Low == I->High ? 1 : 2);
2559 }
2560
2561 // Count unique destinations
2562 SmallSet<MachineBasicBlock*, 4> Dests;
2563 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2564 Dests.insert(I->BB);
2565 if (Dests.size() > 3)
2566 // Don't bother the code below, if there are too much unique destinations
2567 return false;
2568 }
2569 DEBUG(dbgs() << "Total number of unique destinations: "
2570 << Dests.size() << '\n'
2571 << "Total number of comparisons: " << numCmps << '\n');
2572
2573 // Compute span of values.
2574 const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
2575 const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
2576 APInt cmpRange = maxValue - minValue;
2577
2578 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2579 << "Low bound: " << minValue << '\n'
2580 << "High bound: " << maxValue << '\n');
2581
2582 if (cmpRange.uge(IntPtrBits) ||
2583 (!(Dests.size() == 1 && numCmps >= 3) &&
2584 !(Dests.size() == 2 && numCmps >= 5) &&
2585 !(Dests.size() >= 3 && numCmps >= 6)))
2586 return false;
2587
2588 DEBUG(dbgs() << "Emitting bit tests\n");
2589 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2590
2591 // Optimize the case where all the case values fit in a
2592 // word without having to subtract minValue. In this case,
2593 // we can optimize away the subtraction.
2594 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
2595 cmpRange = maxValue;
2596 } else {
2597 lowBound = minValue;
2598 }
2599
2600 CaseBitsVector CasesBits;
2601 unsigned i, count = 0;
2602
2603 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2604 MachineBasicBlock* Dest = I->BB;
2605 for (i = 0; i < count; ++i)
2606 if (Dest == CasesBits[i].BB)
2607 break;
2608
2609 if (i == count) {
2610 assert((count < 3) && "Too much destinations to test!");
2611 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
2612 count++;
2613 }
2614
2615 const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
2616 const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
2617
2618 uint64_t lo = (lowValue - lowBound).getZExtValue();
2619 uint64_t hi = (highValue - lowBound).getZExtValue();
2620 CasesBits[i].ExtraWeight += I->ExtraWeight;
2621
2622 for (uint64_t j = lo; j <= hi; j++) {
2623 CasesBits[i].Mask |= 1ULL << j;
2624 CasesBits[i].Bits++;
2625 }
2626
2627 }
2628 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2629
2630 BitTestInfo BTC;
2631
2632 // Figure out which block is immediately after the current one.
2633 MachineFunction::iterator BBI = CR.CaseBB;
2634 ++BBI;
2635
2636 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2637
2638 DEBUG(dbgs() << "Cases:\n");
2639 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2640 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2641 << ", Bits: " << CasesBits[i].Bits
2642 << ", BB: " << CasesBits[i].BB << '\n');
2643
2644 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2645 CurMF->insert(BBI, CaseBB);
2646 BTC.push_back(BitTestCase(CasesBits[i].Mask,
2647 CaseBB,
2648 CasesBits[i].BB, CasesBits[i].ExtraWeight));
2649
2650 // Put SV in a virtual register to make it available from the new blocks.
2651 ExportFromCurrentBlock(SV);
2652 }
2653
2654 BitTestBlock BTB(lowBound, cmpRange, SV,
2655 -1U, MVT::Other, (CR.CaseBB == SwitchBB),
2656 CR.CaseBB, Default, std::move(BTC));
2657
2658 if (CR.CaseBB == SwitchBB)
2659 visitBitTestHeader(BTB, SwitchBB);
2660
2661 BitTestCases.push_back(std::move(BTB));
2662
2663 return true;
2664 }
2665
2666 /// Clusterify - Transform simple list of Cases into list of CaseRange's
2667 void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
2668 const SwitchInst& SI) {
2669 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2670 // Start with "simple" cases.
2671 for (SwitchInst::ConstCaseIt i : SI.cases()) {
2672 const BasicBlock *SuccBB = i.getCaseSuccessor();
2673 MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
2674
2675 uint32_t ExtraWeight =
2676 BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
2677
2678 Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
2679 SMBB, ExtraWeight));
2680 }
2681 std::sort(Cases.begin(), Cases.end(), CaseCmp());
2682
2683 // Merge case into clusters
2684 if (Cases.size() >= 2)
2685 // Must recompute end() each iteration because it may be
2686 // invalidated by erase if we hold on to it
2687 for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
2688 J != Cases.end(); ) {
2689 const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
2690 const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
2691 MachineBasicBlock* nextBB = J->BB;
2692 MachineBasicBlock* currentBB = I->BB;
2693
2694 // If the two neighboring cases go to the same destination, merge them
2695 // into a single case.
2696 if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
2697 I->High = J->High;
2698 I->ExtraWeight += J->ExtraWeight;
2699 J = Cases.erase(J);
2700 } else {
2701 I = J++;
2702 }
2703 }
2704
2705 DEBUG({
2706 size_t numCmps = 0;
2707 for (auto &I : Cases)
2708 // A range counts double, since it requires two compares.
2709 numCmps += I.Low != I.High ? 2 : 1;
2710
2711 dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2712 << ". Total compares: " << numCmps << '\n';
2713 });
2714 } 2247 }
2715 2248
2716 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, 2249 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2717 MachineBasicBlock *Last) { 2250 MachineBasicBlock *Last) {
2718 // Update JTCases. 2251 // Update JTCases.
2722 2255
2723 // Update BitTestCases. 2256 // Update BitTestCases.
2724 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) 2257 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2725 if (BitTestCases[i].Parent == First) 2258 if (BitTestCases[i].Parent == First)
2726 BitTestCases[i].Parent = Last; 2259 BitTestCases[i].Parent = Last;
2727 }
2728
2729 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2730 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2731
2732 // Figure out which block is immediately after the current one.
2733 MachineBasicBlock *NextBlock = nullptr;
2734 if (SwitchMBB + 1 != FuncInfo.MF->end())
2735 NextBlock = SwitchMBB + 1;
2736
2737
2738 // Create a vector of Cases, sorted so that we can efficiently create a binary
2739 // search tree from them.
2740 CaseVector Cases;
2741 Clusterify(Cases, SI);
2742
2743 // Get the default destination MBB.
2744 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2745
2746 if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
2747 !Cases.empty()) {
2748 // Replace an unreachable default destination with the most popular case
2749 // destination.
2750 DenseMap<const BasicBlock *, unsigned> Popularity;
2751 unsigned MaxPop = 0;
2752 const BasicBlock *MaxBB = nullptr;
2753 for (auto I : SI.cases()) {
2754 const BasicBlock *BB = I.getCaseSuccessor();
2755 if (++Popularity[BB] > MaxPop) {
2756 MaxPop = Popularity[BB];
2757 MaxBB = BB;
2758 }
2759 }
2760
2761 // Set new default.
2762 assert(MaxPop > 0);
2763 assert(MaxBB);
2764 Default = FuncInfo.MBBMap[MaxBB];
2765
2766 // Remove cases that were pointing to the destination that is now the default.
2767 Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
2768 [&](const Case &C) { return C.BB == Default; }),
2769 Cases.end());
2770 }
2771
2772 // If there is only the default destination, go there directly.
2773 if (Cases.empty()) {
2774 // Update machine-CFG edges.
2775 SwitchMBB->addSuccessor(Default);
2776
2777 // If this is not a fall-through branch, emit the branch.
2778 if (Default != NextBlock) {
2779 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
2780 getControlRoot(), DAG.getBasicBlock(Default)));
2781 }
2782 return;
2783 }
2784
2785 // Get the Value to be switched on.
2786 const Value *SV = SI.getCondition();
2787
2788 // Push the initial CaseRec onto the worklist
2789 CaseRecVector WorkList;
2790 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr,
2791 CaseRange(Cases.begin(),Cases.end())));
2792
2793 while (!WorkList.empty()) {
2794 // Grab a record representing a case range to process off the worklist
2795 CaseRec CR = WorkList.back();
2796 WorkList.pop_back();
2797
2798 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2799 continue;
2800
2801 // If the range has few cases (two or less) emit a series of specific
2802 // tests.
2803 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2804 continue;
2805
2806 // If the switch has more than N blocks, and is at least 40% dense, and the
2807 // target supports indirect branches, then emit a jump table rather than
2808 // lowering the switch to a binary tree of conditional branches.
2809 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
2810 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2811 continue;
2812
2813 // Emit binary tree. We need to pick a pivot, and push left and right ranges
2814 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
2815 handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
2816 }
2817 } 2260 }
2818 2261
2819 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { 2262 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2820 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; 2263 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2821 2264
2836 getValue(I.getAddress()))); 2279 getValue(I.getAddress())));
2837 } 2280 }
2838 2281
2839 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { 2282 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2840 if (DAG.getTarget().Options.TrapUnreachable) 2283 if (DAG.getTarget().Options.TrapUnreachable)
2841 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); 2284 DAG.setRoot(
2285 DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2842 } 2286 }
2843 2287
2844 void SelectionDAGBuilder::visitFSub(const User &I) { 2288 void SelectionDAGBuilder::visitFSub(const User &I) {
2845 // -0.0 - X --> fneg 2289 // -0.0 - X --> fneg
2846 Type *Ty = I.getType(); 2290 Type *Ty = I.getType();
2860 SDValue Op2 = getValue(I.getOperand(1)); 2304 SDValue Op2 = getValue(I.getOperand(1));
2861 2305
2862 bool nuw = false; 2306 bool nuw = false;
2863 bool nsw = false; 2307 bool nsw = false;
2864 bool exact = false; 2308 bool exact = false;
2309 FastMathFlags FMF;
2310
2865 if (const OverflowingBinaryOperator *OFBinOp = 2311 if (const OverflowingBinaryOperator *OFBinOp =
2866 dyn_cast<const OverflowingBinaryOperator>(&I)) { 2312 dyn_cast<const OverflowingBinaryOperator>(&I)) {
2867 nuw = OFBinOp->hasNoUnsignedWrap(); 2313 nuw = OFBinOp->hasNoUnsignedWrap();
2868 nsw = OFBinOp->hasNoSignedWrap(); 2314 nsw = OFBinOp->hasNoSignedWrap();
2869 } 2315 }
2870 if (const PossiblyExactOperator *ExactOp = 2316 if (const PossiblyExactOperator *ExactOp =
2871 dyn_cast<const PossiblyExactOperator>(&I)) 2317 dyn_cast<const PossiblyExactOperator>(&I))
2872 exact = ExactOp->isExact(); 2318 exact = ExactOp->isExact();
2873 2319 if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2320 FMF = FPOp->getFastMathFlags();
2321
2322 SDNodeFlags Flags;
2323 Flags.setExact(exact);
2324 Flags.setNoSignedWrap(nsw);
2325 Flags.setNoUnsignedWrap(nuw);
2326 if (EnableFMFInDAG) {
2327 Flags.setAllowReciprocal(FMF.allowReciprocal());
2328 Flags.setNoInfs(FMF.noInfs());
2329 Flags.setNoNaNs(FMF.noNaNs());
2330 Flags.setNoSignedZeros(FMF.noSignedZeros());
2331 Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
2332 }
2874 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), 2333 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2875 Op1, Op2, nuw, nsw, exact); 2334 Op1, Op2, &Flags);
2876 setValue(&I, BinNodeValue); 2335 setValue(&I, BinNodeValue);
2877 } 2336 }
2878 2337
2879 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { 2338 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2880 SDValue Op1 = getValue(I.getOperand(0)); 2339 SDValue Op1 = getValue(I.getOperand(0));
2881 SDValue Op2 = getValue(I.getOperand(1)); 2340 SDValue Op2 = getValue(I.getOperand(1));
2882 2341
2883 EVT ShiftTy = 2342 EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2884 DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); 2343 Op2.getValueType(), DAG.getDataLayout());
2885 2344
2886 // Coerce the shift amount to the right type if we can. 2345 // Coerce the shift amount to the right type if we can.
2887 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { 2346 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2888 unsigned ShiftSize = ShiftTy.getSizeInBits(); 2347 unsigned ShiftSize = ShiftTy.getSizeInBits();
2889 unsigned Op2Size = Op2.getValueType().getSizeInBits(); 2348 unsigned Op2Size = Op2.getValueType().getSizeInBits();
2918 } 2377 }
2919 if (const PossiblyExactOperator *ExactOp = 2378 if (const PossiblyExactOperator *ExactOp =
2920 dyn_cast<const PossiblyExactOperator>(&I)) 2379 dyn_cast<const PossiblyExactOperator>(&I))
2921 exact = ExactOp->isExact(); 2380 exact = ExactOp->isExact();
2922 } 2381 }
2923 2382 SDNodeFlags Flags;
2383 Flags.setExact(exact);
2384 Flags.setNoSignedWrap(nsw);
2385 Flags.setNoUnsignedWrap(nuw);
2924 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, 2386 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2925 nuw, nsw, exact); 2387 &Flags);
2926 setValue(&I, Res); 2388 setValue(&I, Res);
2927 } 2389 }
2928 2390
2929 void SelectionDAGBuilder::visitSDiv(const User &I) { 2391 void SelectionDAGBuilder::visitSDiv(const User &I) {
2930 SDValue Op1 = getValue(I.getOperand(0)); 2392 SDValue Op1 = getValue(I.getOperand(0));
2931 SDValue Op2 = getValue(I.getOperand(1)); 2393 SDValue Op2 = getValue(I.getOperand(1));
2932 2394
2933 // Turn exact SDivs into multiplications. 2395 SDNodeFlags Flags;
2934 // FIXME: This should be in DAGCombiner, but it doesn't have access to the 2396 Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2935 // exact bit. 2397 cast<PossiblyExactOperator>(&I)->isExact());
2936 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && 2398 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2937 !isa<ConstantSDNode>(Op1) && 2399 Op2, &Flags));
2938 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
2939 setValue(&I, DAG.getTargetLoweringInfo()
2940 .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG));
2941 else
2942 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(),
2943 Op1, Op2));
2944 } 2400 }
2945 2401
2946 void SelectionDAGBuilder::visitICmp(const User &I) { 2402 void SelectionDAGBuilder::visitICmp(const User &I) {
2947 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; 2403 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2948 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) 2404 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2951 predicate = ICmpInst::Predicate(IC->getPredicate()); 2407 predicate = ICmpInst::Predicate(IC->getPredicate());
2952 SDValue Op1 = getValue(I.getOperand(0)); 2408 SDValue Op1 = getValue(I.getOperand(0));
2953 SDValue Op2 = getValue(I.getOperand(1)); 2409 SDValue Op2 = getValue(I.getOperand(1));
2954 ISD::CondCode Opcode = getICmpCondCode(predicate); 2410 ISD::CondCode Opcode = getICmpCondCode(predicate);
2955 2411
2956 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2412 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2413 I.getType());
2957 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); 2414 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2958 } 2415 }
2959 2416
2960 void SelectionDAGBuilder::visitFCmp(const User &I) { 2417 void SelectionDAGBuilder::visitFCmp(const User &I) {
2961 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; 2418 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2964 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) 2421 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2965 predicate = FCmpInst::Predicate(FC->getPredicate()); 2422 predicate = FCmpInst::Predicate(FC->getPredicate());
2966 SDValue Op1 = getValue(I.getOperand(0)); 2423 SDValue Op1 = getValue(I.getOperand(0));
2967 SDValue Op2 = getValue(I.getOperand(1)); 2424 SDValue Op2 = getValue(I.getOperand(1));
2968 ISD::CondCode Condition = getFCmpCondCode(predicate); 2425 ISD::CondCode Condition = getFCmpCondCode(predicate);
2426
2427 // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2428 // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2429 // further optimization, but currently FMF is only applicable to binary nodes.
2969 if (TM.Options.NoNaNsFPMath) 2430 if (TM.Options.NoNaNsFPMath)
2970 Condition = getFCmpCodeWithoutNaN(Condition); 2431 Condition = getFCmpCodeWithoutNaN(Condition);
2971 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2432 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2433 I.getType());
2972 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); 2434 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2973 } 2435 }
2974 2436
2975 void SelectionDAGBuilder::visitSelect(const User &I) { 2437 void SelectionDAGBuilder::visitSelect(const User &I) {
2976 SmallVector<EVT, 4> ValueVTs; 2438 SmallVector<EVT, 4> ValueVTs;
2977 ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); 2439 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2440 ValueVTs);
2978 unsigned NumValues = ValueVTs.size(); 2441 unsigned NumValues = ValueVTs.size();
2979 if (NumValues == 0) return; 2442 if (NumValues == 0) return;
2980 2443
2981 SmallVector<SDValue, 4> Values(NumValues); 2444 SmallVector<SDValue, 4> Values(NumValues);
2982 SDValue Cond = getValue(I.getOperand(0)); 2445 SDValue Cond = getValue(I.getOperand(0));
2983 SDValue TrueVal = getValue(I.getOperand(1)); 2446 SDValue LHSVal = getValue(I.getOperand(1));
2984 SDValue FalseVal = getValue(I.getOperand(2)); 2447 SDValue RHSVal = getValue(I.getOperand(2));
2448 auto BaseOps = {Cond};
2985 ISD::NodeType OpCode = Cond.getValueType().isVector() ? 2449 ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2986 ISD::VSELECT : ISD::SELECT; 2450 ISD::VSELECT : ISD::SELECT;
2987 2451
2988 for (unsigned i = 0; i != NumValues; ++i) 2452 // Min/max matching is only viable if all output VTs are the same.
2453 if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2454 EVT VT = ValueVTs[0];
2455 LLVMContext &Ctx = *DAG.getContext();
2456 auto &TLI = DAG.getTargetLoweringInfo();
2457 while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
2458 VT = TLI.getTypeToTransformTo(Ctx, VT);
2459
2460 Value *LHS, *RHS;
2461 auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2462 ISD::NodeType Opc = ISD::DELETED_NODE;
2463 switch (SPR.Flavor) {
2464 case SPF_UMAX: Opc = ISD::UMAX; break;
2465 case SPF_UMIN: Opc = ISD::UMIN; break;
2466 case SPF_SMAX: Opc = ISD::SMAX; break;
2467 case SPF_SMIN: Opc = ISD::SMIN; break;
2468 case SPF_FMINNUM:
2469 switch (SPR.NaNBehavior) {
2470 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2471 case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2472 case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2473 case SPNB_RETURNS_ANY:
2474 Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM
2475 : ISD::FMINNAN;
2476 break;
2477 }
2478 break;
2479 case SPF_FMAXNUM:
2480 switch (SPR.NaNBehavior) {
2481 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2482 case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2483 case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2484 case SPNB_RETURNS_ANY:
2485 Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM
2486 : ISD::FMAXNAN;
2487 break;
2488 }
2489 break;
2490 default: break;
2491 }
2492
2493 if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
2494 // If the underlying comparison instruction is used by any other instruction,
2495 // the consumed instructions won't be destroyed, so it is not profitable
2496 // to convert to a min/max.
2497 cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
2498 OpCode = Opc;
2499 LHSVal = getValue(LHS);
2500 RHSVal = getValue(RHS);
2501 BaseOps = {};
2502 }
2503 }
2504
2505 for (unsigned i = 0; i != NumValues; ++i) {
2506 SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2507 Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2508 Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2989 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), 2509 Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2990 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), 2510 LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2991 Cond, 2511 Ops);
2992 SDValue(TrueVal.getNode(), 2512 }
2993 TrueVal.getResNo() + i),
2994 SDValue(FalseVal.getNode(),
2995 FalseVal.getResNo() + i));
2996 2513
2997 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2514 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2998 DAG.getVTList(ValueVTs), Values)); 2515 DAG.getVTList(ValueVTs), Values));
2999 } 2516 }
3000 2517
3001 void SelectionDAGBuilder::visitTrunc(const User &I) { 2518 void SelectionDAGBuilder::visitTrunc(const User &I) {
3002 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 2519 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3003 SDValue N = getValue(I.getOperand(0)); 2520 SDValue N = getValue(I.getOperand(0));
3004 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2521 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2522 I.getType());
3005 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); 2523 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3006 } 2524 }
3007 2525
3008 void SelectionDAGBuilder::visitZExt(const User &I) { 2526 void SelectionDAGBuilder::visitZExt(const User &I) {
3009 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2527 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3010 // ZExt also can't be a cast to bool for same reason. So, nothing much to do 2528 // ZExt also can't be a cast to bool for same reason. So, nothing much to do
3011 SDValue N = getValue(I.getOperand(0)); 2529 SDValue N = getValue(I.getOperand(0));
3012 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2530 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2531 I.getType());
3013 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); 2532 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3014 } 2533 }
3015 2534
3016 void SelectionDAGBuilder::visitSExt(const User &I) { 2535 void SelectionDAGBuilder::visitSExt(const User &I) {
3017 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2536 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3018 // SExt also can't be a cast to bool for same reason. So, nothing much to do 2537 // SExt also can't be a cast to bool for same reason. So, nothing much to do
3019 SDValue N = getValue(I.getOperand(0)); 2538 SDValue N = getValue(I.getOperand(0));
3020 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2539 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2540 I.getType());
3021 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); 2541 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3022 } 2542 }
3023 2543
3024 void SelectionDAGBuilder::visitFPTrunc(const User &I) { 2544 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3025 // FPTrunc is never a no-op cast, no need to check 2545 // FPTrunc is never a no-op cast, no need to check
3026 SDValue N = getValue(I.getOperand(0)); 2546 SDValue N = getValue(I.getOperand(0));
2547 SDLoc dl = getCurSDLoc();
3027 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2548 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3028 EVT DestVT = TLI.getValueType(I.getType()); 2549 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3029 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, 2550 setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3030 DAG.getTargetConstant(0, TLI.getPointerTy()))); 2551 DAG.getTargetConstant(
2552 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3031 } 2553 }
3032 2554
3033 void SelectionDAGBuilder::visitFPExt(const User &I) { 2555 void SelectionDAGBuilder::visitFPExt(const User &I) {
3034 // FPExt is never a no-op cast, no need to check 2556 // FPExt is never a no-op cast, no need to check
3035 SDValue N = getValue(I.getOperand(0)); 2557 SDValue N = getValue(I.getOperand(0));
3036 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2558 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2559 I.getType());
3037 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); 2560 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3038 } 2561 }
3039 2562
3040 void SelectionDAGBuilder::visitFPToUI(const User &I) { 2563 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3041 // FPToUI is never a no-op cast, no need to check 2564 // FPToUI is never a no-op cast, no need to check
3042 SDValue N = getValue(I.getOperand(0)); 2565 SDValue N = getValue(I.getOperand(0));
3043 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2566 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2567 I.getType());
3044 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); 2568 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3045 } 2569 }
3046 2570
3047 void SelectionDAGBuilder::visitFPToSI(const User &I) { 2571 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3048 // FPToSI is never a no-op cast, no need to check 2572 // FPToSI is never a no-op cast, no need to check
3049 SDValue N = getValue(I.getOperand(0)); 2573 SDValue N = getValue(I.getOperand(0));
3050 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2574 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2575 I.getType());
3051 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); 2576 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3052 } 2577 }
3053 2578
3054 void SelectionDAGBuilder::visitUIToFP(const User &I) { 2579 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3055 // UIToFP is never a no-op cast, no need to check 2580 // UIToFP is never a no-op cast, no need to check
3056 SDValue N = getValue(I.getOperand(0)); 2581 SDValue N = getValue(I.getOperand(0));
3057 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2582 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2583 I.getType());
3058 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); 2584 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3059 } 2585 }
3060 2586
3061 void SelectionDAGBuilder::visitSIToFP(const User &I) { 2587 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3062 // SIToFP is never a no-op cast, no need to check 2588 // SIToFP is never a no-op cast, no need to check
3063 SDValue N = getValue(I.getOperand(0)); 2589 SDValue N = getValue(I.getOperand(0));
3064 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2590 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2591 I.getType());
3065 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); 2592 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3066 } 2593 }
3067 2594
3068 void SelectionDAGBuilder::visitPtrToInt(const User &I) { 2595 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3069 // What to do depends on the size of the integer and the size of the pointer. 2596 // What to do depends on the size of the integer and the size of the pointer.
3070 // We can either truncate, zero extend, or no-op, accordingly. 2597 // We can either truncate, zero extend, or no-op, accordingly.
3071 SDValue N = getValue(I.getOperand(0)); 2598 SDValue N = getValue(I.getOperand(0));
3072 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2599 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2600 I.getType());
3073 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 2601 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3074 } 2602 }
3075 2603
3076 void SelectionDAGBuilder::visitIntToPtr(const User &I) { 2604 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3077 // What to do depends on the size of the integer and the size of the pointer. 2605 // What to do depends on the size of the integer and the size of the pointer.
3078 // We can either truncate, zero extend, or no-op, accordingly. 2606 // We can either truncate, zero extend, or no-op, accordingly.
3079 SDValue N = getValue(I.getOperand(0)); 2607 SDValue N = getValue(I.getOperand(0));
3080 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2608 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2609 I.getType());
3081 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 2610 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3082 } 2611 }
3083 2612
3084 void SelectionDAGBuilder::visitBitCast(const User &I) { 2613 void SelectionDAGBuilder::visitBitCast(const User &I) {
3085 SDValue N = getValue(I.getOperand(0)); 2614 SDValue N = getValue(I.getOperand(0));
3086 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2615 SDLoc dl = getCurSDLoc();
2616 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2617 I.getType());
3087 2618
3088 // BitCast assures us that source and destination are the same size so this is 2619 // BitCast assures us that source and destination are the same size so this is
3089 // either a BITCAST or a no-op. 2620 // either a BITCAST or a no-op.
3090 if (DestVT != N.getValueType()) 2621 if (DestVT != N.getValueType())
3091 setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), 2622 setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3092 DestVT, N)); // convert types. 2623 DestVT, N)); // convert types.
3093 // Check if the original LLVM IR Operand was a ConstantInt, because getValue() 2624 // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3094 // might fold any kind of constant expression to an integer constant and that 2625 // might fold any kind of constant expression to an integer constant and that
3095 // is not what we are looking for. Only regcognize a bitcast of a genuine 2626 // is not what we are looking for. Only regcognize a bitcast of a genuine
3096 // constant integer as an opaque constant. 2627 // constant integer as an opaque constant.
3097 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) 2628 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3098 setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, 2629 setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3099 /*isOpaque*/true)); 2630 /*isOpaque*/true));
3100 else 2631 else
3101 setValue(&I, N); // noop cast. 2632 setValue(&I, N); // noop cast.
3102 } 2633 }
3103 2634
3104 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { 2635 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3105 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2636 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3106 const Value *SV = I.getOperand(0); 2637 const Value *SV = I.getOperand(0);
3107 SDValue N = getValue(SV); 2638 SDValue N = getValue(SV);
3108 EVT DestVT = TLI.getValueType(I.getType()); 2639 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3109 2640
3110 unsigned SrcAS = SV->getType()->getPointerAddressSpace(); 2641 unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3111 unsigned DestAS = I.getType()->getPointerAddressSpace(); 2642 unsigned DestAS = I.getType()->getPointerAddressSpace();
3112 2643
3113 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) 2644 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3118 2649
3119 void SelectionDAGBuilder::visitInsertElement(const User &I) { 2650 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3120 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2651 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3121 SDValue InVec = getValue(I.getOperand(0)); 2652 SDValue InVec = getValue(I.getOperand(0));
3122 SDValue InVal = getValue(I.getOperand(1)); 2653 SDValue InVal = getValue(I.getOperand(1));
3123 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), 2654 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3124 getCurSDLoc(), TLI.getVectorIdxTy()); 2655 TLI.getVectorIdxTy(DAG.getDataLayout()));
3125 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), 2656 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3126 TLI.getValueType(I.getType()), InVec, InVal, InIdx)); 2657 TLI.getValueType(DAG.getDataLayout(), I.getType()),
2658 InVec, InVal, InIdx));
3127 } 2659 }
3128 2660
3129 void SelectionDAGBuilder::visitExtractElement(const User &I) { 2661 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3130 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2662 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3131 SDValue InVec = getValue(I.getOperand(0)); 2663 SDValue InVec = getValue(I.getOperand(0));
3132 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), 2664 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3133 getCurSDLoc(), TLI.getVectorIdxTy()); 2665 TLI.getVectorIdxTy(DAG.getDataLayout()));
3134 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 2666 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3135 TLI.getValueType(I.getType()), InVec, InIdx)); 2667 TLI.getValueType(DAG.getDataLayout(), I.getType()),
2668 InVec, InIdx));
3136 } 2669 }
3137 2670
3138 // Utility for visitShuffleVector - Return true if every element in Mask, 2671 // Utility for visitShuffleVector - Return true if every element in Mask,
3139 // beginning from position Pos and ending in Pos+Size, falls within the 2672 // beginning from position Pos and ending in Pos+Size, falls within the
3140 // specified sequential range [L, L+Pos). or is undef. 2673 // specified sequential range [L, L+Pos). or is undef.
3153 SmallVector<int, 8> Mask; 2686 SmallVector<int, 8> Mask;
3154 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); 2687 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3155 unsigned MaskNumElts = Mask.size(); 2688 unsigned MaskNumElts = Mask.size();
3156 2689
3157 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2690 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3158 EVT VT = TLI.getValueType(I.getType()); 2691 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3159 EVT SrcVT = Src1.getValueType(); 2692 EVT SrcVT = Src1.getValueType();
3160 unsigned SrcNumElts = SrcVT.getVectorNumElements(); 2693 unsigned SrcNumElts = SrcVT.getVectorNumElements();
3161 2694
3162 if (SrcNumElts == MaskNumElts) { 2695 if (SrcNumElts == MaskNumElts) {
3163 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 2696 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
3271 // Extract appropriate subvector and generate a vector shuffle 2804 // Extract appropriate subvector and generate a vector shuffle
3272 for (unsigned Input = 0; Input < 2; ++Input) { 2805 for (unsigned Input = 0; Input < 2; ++Input) {
3273 SDValue &Src = Input == 0 ? Src1 : Src2; 2806 SDValue &Src = Input == 0 ? Src1 : Src2;
3274 if (RangeUse[Input] == 0) 2807 if (RangeUse[Input] == 0)
3275 Src = DAG.getUNDEF(VT); 2808 Src = DAG.getUNDEF(VT);
3276 else 2809 else {
2810 SDLoc dl = getCurSDLoc();
3277 Src = DAG.getNode( 2811 Src = DAG.getNode(
3278 ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, 2812 ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
3279 DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); 2813 DAG.getConstant(StartIdx[Input], dl,
2814 TLI.getVectorIdxTy(DAG.getDataLayout())));
2815 }
3280 } 2816 }
3281 2817
3282 // Calculate new mask. 2818 // Calculate new mask.
3283 SmallVector<int, 8> MappedOps; 2819 SmallVector<int, 8> MappedOps;
3284 for (unsigned i = 0; i != MaskNumElts; ++i) { 2820 for (unsigned i = 0; i != MaskNumElts; ++i) {
3300 2836
3301 // We can't use either concat vectors or extract subvectors so fall back to 2837 // We can't use either concat vectors or extract subvectors so fall back to
3302 // replacing the shuffle with extract and build vector. 2838 // replacing the shuffle with extract and build vector.
3303 // to insert and build vector. 2839 // to insert and build vector.
3304 EVT EltVT = VT.getVectorElementType(); 2840 EVT EltVT = VT.getVectorElementType();
3305 EVT IdxVT = TLI.getVectorIdxTy(); 2841 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
2842 SDLoc dl = getCurSDLoc();
3306 SmallVector<SDValue,8> Ops; 2843 SmallVector<SDValue,8> Ops;
3307 for (unsigned i = 0; i != MaskNumElts; ++i) { 2844 for (unsigned i = 0; i != MaskNumElts; ++i) {
3308 int Idx = Mask[i]; 2845 int Idx = Mask[i];
3309 SDValue Res; 2846 SDValue Res;
3310 2847
3312 Res = DAG.getUNDEF(EltVT); 2849 Res = DAG.getUNDEF(EltVT);
3313 } else { 2850 } else {
3314 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; 2851 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3315 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; 2852 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3316 2853
3317 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 2854 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
3318 EltVT, Src, DAG.getConstant(Idx, IdxVT)); 2855 EltVT, Src, DAG.getConstant(Idx, dl, IdxVT));
3319 } 2856 }
3320 2857
3321 Ops.push_back(Res); 2858 Ops.push_back(Res);
3322 } 2859 }
3323 2860
3324 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); 2861 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops));
3325 } 2862 }
3326 2863
3327 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { 2864 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
3328 const Value *Op0 = I.getOperand(0); 2865 const Value *Op0 = I.getOperand(0);
3329 const Value *Op1 = I.getOperand(1); 2866 const Value *Op1 = I.getOperand(1);
3334 2871
3335 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 2872 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
3336 2873
3337 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2874 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3338 SmallVector<EVT, 4> AggValueVTs; 2875 SmallVector<EVT, 4> AggValueVTs;
3339 ComputeValueVTs(TLI, AggTy, AggValueVTs); 2876 ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3340 SmallVector<EVT, 4> ValValueVTs; 2877 SmallVector<EVT, 4> ValValueVTs;
3341 ComputeValueVTs(TLI, ValTy, ValValueVTs); 2878 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3342 2879
3343 unsigned NumAggValues = AggValueVTs.size(); 2880 unsigned NumAggValues = AggValueVTs.size();
3344 unsigned NumValValues = ValValueVTs.size(); 2881 unsigned NumValValues = ValValueVTs.size();
3345 SmallVector<SDValue, 4> Values(NumAggValues); 2882 SmallVector<SDValue, 4> Values(NumAggValues);
3346 2883
3380 2917
3381 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 2918 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
3382 2919
3383 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2920 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3384 SmallVector<EVT, 4> ValValueVTs; 2921 SmallVector<EVT, 4> ValValueVTs;
3385 ComputeValueVTs(TLI, ValTy, ValValueVTs); 2922 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3386 2923
3387 unsigned NumValValues = ValValueVTs.size(); 2924 unsigned NumValValues = ValValueVTs.size();
3388 2925
3389 // Ignore a extractvalue that produces an empty object 2926 // Ignore a extractvalue that produces an empty object
3390 if (!NumValValues) { 2927 if (!NumValValues) {
3411 // Note that the pointer operand may be a vector of pointers. Take the scalar 2948 // Note that the pointer operand may be a vector of pointers. Take the scalar
3412 // element which holds a pointer. 2949 // element which holds a pointer.
3413 Type *Ty = Op0->getType()->getScalarType(); 2950 Type *Ty = Op0->getType()->getScalarType();
3414 unsigned AS = Ty->getPointerAddressSpace(); 2951 unsigned AS = Ty->getPointerAddressSpace();
3415 SDValue N = getValue(Op0); 2952 SDValue N = getValue(Op0);
3416 2953 SDLoc dl = getCurSDLoc();
2954
2955 // Normalize Vector GEP - all scalar operands should be converted to the
2956 // splat vector.
2957 unsigned VectorWidth = I.getType()->isVectorTy() ?
2958 cast<VectorType>(I.getType())->getVectorNumElements() : 0;
2959
2960 if (VectorWidth && !N.getValueType().isVector()) {
2961 MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
2962 SmallVector<SDValue, 16> Ops(VectorWidth, N);
2963 N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
2964 }
3417 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); 2965 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
3418 OI != E; ++OI) { 2966 OI != E; ++OI) {
3419 const Value *Idx = *OI; 2967 const Value *Idx = *OI;
3420 if (StructType *StTy = dyn_cast<StructType>(Ty)) { 2968 if (StructType *StTy = dyn_cast<StructType>(Ty)) {
3421 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); 2969 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3422 if (Field) { 2970 if (Field) {
3423 // N = N + Offset 2971 // N = N + Offset
3424 uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); 2972 uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3425 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, 2973 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3426 DAG.getConstant(Offset, N.getValueType())); 2974 DAG.getConstant(Offset, dl, N.getValueType()));
3427 } 2975 }
3428 2976
3429 Ty = StTy->getElementType(Field); 2977 Ty = StTy->getElementType(Field);
3430 } else { 2978 } else {
3431 Ty = cast<SequentialType>(Ty)->getElementType(); 2979 Ty = cast<SequentialType>(Ty)->getElementType();
3432 2980 MVT PtrTy =
3433 // If this is a constant subscript, handle it quickly. 2981 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
3434 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2982 unsigned PtrSize = PtrTy.getSizeInBits();
3435 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { 2983 APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
3436 if (CI->isZero()) continue; 2984
3437 uint64_t Offs = 2985 // If this is a scalar constant or a splat vector of constants,
3438 DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); 2986 // handle it quickly.
3439 SDValue OffsVal; 2987 const auto *CI = dyn_cast<ConstantInt>(Idx);
3440 EVT PTy = TLI.getPointerTy(AS); 2988 if (!CI && isa<ConstantDataVector>(Idx) &&
3441 unsigned PtrBits = PTy.getSizeInBits(); 2989 cast<ConstantDataVector>(Idx)->getSplatValue())
3442 if (PtrBits < 64) 2990 CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3443 OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, 2991
3444 DAG.getConstant(Offs, MVT::i64)); 2992 if (CI) {
3445 else 2993 if (CI->isZero())
3446 OffsVal = DAG.getConstant(Offs, PTy); 2994 continue;
3447 2995 APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
3448 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, 2996 SDValue OffsVal = VectorWidth ?
3449 OffsVal); 2997 DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
2998 DAG.getConstant(Offs, dl, PtrTy);
2999 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
3450 continue; 3000 continue;
3451 } 3001 }
3452 3002
3453 // N = N + Idx * ElementSize; 3003 // N = N + Idx * ElementSize;
3454 APInt ElementSize =
3455 APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
3456 SDValue IdxN = getValue(Idx); 3004 SDValue IdxN = getValue(Idx);
3457 3005
3006 if (!IdxN.getValueType().isVector() && VectorWidth) {
3007 MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
3008 SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
3009 IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
3010 }
3458 // If the index is smaller or larger than intptr_t, truncate or extend 3011 // If the index is smaller or larger than intptr_t, truncate or extend
3459 // it. 3012 // it.
3460 IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); 3013 IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3461 3014
3462 // If this is a multiply by a power of two, turn it into a shl 3015 // If this is a multiply by a power of two, turn it into a shl
3463 // immediately. This is a very common case. 3016 // immediately. This is a very common case.
3464 if (ElementSize != 1) { 3017 if (ElementSize != 1) {
3465 if (ElementSize.isPowerOf2()) { 3018 if (ElementSize.isPowerOf2()) {
3466 unsigned Amt = ElementSize.logBase2(); 3019 unsigned Amt = ElementSize.logBase2();
3467 IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), 3020 IdxN = DAG.getNode(ISD::SHL, dl,
3468 N.getValueType(), IdxN, 3021 N.getValueType(), IdxN,
3469 DAG.getConstant(Amt, IdxN.getValueType())); 3022 DAG.getConstant(Amt, dl, IdxN.getValueType()));
3470 } else { 3023 } else {
3471 SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); 3024 SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3472 IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), 3025 IdxN = DAG.getNode(ISD::MUL, dl,
3473 N.getValueType(), IdxN, Scale); 3026 N.getValueType(), IdxN, Scale);
3474 } 3027 }
3475 } 3028 }
3476 3029
3477 N = DAG.getNode(ISD::ADD, getCurSDLoc(), 3030 N = DAG.getNode(ISD::ADD, dl,
3478 N.getValueType(), N, IdxN); 3031 N.getValueType(), N, IdxN);
3479 } 3032 }
3480 } 3033 }
3481 3034
3482 setValue(&I, N); 3035 setValue(&I, N);
3486 // If this is a fixed sized alloca in the entry block of the function, 3039 // If this is a fixed sized alloca in the entry block of the function,
3487 // allocate it statically on the stack. 3040 // allocate it statically on the stack.
3488 if (FuncInfo.StaticAllocaMap.count(&I)) 3041 if (FuncInfo.StaticAllocaMap.count(&I))
3489 return; // getValue will auto-populate this. 3042 return; // getValue will auto-populate this.
3490 3043
3044 SDLoc dl = getCurSDLoc();
3491 Type *Ty = I.getAllocatedType(); 3045 Type *Ty = I.getAllocatedType();
3492 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3046 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3493 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 3047 auto &DL = DAG.getDataLayout();
3048 uint64_t TySize = DL.getTypeAllocSize(Ty);
3494 unsigned Align = 3049 unsigned Align =
3495 std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), 3050 std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3496 I.getAlignment());
3497 3051
3498 SDValue AllocSize = getValue(I.getArraySize()); 3052 SDValue AllocSize = getValue(I.getArraySize());
3499 3053
3500 EVT IntPtr = TLI.getPointerTy(); 3054 EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
3501 if (AllocSize.getValueType() != IntPtr) 3055 if (AllocSize.getValueType() != IntPtr)
3502 AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); 3056 AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3503 3057
3504 AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, 3058 AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3505 AllocSize, 3059 AllocSize,
3506 DAG.getConstant(TySize, IntPtr)); 3060 DAG.getConstant(TySize, dl, IntPtr));
3507 3061
3508 // Handle alignment. If the requested alignment is less than or equal to 3062 // Handle alignment. If the requested alignment is less than or equal to
3509 // the stack alignment, ignore it. If the size is greater than or equal to 3063 // the stack alignment, ignore it. If the size is greater than or equal to
3510 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 3064 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3511 unsigned StackAlign = 3065 unsigned StackAlign =
3513 if (Align <= StackAlign) 3067 if (Align <= StackAlign)
3514 Align = 0; 3068 Align = 0;
3515 3069
3516 // Round the size of the allocation up to the stack alignment size 3070 // Round the size of the allocation up to the stack alignment size
3517 // by add SA-1 to the size. 3071 // by add SA-1 to the size.
3518 AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), 3072 AllocSize = DAG.getNode(ISD::ADD, dl,
3519 AllocSize.getValueType(), AllocSize, 3073 AllocSize.getValueType(), AllocSize,
3520 DAG.getIntPtrConstant(StackAlign-1)); 3074 DAG.getIntPtrConstant(StackAlign - 1, dl));
3521 3075
3522 // Mask out the low bits for alignment purposes. 3076 // Mask out the low bits for alignment purposes.
3523 AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), 3077 AllocSize = DAG.getNode(ISD::AND, dl,
3524 AllocSize.getValueType(), AllocSize, 3078 AllocSize.getValueType(), AllocSize,
3525 DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); 3079 DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
3526 3080 dl));
3527 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; 3081
3082 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
3528 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); 3083 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3529 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); 3084 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3530 setValue(&I, DSA); 3085 setValue(&I, DSA);
3531 DAG.setRoot(DSA.getValue(1)); 3086 DAG.setRoot(DSA.getValue(1));
3532 3087
3533 assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); 3088 assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
3534 } 3089 }
3542 3097
3543 Type *Ty = I.getType(); 3098 Type *Ty = I.getType();
3544 3099
3545 bool isVolatile = I.isVolatile(); 3100 bool isVolatile = I.isVolatile();
3546 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; 3101 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3547 bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; 3102
3103 // The IR notion of invariant_load only guarantees that all *non-faulting*
3104 // invariant loads result in the same value. The MI notion of invariant load
3105 // guarantees that the load can be legally moved to any location within its
3106 // containing function. The MI notion of invariant_load is stronger than the
3107 // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load
3108 // with a guarantee that the location being loaded from is dereferenceable
3109 // throughout the function's lifetime.
3110
3111 bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
3112 isDereferenceablePointer(SV, DAG.getDataLayout());
3548 unsigned Alignment = I.getAlignment(); 3113 unsigned Alignment = I.getAlignment();
3549 3114
3550 AAMDNodes AAInfo; 3115 AAMDNodes AAInfo;
3551 I.getAAMetadata(AAInfo); 3116 I.getAAMetadata(AAInfo);
3552 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); 3117 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3553 3118
3554 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3555 SmallVector<EVT, 4> ValueVTs; 3120 SmallVector<EVT, 4> ValueVTs;
3556 SmallVector<uint64_t, 4> Offsets; 3121 SmallVector<uint64_t, 4> Offsets;
3557 ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); 3122 ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3558 unsigned NumValues = ValueVTs.size(); 3123 unsigned NumValues = ValueVTs.size();
3559 if (NumValues == 0) 3124 if (NumValues == 0)
3560 return; 3125 return;
3561 3126
3562 SDValue Root; 3127 SDValue Root;
3563 bool ConstantMemory = false; 3128 bool ConstantMemory = false;
3564 if (isVolatile || NumValues > MaxParallelChains) 3129 if (isVolatile || NumValues > MaxParallelChains)
3565 // Serialize volatile loads with other side effects. 3130 // Serialize volatile loads with other side effects.
3566 Root = getRoot(); 3131 Root = getRoot();
3567 else if (AA->pointsToConstantMemory( 3132 else if (AA->pointsToConstantMemory(MemoryLocation(
3568 AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { 3133 SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3569 // Do not serialize (non-volatile) loads of constant memory with anything. 3134 // Do not serialize (non-volatile) loads of constant memory with anything.
3570 Root = DAG.getEntryNode(); 3135 Root = DAG.getEntryNode();
3571 ConstantMemory = true; 3136 ConstantMemory = true;
3572 } else { 3137 } else {
3573 // Do not serialize non-volatile loads against each other. 3138 // Do not serialize non-volatile loads against each other.
3574 Root = DAG.getRoot(); 3139 Root = DAG.getRoot();
3575 } 3140 }
3576 3141
3142 SDLoc dl = getCurSDLoc();
3143
3577 if (isVolatile) 3144 if (isVolatile)
3578 Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); 3145 Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3579 3146
3580 SmallVector<SDValue, 4> Values(NumValues); 3147 SmallVector<SDValue, 4> Values(NumValues);
3581 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 3148 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3582 NumValues));
3583 EVT PtrVT = Ptr.getValueType(); 3149 EVT PtrVT = Ptr.getValueType();
3584 unsigned ChainI = 0; 3150 unsigned ChainI = 0;
3585 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 3151 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3586 // Serializing loads here may result in excessive register pressure, and 3152 // Serializing loads here may result in excessive register pressure, and
3587 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling 3153 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3589 // they are side-effect free or do not alias. The optimizer should really 3155 // they are side-effect free or do not alias. The optimizer should really
3590 // avoid this case by converting large object/array copies to llvm.memcpy 3156 // avoid this case by converting large object/array copies to llvm.memcpy
3591 // (MaxParallelChains should always remain as failsafe). 3157 // (MaxParallelChains should always remain as failsafe).
3592 if (ChainI == MaxParallelChains) { 3158 if (ChainI == MaxParallelChains) {
3593 assert(PendingLoads.empty() && "PendingLoads must be serialized first"); 3159 assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3594 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3160 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3595 makeArrayRef(Chains.data(), ChainI)); 3161 makeArrayRef(Chains.data(), ChainI));
3596 Root = Chain; 3162 Root = Chain;
3597 ChainI = 0; 3163 ChainI = 0;
3598 } 3164 }
3599 SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), 3165 SDValue A = DAG.getNode(ISD::ADD, dl,
3600 PtrVT, Ptr, 3166 PtrVT, Ptr,
3601 DAG.getConstant(Offsets[i], PtrVT)); 3167 DAG.getConstant(Offsets[i], dl, PtrVT));
3602 SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, 3168 SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
3603 A, MachinePointerInfo(SV, Offsets[i]), isVolatile, 3169 A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
3604 isNonTemporal, isInvariant, Alignment, AAInfo, 3170 isNonTemporal, isInvariant, Alignment, AAInfo,
3605 Ranges); 3171 Ranges);
3606 3172
3607 Values[i] = L; 3173 Values[i] = L;
3608 Chains[ChainI] = L.getValue(1); 3174 Chains[ChainI] = L.getValue(1);
3609 } 3175 }
3610 3176
3611 if (!ConstantMemory) { 3177 if (!ConstantMemory) {
3612 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3178 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3613 makeArrayRef(Chains.data(), ChainI)); 3179 makeArrayRef(Chains.data(), ChainI));
3614 if (isVolatile) 3180 if (isVolatile)
3615 DAG.setRoot(Chain); 3181 DAG.setRoot(Chain);
3616 else 3182 else
3617 PendingLoads.push_back(Chain); 3183 PendingLoads.push_back(Chain);
3618 } 3184 }
3619 3185
3620 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3186 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3621 DAG.getVTList(ValueVTs), Values)); 3187 DAG.getVTList(ValueVTs), Values));
3622 } 3188 }
3623 3189
3624 void SelectionDAGBuilder::visitStore(const StoreInst &I) { 3190 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3625 if (I.isAtomic()) 3191 if (I.isAtomic())
3628 const Value *SrcV = I.getOperand(0); 3194 const Value *SrcV = I.getOperand(0);
3629 const Value *PtrV = I.getOperand(1); 3195 const Value *PtrV = I.getOperand(1);
3630 3196
3631 SmallVector<EVT, 4> ValueVTs; 3197 SmallVector<EVT, 4> ValueVTs;
3632 SmallVector<uint64_t, 4> Offsets; 3198 SmallVector<uint64_t, 4> Offsets;
3633 ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), 3199 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3634 ValueVTs, &Offsets); 3200 SrcV->getType(), ValueVTs, &Offsets);
3635 unsigned NumValues = ValueVTs.size(); 3201 unsigned NumValues = ValueVTs.size();
3636 if (NumValues == 0) 3202 if (NumValues == 0)
3637 return; 3203 return;
3638 3204
3639 // Get the lowered operands. Note that we do this after 3205 // Get the lowered operands. Note that we do this after
3641 // the operands won't have values in the map. 3207 // the operands won't have values in the map.
3642 SDValue Src = getValue(SrcV); 3208 SDValue Src = getValue(SrcV);
3643 SDValue Ptr = getValue(PtrV); 3209 SDValue Ptr = getValue(PtrV);
3644 3210
3645 SDValue Root = getRoot(); 3211 SDValue Root = getRoot();
3646 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 3212 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3647 NumValues));
3648 EVT PtrVT = Ptr.getValueType(); 3213 EVT PtrVT = Ptr.getValueType();
3649 bool isVolatile = I.isVolatile(); 3214 bool isVolatile = I.isVolatile();
3650 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; 3215 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3651 unsigned Alignment = I.getAlignment(); 3216 unsigned Alignment = I.getAlignment();
3217 SDLoc dl = getCurSDLoc();
3652 3218
3653 AAMDNodes AAInfo; 3219 AAMDNodes AAInfo;
3654 I.getAAMetadata(AAInfo); 3220 I.getAAMetadata(AAInfo);
3655 3221
3656 unsigned ChainI = 0; 3222 unsigned ChainI = 0;
3657 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 3223 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3658 // See visitLoad comments. 3224 // See visitLoad comments.
3659 if (ChainI == MaxParallelChains) { 3225 if (ChainI == MaxParallelChains) {
3660 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3226 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3661 makeArrayRef(Chains.data(), ChainI)); 3227 makeArrayRef(Chains.data(), ChainI));
3662 Root = Chain; 3228 Root = Chain;
3663 ChainI = 0; 3229 ChainI = 0;
3664 } 3230 }
3665 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, 3231 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3666 DAG.getConstant(Offsets[i], PtrVT)); 3232 DAG.getConstant(Offsets[i], dl, PtrVT));
3667 SDValue St = DAG.getStore(Root, getCurSDLoc(), 3233 SDValue St = DAG.getStore(Root, dl,
3668 SDValue(Src.getNode(), Src.getResNo() + i), 3234 SDValue(Src.getNode(), Src.getResNo() + i),
3669 Add, MachinePointerInfo(PtrV, Offsets[i]), 3235 Add, MachinePointerInfo(PtrV, Offsets[i]),
3670 isVolatile, isNonTemporal, Alignment, AAInfo); 3236 isVolatile, isNonTemporal, Alignment, AAInfo);
3671 Chains[ChainI] = St; 3237 Chains[ChainI] = St;
3672 } 3238 }
3673 3239
3674 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3240 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3675 makeArrayRef(Chains.data(), ChainI)); 3241 makeArrayRef(Chains.data(), ChainI));
3676 DAG.setRoot(StoreNode); 3242 DAG.setRoot(StoreNode);
3677 } 3243 }
3678 3244
3679 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { 3245 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
3680 SDLoc sdl = getCurSDLoc(); 3246 SDLoc sdl = getCurSDLoc();
3681 3247
3682 // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) 3248 // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3683 Value *PtrOperand = I.getArgOperand(1); 3249 Value *PtrOperand = I.getArgOperand(1);
3684 SDValue Ptr = getValue(PtrOperand); 3250 SDValue Ptr = getValue(PtrOperand);
3685 SDValue Src0 = getValue(I.getArgOperand(0)); 3251 SDValue Src0 = getValue(I.getArgOperand(0));
3686 SDValue Mask = getValue(I.getArgOperand(3)); 3252 SDValue Mask = getValue(I.getArgOperand(3));
3687 EVT VT = Src0.getValueType(); 3253 EVT VT = Src0.getValueType();
3701 MMO, false); 3267 MMO, false);
3702 DAG.setRoot(StoreNode); 3268 DAG.setRoot(StoreNode);
3703 setValue(&I, StoreNode); 3269 setValue(&I, StoreNode);
3704 } 3270 }
3705 3271
3272 // Get a uniform base for the Gather/Scatter intrinsic.
3273 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3274 // We try to represent it as a base pointer + vector of indices.
3275 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3276 // The first operand of the GEP may be a single pointer or a vector of pointers
3277 // Example:
3278 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3279 // or
3280 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3281 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3282 //
3283 // When the first GEP operand is a single pointer - it is the uniform base we
3284 // are looking for. If first operand of the GEP is a splat vector - we
3285 // extract the spalt value and use it as a uniform base.
3286 // In all other cases the function returns 'false'.
3287 //
3288 static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
3289 SelectionDAGBuilder* SDB) {
3290
3291 SelectionDAG& DAG = SDB->DAG;
3292 LLVMContext &Context = *DAG.getContext();
3293
3294 assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
3295 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3296 if (!GEP || GEP->getNumOperands() > 2)
3297 return false;
3298
3299 Value *GEPPtr = GEP->getPointerOperand();
3300 if (!GEPPtr->getType()->isVectorTy())
3301 Ptr = GEPPtr;
3302 else if (!(Ptr = getSplatValue(GEPPtr)))
3303 return false;
3304
3305 Value *IndexVal = GEP->getOperand(1);
3306
3307 // The operands of the GEP may be defined in another basic block.
3308 // In this case we'll not find nodes for the operands.
3309 if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3310 return false;
3311
3312 Base = SDB->getValue(Ptr);
3313 Index = SDB->getValue(IndexVal);
3314
3315 // Suppress sign extension.
3316 if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
3317 if (SDB->findValue(Sext->getOperand(0))) {
3318 IndexVal = Sext->getOperand(0);
3319 Index = SDB->getValue(IndexVal);
3320 }
3321 }
3322 if (!Index.getValueType().isVector()) {
3323 unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3324 EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3325 SmallVector<SDValue, 16> Ops(GEPWidth, Index);
3326 Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
3327 }
3328 return true;
3329 }
3330
3331 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3332 SDLoc sdl = getCurSDLoc();
3333
3334 // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
3335 Value *Ptr = I.getArgOperand(1);
3336 SDValue Src0 = getValue(I.getArgOperand(0));
3337 SDValue Mask = getValue(I.getArgOperand(3));
3338 EVT VT = Src0.getValueType();
3339 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3340 if (!Alignment)
3341 Alignment = DAG.getEVTAlignment(VT);
3342 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3343
3344 AAMDNodes AAInfo;
3345 I.getAAMetadata(AAInfo);
3346
3347 SDValue Base;
3348 SDValue Index;
3349 Value *BasePtr = Ptr;
3350 bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3351
3352 Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3353 MachineMemOperand *MMO = DAG.getMachineFunction().
3354 getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3355 MachineMemOperand::MOStore, VT.getStoreSize(),
3356 Alignment, AAInfo);
3357 if (!UniformBase) {
3358 Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3359 Index = getValue(Ptr);
3360 }
3361 SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
3362 SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3363 Ops, MMO);
3364 DAG.setRoot(Scatter);
3365 setValue(&I, Scatter);
3366 }
3367
3706 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { 3368 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
3707 SDLoc sdl = getCurSDLoc(); 3369 SDLoc sdl = getCurSDLoc();
3708 3370
3709 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) 3371 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3710 Value *PtrOperand = I.getArgOperand(0); 3372 Value *PtrOperand = I.getArgOperand(0);
3711 SDValue Ptr = getValue(PtrOperand); 3373 SDValue Ptr = getValue(PtrOperand);
3712 SDValue Src0 = getValue(I.getArgOperand(3)); 3374 SDValue Src0 = getValue(I.getArgOperand(3));
3713 SDValue Mask = getValue(I.getArgOperand(2)); 3375 SDValue Mask = getValue(I.getArgOperand(2));
3714 3376
3715 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3377 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3716 EVT VT = TLI.getValueType(I.getType()); 3378 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3717 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); 3379 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
3718 if (!Alignment) 3380 if (!Alignment)
3719 Alignment = DAG.getEVTAlignment(VT); 3381 Alignment = DAG.getEVTAlignment(VT);
3720 3382
3721 AAMDNodes AAInfo; 3383 AAMDNodes AAInfo;
3722 I.getAAMetadata(AAInfo); 3384 I.getAAMetadata(AAInfo);
3723 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); 3385 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3724 3386
3725 SDValue InChain = DAG.getRoot(); 3387 SDValue InChain = DAG.getRoot();
3726 if (AA->pointsToConstantMemory( 3388 if (AA->pointsToConstantMemory(MemoryLocation(
3727 AliasAnalysis::Location(PtrOperand, 3389 PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
3728 AA->getTypeStoreSize(I.getType()), 3390 AAInfo))) {
3729 AAInfo))) {
3730 // Do not serialize (non-volatile) loads of constant memory with anything. 3391 // Do not serialize (non-volatile) loads of constant memory with anything.
3731 InChain = DAG.getEntryNode(); 3392 InChain = DAG.getEntryNode();
3732 } 3393 }
3733 3394
3734 MachineMemOperand *MMO = 3395 MachineMemOperand *MMO =
3740 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, 3401 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
3741 ISD::NON_EXTLOAD); 3402 ISD::NON_EXTLOAD);
3742 SDValue OutChain = Load.getValue(1); 3403 SDValue OutChain = Load.getValue(1);
3743 DAG.setRoot(OutChain); 3404 DAG.setRoot(OutChain);
3744 setValue(&I, Load); 3405 setValue(&I, Load);
3406 }
3407
3408 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
3409 SDLoc sdl = getCurSDLoc();
3410
3411 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
3412 Value *Ptr = I.getArgOperand(0);
3413 SDValue Src0 = getValue(I.getArgOperand(3));
3414 SDValue Mask = getValue(I.getArgOperand(2));
3415
3416 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3417 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3418 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
3419 if (!Alignment)
3420 Alignment = DAG.getEVTAlignment(VT);
3421
3422 AAMDNodes AAInfo;
3423 I.getAAMetadata(AAInfo);
3424 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3425
3426 SDValue Root = DAG.getRoot();
3427 SDValue Base;
3428 SDValue Index;
3429 Value *BasePtr = Ptr;
3430 bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3431 bool ConstantMemory = false;
3432 if (UniformBase &&
3433 AA->pointsToConstantMemory(MemoryLocation(
3434 BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
3435 AAInfo))) {
3436 // Do not serialize (non-volatile) loads of constant memory with anything.
3437 Root = DAG.getEntryNode();
3438 ConstantMemory = true;
3439 }
3440
3441 MachineMemOperand *MMO =
3442 DAG.getMachineFunction().
3443 getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
3444 MachineMemOperand::MOLoad, VT.getStoreSize(),
3445 Alignment, AAInfo, Ranges);
3446
3447 if (!UniformBase) {
3448 Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3449 Index = getValue(Ptr);
3450 }
3451 SDValue Ops[] = { Root, Src0, Mask, Base, Index };
3452 SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
3453 Ops, MMO);
3454
3455 SDValue OutChain = Gather.getValue(1);
3456 if (!ConstantMemory)
3457 PendingLoads.push_back(OutChain);
3458 setValue(&I, Gather);
3745 } 3459 }
3746 3460
3747 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { 3461 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
3748 SDLoc dl = getCurSDLoc(); 3462 SDLoc dl = getCurSDLoc();
3749 AtomicOrdering SuccessOrder = I.getSuccessOrdering(); 3463 AtomicOrdering SuccessOrder = I.getSuccessOrdering();
3806 void SelectionDAGBuilder::visitFence(const FenceInst &I) { 3520 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
3807 SDLoc dl = getCurSDLoc(); 3521 SDLoc dl = getCurSDLoc();
3808 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3522 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3809 SDValue Ops[3]; 3523 SDValue Ops[3];
3810 Ops[0] = getRoot(); 3524 Ops[0] = getRoot();
3811 Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); 3525 Ops[1] = DAG.getConstant(I.getOrdering(), dl,
3812 Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); 3526 TLI.getPointerTy(DAG.getDataLayout()));
3527 Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
3528 TLI.getPointerTy(DAG.getDataLayout()));
3813 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); 3529 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
3814 } 3530 }
3815 3531
3816 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { 3532 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
3817 SDLoc dl = getCurSDLoc(); 3533 SDLoc dl = getCurSDLoc();
3819 SynchronizationScope Scope = I.getSynchScope(); 3535 SynchronizationScope Scope = I.getSynchScope();
3820 3536
3821 SDValue InChain = getRoot(); 3537 SDValue InChain = getRoot();
3822 3538
3823 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3539 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3824 EVT VT = TLI.getValueType(I.getType()); 3540 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3825 3541
3826 if (I.getAlignment() < VT.getSizeInBits() / 8) 3542 if (I.getAlignment() < VT.getSizeInBits() / 8)
3827 report_fatal_error("Cannot generate unaligned atomic load"); 3543 report_fatal_error("Cannot generate unaligned atomic load");
3828 3544
3829 MachineMemOperand *MMO = 3545 MachineMemOperand *MMO =
3854 SynchronizationScope Scope = I.getSynchScope(); 3570 SynchronizationScope Scope = I.getSynchScope();
3855 3571
3856 SDValue InChain = getRoot(); 3572 SDValue InChain = getRoot();
3857 3573
3858 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3574 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3859 EVT VT = TLI.getValueType(I.getValueOperand()->getType()); 3575 EVT VT =
3576 TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
3860 3577
3861 if (I.getAlignment() < VT.getSizeInBits() / 8) 3578 if (I.getAlignment() < VT.getSizeInBits() / 8)
3862 report_fatal_error("Cannot generate unaligned atomic store"); 3579 report_fatal_error("Cannot generate unaligned atomic store");
3863 3580
3864 SDValue OutChain = 3581 SDValue OutChain =
3896 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); 3613 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
3897 3614
3898 // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 3615 // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
3899 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || 3616 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
3900 Info.opc == ISD::INTRINSIC_W_CHAIN) 3617 Info.opc == ISD::INTRINSIC_W_CHAIN)
3901 Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); 3618 Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
3619 TLI.getPointerTy(DAG.getDataLayout())));
3902 3620
3903 // Add all operands of the call to the operand list. 3621 // Add all operands of the call to the operand list.
3904 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { 3622 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
3905 SDValue Op = getValue(I.getArgOperand(i)); 3623 SDValue Op = getValue(I.getArgOperand(i));
3906 Ops.push_back(Op); 3624 Ops.push_back(Op);
3907 } 3625 }
3908 3626
3909 SmallVector<EVT, 4> ValueVTs; 3627 SmallVector<EVT, 4> ValueVTs;
3910 ComputeValueVTs(TLI, I.getType(), ValueVTs); 3628 ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
3911 3629
3912 if (HasChain) 3630 if (HasChain)
3913 ValueVTs.push_back(MVT::Other); 3631 ValueVTs.push_back(MVT::Other);
3914 3632
3915 SDVTList VTs = DAG.getVTList(ValueVTs); 3633 SDVTList VTs = DAG.getVTList(ValueVTs);
3939 DAG.setRoot(Chain); 3657 DAG.setRoot(Chain);
3940 } 3658 }
3941 3659
3942 if (!I.getType()->isVoidTy()) { 3660 if (!I.getType()->isVoidTy()) {
3943 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { 3661 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
3944 EVT VT = TLI.getValueType(PTy); 3662 EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
3945 Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); 3663 Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
3946 } 3664 }
3947 3665
3948 setValue(&I, Result); 3666 setValue(&I, Result);
3949 } 3667 }
3956 /// 3674 ///
3957 /// where Op is the hexadecimal representation of floating point value. 3675 /// where Op is the hexadecimal representation of floating point value.
3958 static SDValue 3676 static SDValue
3959 GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { 3677 GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
3960 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 3678 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3961 DAG.getConstant(0x007fffff, MVT::i32)); 3679 DAG.getConstant(0x007fffff, dl, MVT::i32));
3962 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, 3680 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
3963 DAG.getConstant(0x3f800000, MVT::i32)); 3681 DAG.getConstant(0x3f800000, dl, MVT::i32));
3964 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); 3682 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
3965 } 3683 }
3966 3684
3967 /// GetExponent - Get the exponent: 3685 /// GetExponent - Get the exponent:
3968 /// 3686 ///
3971 /// where Op is the hexadecimal representation of floating point value. 3689 /// where Op is the hexadecimal representation of floating point value.
3972 static SDValue 3690 static SDValue
3973 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, 3691 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
3974 SDLoc dl) { 3692 SDLoc dl) {
3975 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 3693 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3976 DAG.getConstant(0x7f800000, MVT::i32)); 3694 DAG.getConstant(0x7f800000, dl, MVT::i32));
3977 SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, 3695 SDValue t1 = DAG.getNode(
3978 DAG.getConstant(23, TLI.getPointerTy())); 3696 ISD::SRL, dl, MVT::i32, t0,
3697 DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
3979 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, 3698 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
3980 DAG.getConstant(127, MVT::i32)); 3699 DAG.getConstant(127, dl, MVT::i32));
3981 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); 3700 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
3982 } 3701 }
3983 3702
3984 /// getF32Constant - Get 32-bit floating point constant. 3703 /// getF32Constant - Get 32-bit floating point constant.
3985 static SDValue 3704 static SDValue
3986 getF32Constant(SelectionDAG &DAG, unsigned Flt) { 3705 getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
3987 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), 3706 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
3988 MVT::f32); 3707 MVT::f32);
3708 }
3709
3710 static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
3711 SelectionDAG &DAG) {
3712 // TODO: What fast-math-flags should be set on the floating-point nodes?
3713
3714 // IntegerPartOfX = ((int32_t)(t0);
3715 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3716
3717 // FractionalPartOfX = t0 - (float)IntegerPartOfX;
3718 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3719 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3720
3721 // IntegerPartOfX <<= 23;
3722 IntegerPartOfX = DAG.getNode(
3723 ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3724 DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
3725 DAG.getDataLayout())));
3726
3727 SDValue TwoToFractionalPartOfX;
3728 if (LimitFloatPrecision <= 6) {
3729 // For floating-point precision of 6:
3730 //
3731 // TwoToFractionalPartOfX =
3732 // 0.997535578f +
3733 // (0.735607626f + 0.252464424f * x) * x;
3734 //
3735 // error 0.0144103317, which is 6 bits
3736 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3737 getF32Constant(DAG, 0x3e814304, dl));
3738 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3739 getF32Constant(DAG, 0x3f3c50c8, dl));
3740 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3741 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3742 getF32Constant(DAG, 0x3f7f5e7e, dl));
3743 } else if (LimitFloatPrecision <= 12) {
3744 // For floating-point precision of 12:
3745 //
3746 // TwoToFractionalPartOfX =
3747 // 0.999892986f +
3748 // (0.696457318f +
3749 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
3750 //
3751 // error 0.000107046256, which is 13 to 14 bits
3752 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3753 getF32Constant(DAG, 0x3da235e3, dl));
3754 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3755 getF32Constant(DAG, 0x3e65b8f3, dl));
3756 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3757 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3758 getF32Constant(DAG, 0x3f324b07, dl));
3759 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3760 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3761 getF32Constant(DAG, 0x3f7ff8fd, dl));
3762 } else { // LimitFloatPrecision <= 18
3763 // For floating-point precision of 18:
3764 //
3765 // TwoToFractionalPartOfX =
3766 // 0.999999982f +
3767 // (0.693148872f +
3768 // (0.240227044f +
3769 // (0.554906021e-1f +
3770 // (0.961591928e-2f +
3771 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3772 // error 2.47208000*10^(-7), which is better than 18 bits
3773 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3774 getF32Constant(DAG, 0x3924b03e, dl));
3775 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3776 getF32Constant(DAG, 0x3ab24b87, dl));
3777 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3778 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3779 getF32Constant(DAG, 0x3c1d8c17, dl));
3780 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3781 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3782 getF32Constant(DAG, 0x3d634a1d, dl));
3783 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3784 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3785 getF32Constant(DAG, 0x3e75fe14, dl));
3786 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3787 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3788 getF32Constant(DAG, 0x3f317234, dl));
3789 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3790 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3791 getF32Constant(DAG, 0x3f800000, dl));
3792 }
3793
3794 // Add the exponent into the result in integer domain.
3795 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
3796 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
3797 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
3989 } 3798 }
3990 3799
3991 /// expandExp - Lower an exp intrinsic. Handles the special sequences for 3800 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
3992 /// limited-precision mode. 3801 /// limited-precision mode.
3993 static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3802 static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
3997 3806
3998 // Put the exponent in the right bit position for later addition to the 3807 // Put the exponent in the right bit position for later addition to the
3999 // final result: 3808 // final result:
4000 // 3809 //
4001 // #define LOG2OFe 1.4426950f 3810 // #define LOG2OFe 1.4426950f
4002 // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); 3811 // t0 = Op * LOG2OFe
3812
3813 // TODO: What fast-math-flags should be set here?
4003 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, 3814 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4004 getF32Constant(DAG, 0x3fb8aa3b)); 3815 getF32Constant(DAG, 0x3fb8aa3b, dl));
4005 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 3816 return getLimitedPrecisionExp2(t0, dl, DAG);
4006
4007 // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
4008 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4009 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4010
4011 // IntegerPartOfX <<= 23;
4012 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4013 DAG.getConstant(23, TLI.getPointerTy()));
4014
4015 SDValue TwoToFracPartOfX;
4016 if (LimitFloatPrecision <= 6) {
4017 // For floating-point precision of 6:
4018 //
4019 // TwoToFractionalPartOfX =
4020 // 0.997535578f +
4021 // (0.735607626f + 0.252464424f * x) * x;
4022 //
4023 // error 0.0144103317, which is 6 bits
4024 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4025 getF32Constant(DAG, 0x3e814304));
4026 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4027 getF32Constant(DAG, 0x3f3c50c8));
4028 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4029 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4030 getF32Constant(DAG, 0x3f7f5e7e));
4031 } else if (LimitFloatPrecision <= 12) {
4032 // For floating-point precision of 12:
4033 //
4034 // TwoToFractionalPartOfX =
4035 // 0.999892986f +
4036 // (0.696457318f +
4037 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4038 //
4039 // 0.000107046256 error, which is 13 to 14 bits
4040 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4041 getF32Constant(DAG, 0x3da235e3));
4042 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4043 getF32Constant(DAG, 0x3e65b8f3));
4044 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4045 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4046 getF32Constant(DAG, 0x3f324b07));
4047 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4048 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4049 getF32Constant(DAG, 0x3f7ff8fd));
4050 } else { // LimitFloatPrecision <= 18
4051 // For floating-point precision of 18:
4052 //
4053 // TwoToFractionalPartOfX =
4054 // 0.999999982f +
4055 // (0.693148872f +
4056 // (0.240227044f +
4057 // (0.554906021e-1f +
4058 // (0.961591928e-2f +
4059 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4060 //
4061 // error 2.47208000*10^(-7), which is better than 18 bits
4062 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4063 getF32Constant(DAG, 0x3924b03e));
4064 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4065 getF32Constant(DAG, 0x3ab24b87));
4066 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4067 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4068 getF32Constant(DAG, 0x3c1d8c17));
4069 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4070 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4071 getF32Constant(DAG, 0x3d634a1d));
4072 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4073 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4074 getF32Constant(DAG, 0x3e75fe14));
4075 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4076 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4077 getF32Constant(DAG, 0x3f317234));
4078 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4079 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4080 getF32Constant(DAG, 0x3f800000));
4081 }
4082
4083 // Add the exponent into the result in integer domain.
4084 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
4085 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4086 DAG.getNode(ISD::ADD, dl, MVT::i32,
4087 t13, IntegerPartOfX));
4088 } 3817 }
4089 3818
4090 // No special expansion. 3819 // No special expansion.
4091 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); 3820 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4092 } 3821 }
4093 3822
4094 /// expandLog - Lower a log intrinsic. Handles the special sequences for 3823 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4095 /// limited-precision mode. 3824 /// limited-precision mode.
4096 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3825 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
4097 const TargetLowering &TLI) { 3826 const TargetLowering &TLI) {
3827
3828 // TODO: What fast-math-flags should be set on the floating-point nodes?
3829
4098 if (Op.getValueType() == MVT::f32 && 3830 if (Op.getValueType() == MVT::f32 &&
4099 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 3831 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4100 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3832 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4101 3833
4102 // Scale the exponent by log(2) [0.69314718f]. 3834 // Scale the exponent by log(2) [0.69314718f].
4103 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 3835 SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4104 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 3836 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4105 getF32Constant(DAG, 0x3f317218)); 3837 getF32Constant(DAG, 0x3f317218, dl));
4106 3838
4107 // Get the significand and build it into a floating-point number with 3839 // Get the significand and build it into a floating-point number with
4108 // exponent of 1. 3840 // exponent of 1.
4109 SDValue X = GetSignificand(DAG, Op1, dl); 3841 SDValue X = GetSignificand(DAG, Op1, dl);
4110 3842
4116 // -1.1609546f + 3848 // -1.1609546f +
4117 // (1.4034025f - 0.23903021f * x) * x; 3849 // (1.4034025f - 0.23903021f * x) * x;
4118 // 3850 //
4119 // error 0.0034276066, which is better than 8 bits 3851 // error 0.0034276066, which is better than 8 bits
4120 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3852 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4121 getF32Constant(DAG, 0xbe74c456)); 3853 getF32Constant(DAG, 0xbe74c456, dl));
4122 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3854 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4123 getF32Constant(DAG, 0x3fb3a2b1)); 3855 getF32Constant(DAG, 0x3fb3a2b1, dl));
4124 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3856 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4125 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3857 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4126 getF32Constant(DAG, 0x3f949a29)); 3858 getF32Constant(DAG, 0x3f949a29, dl));
4127 } else if (LimitFloatPrecision <= 12) { 3859 } else if (LimitFloatPrecision <= 12) {
4128 // For floating-point precision of 12: 3860 // For floating-point precision of 12:
4129 // 3861 //
4130 // LogOfMantissa = 3862 // LogOfMantissa =
4131 // -1.7417939f + 3863 // -1.7417939f +
4133 // (-1.4699568f + 3865 // (-1.4699568f +
4134 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; 3866 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4135 // 3867 //
4136 // error 0.000061011436, which is 14 bits 3868 // error 0.000061011436, which is 14 bits
4137 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3869 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4138 getF32Constant(DAG, 0xbd67b6d6)); 3870 getF32Constant(DAG, 0xbd67b6d6, dl));
4139 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3871 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4140 getF32Constant(DAG, 0x3ee4f4b8)); 3872 getF32Constant(DAG, 0x3ee4f4b8, dl));
4141 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3873 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4142 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3874 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4143 getF32Constant(DAG, 0x3fbc278b)); 3875 getF32Constant(DAG, 0x3fbc278b, dl));
4144 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3876 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4145 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3877 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4146 getF32Constant(DAG, 0x40348e95)); 3878 getF32Constant(DAG, 0x40348e95, dl));
4147 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3879 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4148 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 3880 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4149 getF32Constant(DAG, 0x3fdef31a)); 3881 getF32Constant(DAG, 0x3fdef31a, dl));
4150 } else { // LimitFloatPrecision <= 18 3882 } else { // LimitFloatPrecision <= 18
4151 // For floating-point precision of 18: 3883 // For floating-point precision of 18:
4152 // 3884 //
4153 // LogOfMantissa = 3885 // LogOfMantissa =
4154 // -2.1072184f + 3886 // -2.1072184f +
4158 // (-0.87823314f + 3890 // (-0.87823314f +
4159 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; 3891 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4160 // 3892 //
4161 // error 0.0000023660568, which is better than 18 bits 3893 // error 0.0000023660568, which is better than 18 bits
4162 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3894 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4163 getF32Constant(DAG, 0xbc91e5ac)); 3895 getF32Constant(DAG, 0xbc91e5ac, dl));
4164 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3896 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4165 getF32Constant(DAG, 0x3e4350aa)); 3897 getF32Constant(DAG, 0x3e4350aa, dl));
4166 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3898 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4167 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3899 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4168 getF32Constant(DAG, 0x3f60d3e3)); 3900 getF32Constant(DAG, 0x3f60d3e3, dl));
4169 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3901 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4170 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3902 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4171 getF32Constant(DAG, 0x4011cdf0)); 3903 getF32Constant(DAG, 0x4011cdf0, dl));
4172 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3904 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4173 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 3905 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4174 getF32Constant(DAG, 0x406cfd1c)); 3906 getF32Constant(DAG, 0x406cfd1c, dl));
4175 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 3907 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4176 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 3908 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4177 getF32Constant(DAG, 0x408797cb)); 3909 getF32Constant(DAG, 0x408797cb, dl));
4178 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 3910 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4179 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 3911 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4180 getF32Constant(DAG, 0x4006dcab)); 3912 getF32Constant(DAG, 0x4006dcab, dl));
4181 } 3913 }
4182 3914
4183 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); 3915 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4184 } 3916 }
4185 3917
4189 3921
4190 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for 3922 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4191 /// limited-precision mode. 3923 /// limited-precision mode.
4192 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3924 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
4193 const TargetLowering &TLI) { 3925 const TargetLowering &TLI) {
3926
3927 // TODO: What fast-math-flags should be set on the floating-point nodes?
3928
4194 if (Op.getValueType() == MVT::f32 && 3929 if (Op.getValueType() == MVT::f32 &&
4195 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 3930 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4196 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3931 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4197 3932
4198 // Get the exponent. 3933 // Get the exponent.
4210 // 3945 //
4211 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; 3946 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4212 // 3947 //
4213 // error 0.0049451742, which is more than 7 bits 3948 // error 0.0049451742, which is more than 7 bits
4214 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3949 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4215 getF32Constant(DAG, 0xbeb08fe0)); 3950 getF32Constant(DAG, 0xbeb08fe0, dl));
4216 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3951 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4217 getF32Constant(DAG, 0x40019463)); 3952 getF32Constant(DAG, 0x40019463, dl));
4218 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3953 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4219 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3954 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4220 getF32Constant(DAG, 0x3fd6633d)); 3955 getF32Constant(DAG, 0x3fd6633d, dl));
4221 } else if (LimitFloatPrecision <= 12) { 3956 } else if (LimitFloatPrecision <= 12) {
4222 // For floating-point precision of 12: 3957 // For floating-point precision of 12:
4223 // 3958 //
4224 // Log2ofMantissa = 3959 // Log2ofMantissa =
4225 // -2.51285454f + 3960 // -2.51285454f +
4227 // (-2.12067489f + 3962 // (-2.12067489f +
4228 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; 3963 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4229 // 3964 //
4230 // error 0.0000876136000, which is better than 13 bits 3965 // error 0.0000876136000, which is better than 13 bits
4231 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3966 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4232 getF32Constant(DAG, 0xbda7262e)); 3967 getF32Constant(DAG, 0xbda7262e, dl));
4233 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3968 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4234 getF32Constant(DAG, 0x3f25280b)); 3969 getF32Constant(DAG, 0x3f25280b, dl));
4235 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3970 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4236 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3971 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4237 getF32Constant(DAG, 0x4007b923)); 3972 getF32Constant(DAG, 0x4007b923, dl));
4238 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3973 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4239 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3974 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4240 getF32Constant(DAG, 0x40823e2f)); 3975 getF32Constant(DAG, 0x40823e2f, dl));
4241 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3976 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4242 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 3977 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4243 getF32Constant(DAG, 0x4020d29c)); 3978 getF32Constant(DAG, 0x4020d29c, dl));
4244 } else { // LimitFloatPrecision <= 18 3979 } else { // LimitFloatPrecision <= 18
4245 // For floating-point precision of 18: 3980 // For floating-point precision of 18:
4246 // 3981 //
4247 // Log2ofMantissa = 3982 // Log2ofMantissa =
4248 // -3.0400495f + 3983 // -3.0400495f +
4253 // (0.27515199f - 3988 // (0.27515199f -
4254 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; 3989 // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4255 // 3990 //
4256 // error 0.0000018516, which is better than 18 bits 3991 // error 0.0000018516, which is better than 18 bits
4257 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3992 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4258 getF32Constant(DAG, 0xbcd2769e)); 3993 getF32Constant(DAG, 0xbcd2769e, dl));
4259 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3994 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4260 getF32Constant(DAG, 0x3e8ce0b9)); 3995 getF32Constant(DAG, 0x3e8ce0b9, dl));
4261 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3996 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4262 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3997 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4263 getF32Constant(DAG, 0x3fa22ae7)); 3998 getF32Constant(DAG, 0x3fa22ae7, dl));
4264 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3999 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4265 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4000 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4266 getF32Constant(DAG, 0x40525723)); 4001 getF32Constant(DAG, 0x40525723, dl));
4267 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4002 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4268 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4003 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4269 getF32Constant(DAG, 0x40aaf200)); 4004 getF32Constant(DAG, 0x40aaf200, dl));
4270 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4005 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4271 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4006 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4272 getF32Constant(DAG, 0x40c39dad)); 4007 getF32Constant(DAG, 0x40c39dad, dl));
4273 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4008 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4274 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 4009 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4275 getF32Constant(DAG, 0x4042902c)); 4010 getF32Constant(DAG, 0x4042902c, dl));
4276 } 4011 }
4277 4012
4278 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); 4013 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4279 } 4014 }
4280 4015
4284 4019
4285 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for 4020 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4286 /// limited-precision mode. 4021 /// limited-precision mode.
4287 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4022 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
4288 const TargetLowering &TLI) { 4023 const TargetLowering &TLI) {
4024
4025 // TODO: What fast-math-flags should be set on the floating-point nodes?
4026
4289 if (Op.getValueType() == MVT::f32 && 4027 if (Op.getValueType() == MVT::f32 &&
4290 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4028 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4291 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 4029 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4292 4030
4293 // Scale the exponent by log10(2) [0.30102999f]. 4031 // Scale the exponent by log10(2) [0.30102999f].
4294 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 4032 SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4295 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 4033 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4296 getF32Constant(DAG, 0x3e9a209a)); 4034 getF32Constant(DAG, 0x3e9a209a, dl));
4297 4035
4298 // Get the significand and build it into a floating-point number with 4036 // Get the significand and build it into a floating-point number with
4299 // exponent of 1. 4037 // exponent of 1.
4300 SDValue X = GetSignificand(DAG, Op1, dl); 4038 SDValue X = GetSignificand(DAG, Op1, dl);
4301 4039
4307 // -0.50419619f + 4045 // -0.50419619f +
4308 // (0.60948995f - 0.10380950f * x) * x; 4046 // (0.60948995f - 0.10380950f * x) * x;
4309 // 4047 //
4310 // error 0.0014886165, which is 6 bits 4048 // error 0.0014886165, which is 6 bits
4311 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4049 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4312 getF32Constant(DAG, 0xbdd49a13)); 4050 getF32Constant(DAG, 0xbdd49a13, dl));
4313 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4051 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4314 getF32Constant(DAG, 0x3f1c0789)); 4052 getF32Constant(DAG, 0x3f1c0789, dl));
4315 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4053 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4316 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4054 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4317 getF32Constant(DAG, 0x3f011300)); 4055 getF32Constant(DAG, 0x3f011300, dl));
4318 } else if (LimitFloatPrecision <= 12) { 4056 } else if (LimitFloatPrecision <= 12) {
4319 // For floating-point precision of 12: 4057 // For floating-point precision of 12:
4320 // 4058 //
4321 // Log10ofMantissa = 4059 // Log10ofMantissa =
4322 // -0.64831180f + 4060 // -0.64831180f +
4323 // (0.91751397f + 4061 // (0.91751397f +
4324 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; 4062 // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4325 // 4063 //
4326 // error 0.00019228036, which is better than 12 bits 4064 // error 0.00019228036, which is better than 12 bits
4327 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4065 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4328 getF32Constant(DAG, 0x3d431f31)); 4066 getF32Constant(DAG, 0x3d431f31, dl));
4329 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4067 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4330 getF32Constant(DAG, 0x3ea21fb2)); 4068 getF32Constant(DAG, 0x3ea21fb2, dl));
4331 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4069 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4332 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4070 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4333 getF32Constant(DAG, 0x3f6ae232)); 4071 getF32Constant(DAG, 0x3f6ae232, dl));
4334 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4072 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4335 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4073 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4336 getF32Constant(DAG, 0x3f25f7c3)); 4074 getF32Constant(DAG, 0x3f25f7c3, dl));
4337 } else { // LimitFloatPrecision <= 18 4075 } else { // LimitFloatPrecision <= 18
4338 // For floating-point precision of 18: 4076 // For floating-point precision of 18:
4339 // 4077 //
4340 // Log10ofMantissa = 4078 // Log10ofMantissa =
4341 // -0.84299375f + 4079 // -0.84299375f +
4344 // (0.49102474f + 4082 // (0.49102474f +
4345 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; 4083 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4346 // 4084 //
4347 // error 0.0000037995730, which is better than 18 bits 4085 // error 0.0000037995730, which is better than 18 bits
4348 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4086 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4349 getF32Constant(DAG, 0x3c5d51ce)); 4087 getF32Constant(DAG, 0x3c5d51ce, dl));
4350 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4088 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4351 getF32Constant(DAG, 0x3e00685a)); 4089 getF32Constant(DAG, 0x3e00685a, dl));
4352 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4090 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4353 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4091 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4354 getF32Constant(DAG, 0x3efb6798)); 4092 getF32Constant(DAG, 0x3efb6798, dl));
4355 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4093 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4356 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4094 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4357 getF32Constant(DAG, 0x3f88d192)); 4095 getF32Constant(DAG, 0x3f88d192, dl));
4358 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4096 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4359 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4097 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4360 getF32Constant(DAG, 0x3fc4316c)); 4098 getF32Constant(DAG, 0x3fc4316c, dl));
4361 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4099 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4362 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, 4100 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4363 getF32Constant(DAG, 0x3f57ce70)); 4101 getF32Constant(DAG, 0x3f57ce70, dl));
4364 } 4102 }
4365 4103
4366 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); 4104 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4367 } 4105 }
4368 4106
4373 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for 4111 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4374 /// limited-precision mode. 4112 /// limited-precision mode.
4375 static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4113 static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
4376 const TargetLowering &TLI) { 4114 const TargetLowering &TLI) {
4377 if (Op.getValueType() == MVT::f32 && 4115 if (Op.getValueType() == MVT::f32 &&
4378 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4116 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4379 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); 4117 return getLimitedPrecisionExp2(Op, dl, DAG);
4380
4381 // FractionalPartOfX = x - (float)IntegerPartOfX;
4382 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4383 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
4384
4385 // IntegerPartOfX <<= 23;
4386 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4387 DAG.getConstant(23, TLI.getPointerTy()));
4388
4389 SDValue TwoToFractionalPartOfX;
4390 if (LimitFloatPrecision <= 6) {
4391 // For floating-point precision of 6:
4392 //
4393 // TwoToFractionalPartOfX =
4394 // 0.997535578f +
4395 // (0.735607626f + 0.252464424f * x) * x;
4396 //
4397 // error 0.0144103317, which is 6 bits
4398 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4399 getF32Constant(DAG, 0x3e814304));
4400 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4401 getF32Constant(DAG, 0x3f3c50c8));
4402 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4403 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4404 getF32Constant(DAG, 0x3f7f5e7e));
4405 } else if (LimitFloatPrecision <= 12) {
4406 // For floating-point precision of 12:
4407 //
4408 // TwoToFractionalPartOfX =
4409 // 0.999892986f +
4410 // (0.696457318f +
4411 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4412 //
4413 // error 0.000107046256, which is 13 to 14 bits
4414 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4415 getF32Constant(DAG, 0x3da235e3));
4416 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4417 getF32Constant(DAG, 0x3e65b8f3));
4418 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4419 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4420 getF32Constant(DAG, 0x3f324b07));
4421 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4422 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4423 getF32Constant(DAG, 0x3f7ff8fd));
4424 } else { // LimitFloatPrecision <= 18
4425 // For floating-point precision of 18:
4426 //
4427 // TwoToFractionalPartOfX =
4428 // 0.999999982f +
4429 // (0.693148872f +
4430 // (0.240227044f +
4431 // (0.554906021e-1f +
4432 // (0.961591928e-2f +
4433 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4434 // error 2.47208000*10^(-7), which is better than 18 bits
4435 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4436 getF32Constant(DAG, 0x3924b03e));
4437 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4438 getF32Constant(DAG, 0x3ab24b87));
4439 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4440 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4441 getF32Constant(DAG, 0x3c1d8c17));
4442 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4443 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4444 getF32Constant(DAG, 0x3d634a1d));
4445 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4446 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4447 getF32Constant(DAG, 0x3e75fe14));
4448 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4449 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4450 getF32Constant(DAG, 0x3f317234));
4451 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4452 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4453 getF32Constant(DAG, 0x3f800000));
4454 }
4455
4456 // Add the exponent into the result in integer domain.
4457 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4458 TwoToFractionalPartOfX);
4459 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4460 DAG.getNode(ISD::ADD, dl, MVT::i32,
4461 t13, IntegerPartOfX));
4462 }
4463 4118
4464 // No special expansion. 4119 // No special expansion.
4465 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); 4120 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4466 } 4121 }
4467 4122
4476 APFloat Ten(10.0f); 4131 APFloat Ten(10.0f);
4477 IsExp10 = LHSC->isExactlyValue(Ten); 4132 IsExp10 = LHSC->isExactlyValue(Ten);
4478 } 4133 }
4479 } 4134 }
4480 4135
4136 // TODO: What fast-math-flags should be set on the FMUL node?
4481 if (IsExp10) { 4137 if (IsExp10) {
4482 // Put the exponent in the right bit position for later addition to the 4138 // Put the exponent in the right bit position for later addition to the
4483 // final result: 4139 // final result:
4484 // 4140 //
4485 // #define LOG2OF10 3.3219281f 4141 // #define LOG2OF10 3.3219281f
4486 // IntegerPartOfX = (int32_t)(x * LOG2OF10); 4142 // t0 = Op * LOG2OF10;
4487 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, 4143 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4488 getF32Constant(DAG, 0x40549a78)); 4144 getF32Constant(DAG, 0x40549a78, dl));
4489 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 4145 return getLimitedPrecisionExp2(t0, dl, DAG);
4490
4491 // FractionalPartOfX = x - (float)IntegerPartOfX;
4492 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4493 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4494
4495 // IntegerPartOfX <<= 23;
4496 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4497 DAG.getConstant(23, TLI.getPointerTy()));
4498
4499 SDValue TwoToFractionalPartOfX;
4500 if (LimitFloatPrecision <= 6) {
4501 // For floating-point precision of 6:
4502 //
4503 // twoToFractionalPartOfX =
4504 // 0.997535578f +
4505 // (0.735607626f + 0.252464424f * x) * x;
4506 //
4507 // error 0.0144103317, which is 6 bits
4508 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4509 getF32Constant(DAG, 0x3e814304));
4510 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4511 getF32Constant(DAG, 0x3f3c50c8));
4512 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4513 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4514 getF32Constant(DAG, 0x3f7f5e7e));
4515 } else if (LimitFloatPrecision <= 12) {
4516 // For floating-point precision of 12:
4517 //
4518 // TwoToFractionalPartOfX =
4519 // 0.999892986f +
4520 // (0.696457318f +
4521 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4522 //
4523 // error 0.000107046256, which is 13 to 14 bits
4524 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4525 getF32Constant(DAG, 0x3da235e3));
4526 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4527 getF32Constant(DAG, 0x3e65b8f3));
4528 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4529 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4530 getF32Constant(DAG, 0x3f324b07));
4531 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4532 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4533 getF32Constant(DAG, 0x3f7ff8fd));
4534 } else { // LimitFloatPrecision <= 18
4535 // For floating-point precision of 18:
4536 //
4537 // TwoToFractionalPartOfX =
4538 // 0.999999982f +
4539 // (0.693148872f +
4540 // (0.240227044f +
4541 // (0.554906021e-1f +
4542 // (0.961591928e-2f +
4543 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4544 // error 2.47208000*10^(-7), which is better than 18 bits
4545 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4546 getF32Constant(DAG, 0x3924b03e));
4547 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4548 getF32Constant(DAG, 0x3ab24b87));
4549 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4550 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4551 getF32Constant(DAG, 0x3c1d8c17));
4552 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4553 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4554 getF32Constant(DAG, 0x3d634a1d));
4555 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4556 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4557 getF32Constant(DAG, 0x3e75fe14));
4558 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4559 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4560 getF32Constant(DAG, 0x3f317234));
4561 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4562 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4563 getF32Constant(DAG, 0x3f800000));
4564 }
4565
4566 SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
4567 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4568 DAG.getNode(ISD::ADD, dl, MVT::i32,
4569 t13, IntegerPartOfX));
4570 } 4146 }
4571 4147
4572 // No special expansion. 4148 // No special expansion.
4573 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); 4149 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4574 } 4150 }
4586 unsigned Val = RHSC->getSExtValue(); 4162 unsigned Val = RHSC->getSExtValue();
4587 if ((int)Val < 0) Val = -Val; 4163 if ((int)Val < 0) Val = -Val;
4588 4164
4589 // powi(x, 0) -> 1.0 4165 // powi(x, 0) -> 1.0
4590 if (Val == 0) 4166 if (Val == 0)
4591 return DAG.getConstantFP(1.0, LHS.getValueType()); 4167 return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4592 4168
4593 const Function *F = DAG.getMachineFunction().getFunction(); 4169 const Function *F = DAG.getMachineFunction().getFunction();
4594 if (!F->hasFnAttribute(Attribute::OptimizeForSize) || 4170 if (!F->optForSize() ||
4595 // If optimizing for size, don't insert too many multiplies. This 4171 // If optimizing for size, don't insert too many multiplies.
4596 // inserts up to 5 multiplies. 4172 // This inserts up to 5 multiplies.
4597 countPopulation(Val) + Log2_32(Val) < 7) { 4173 countPopulation(Val) + Log2_32(Val) < 7) {
4598 // We use the simple binary decomposition method to generate the multiply 4174 // We use the simple binary decomposition method to generate the multiply
4599 // sequence. There are more optimal ways to do this (for example, 4175 // sequence. There are more optimal ways to do this (for example,
4600 // powi(x,15) generates one more multiply than it should), but this has 4176 // powi(x,15) generates one more multiply than it should), but this has
4601 // the benefit of being both really simple and much better than a libcall. 4177 // the benefit of being both really simple and much better than a libcall.
4602 SDValue Res; // Logically starts equal to 1.0 4178 SDValue Res; // Logically starts equal to 1.0
4603 SDValue CurSquare = LHS; 4179 SDValue CurSquare = LHS;
4180 // TODO: Intrinsics should have fast-math-flags that propagate to these
4181 // nodes.
4604 while (Val) { 4182 while (Val) {
4605 if (Val & 1) { 4183 if (Val & 1) {
4606 if (Res.getNode()) 4184 if (Res.getNode())
4607 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); 4185 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4608 else 4186 else
4615 } 4193 }
4616 4194
4617 // If the original was negative, invert the result, producing 1/(x*x*x). 4195 // If the original was negative, invert the result, producing 1/(x*x*x).
4618 if (RHSC->getSExtValue() < 0) 4196 if (RHSC->getSExtValue() < 0)
4619 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), 4197 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4620 DAG.getConstantFP(1.0, LHS.getValueType()), Res); 4198 DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4621 return Res; 4199 return Res;
4622 } 4200 }
4623 } 4201 }
4624 4202
4625 // Otherwise, expand to a libcall. 4203 // Otherwise, expand to a libcall.
4626 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); 4204 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4627 } 4205 }
4628 4206
4629 // getTruncatedArgReg - Find underlying register used for an truncated 4207 // getUnderlyingArgReg - Find underlying register used for a truncated or
4630 // argument. 4208 // bitcasted argument.
4631 static unsigned getTruncatedArgReg(const SDValue &N) { 4209 static unsigned getUnderlyingArgReg(const SDValue &N) {
4632 if (N.getOpcode() != ISD::TRUNCATE) 4210 switch (N.getOpcode()) {
4211 case ISD::CopyFromReg:
4212 return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4213 case ISD::BITCAST:
4214 case ISD::AssertZext:
4215 case ISD::AssertSext:
4216 case ISD::TRUNCATE:
4217 return getUnderlyingArgReg(N.getOperand(0));
4218 default:
4633 return 0; 4219 return 0;
4634 4220 }
4635 const SDValue &Ext = N.getOperand(0);
4636 if (Ext.getOpcode() == ISD::AssertZext ||
4637 Ext.getOpcode() == ISD::AssertSext) {
4638 const SDValue &CFR = Ext.getOperand(0);
4639 if (CFR.getOpcode() == ISD::CopyFromReg)
4640 return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
4641 if (CFR.getOpcode() == ISD::TRUNCATE)
4642 return getTruncatedArgReg(CFR);
4643 }
4644 return 0;
4645 } 4221 }
4646 4222
4647 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function 4223 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
4648 /// argument, create the corresponding DBG_VALUE machine instruction for it now. 4224 /// argument, create the corresponding DBG_VALUE machine instruction for it now.
4649 /// At the end of instruction selection, they will be inserted to the entry BB. 4225 /// At the end of instruction selection, they will be inserted to the entry BB.
4650 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, 4226 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4651 MDNode *Variable, 4227 const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4652 MDNode *Expr, int64_t Offset, 4228 DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
4653 bool IsIndirect,
4654 const SDValue &N) {
4655 const Argument *Arg = dyn_cast<Argument>(V); 4229 const Argument *Arg = dyn_cast<Argument>(V);
4656 if (!Arg) 4230 if (!Arg)
4657 return false; 4231 return false;
4658 4232
4659 MachineFunction &MF = DAG.getMachineFunction(); 4233 MachineFunction &MF = DAG.getMachineFunction();
4660 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); 4234 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4661 4235
4662 // Ignore inlined function arguments here. 4236 // Ignore inlined function arguments here.
4663 DIVariable DV(Variable); 4237 //
4664 if (DV.isInlinedFnArgument(MF.getFunction())) 4238 // FIXME: Should we be checking DL->inlinedAt() to determine this?
4239 if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
4665 return false; 4240 return false;
4666 4241
4667 Optional<MachineOperand> Op; 4242 Optional<MachineOperand> Op;
4668 // Some arguments' frame index is recorded during argument lowering. 4243 // Some arguments' frame index is recorded during argument lowering.
4669 if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) 4244 if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
4670 Op = MachineOperand::CreateFI(FI); 4245 Op = MachineOperand::CreateFI(FI);
4671 4246
4672 if (!Op && N.getNode()) { 4247 if (!Op && N.getNode()) {
4673 unsigned Reg; 4248 unsigned Reg = getUnderlyingArgReg(N);
4674 if (N.getOpcode() == ISD::CopyFromReg)
4675 Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
4676 else
4677 Reg = getTruncatedArgReg(N);
4678 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { 4249 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4679 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4250 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4680 unsigned PR = RegInfo.getLiveInPhysReg(Reg); 4251 unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4681 if (PR) 4252 if (PR)
4682 Reg = PR; 4253 Reg = PR;
4700 Op = MachineOperand::CreateFI(FINode->getIndex()); 4271 Op = MachineOperand::CreateFI(FINode->getIndex());
4701 4272
4702 if (!Op) 4273 if (!Op)
4703 return false; 4274 return false;
4704 4275
4276 assert(Variable->isValidLocationForIntrinsic(DL) &&
4277 "Expected inlined-at fields to agree");
4705 if (Op->isReg()) 4278 if (Op->isReg())
4706 FuncInfo.ArgDbgValues.push_back( 4279 FuncInfo.ArgDbgValues.push_back(
4707 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), 4280 BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4708 IsIndirect, Op->getReg(), Offset, Variable, Expr)); 4281 Op->getReg(), Offset, Variable, Expr));
4709 else 4282 else
4710 FuncInfo.ArgDbgValues.push_back( 4283 FuncInfo.ArgDbgValues.push_back(
4711 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) 4284 BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4712 .addOperand(*Op) 4285 .addOperand(*Op)
4713 .addImm(Offset) 4286 .addImm(Offset)
4714 .addMetadata(Variable) 4287 .addMetadata(Variable)
4715 .addMetadata(Expr)); 4288 .addMetadata(Expr));
4716 4289
4742 return nullptr; 4315 return nullptr;
4743 case Intrinsic::vastart: visitVAStart(I); return nullptr; 4316 case Intrinsic::vastart: visitVAStart(I); return nullptr;
4744 case Intrinsic::vaend: visitVAEnd(I); return nullptr; 4317 case Intrinsic::vaend: visitVAEnd(I); return nullptr;
4745 case Intrinsic::vacopy: visitVACopy(I); return nullptr; 4318 case Intrinsic::vacopy: visitVACopy(I); return nullptr;
4746 case Intrinsic::returnaddress: 4319 case Intrinsic::returnaddress:
4747 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), 4320 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4321 TLI.getPointerTy(DAG.getDataLayout()),
4748 getValue(I.getArgOperand(0)))); 4322 getValue(I.getArgOperand(0))));
4749 return nullptr; 4323 return nullptr;
4750 case Intrinsic::frameaddress: 4324 case Intrinsic::frameaddress:
4751 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), 4325 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4326 TLI.getPointerTy(DAG.getDataLayout()),
4752 getValue(I.getArgOperand(0)))); 4327 getValue(I.getArgOperand(0))));
4753 return nullptr; 4328 return nullptr;
4754 case Intrinsic::read_register: { 4329 case Intrinsic::read_register: {
4755 Value *Reg = I.getArgOperand(0); 4330 Value *Reg = I.getArgOperand(0);
4331 SDValue Chain = getRoot();
4756 SDValue RegName = 4332 SDValue RegName =
4757 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); 4333 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4758 EVT VT = TLI.getValueType(I.getType()); 4334 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4759 setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); 4335 Res = DAG.getNode(ISD::READ_REGISTER, sdl,
4336 DAG.getVTList(VT, MVT::Other), Chain, RegName);
4337 setValue(&I, Res);
4338 DAG.setRoot(Res.getValue(1));
4760 return nullptr; 4339 return nullptr;
4761 } 4340 }
4762 case Intrinsic::write_register: { 4341 case Intrinsic::write_register: {
4763 Value *Reg = I.getArgOperand(0); 4342 Value *Reg = I.getArgOperand(0);
4764 Value *RegValue = I.getArgOperand(1); 4343 Value *RegValue = I.getArgOperand(1);
4765 SDValue Chain = getValue(RegValue).getOperand(0); 4344 SDValue Chain = getRoot();
4766 SDValue RegName = 4345 SDValue RegName =
4767 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); 4346 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4768 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, 4347 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
4769 RegName, getValue(RegValue))); 4348 RegName, getValue(RegValue)));
4770 return nullptr; 4349 return nullptr;
4787 SDValue Op3 = getValue(I.getArgOperand(2)); 4366 SDValue Op3 = getValue(I.getArgOperand(2));
4788 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4367 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4789 if (!Align) 4368 if (!Align)
4790 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 4369 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
4791 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4370 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4792 DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, 4371 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4793 MachinePointerInfo(I.getArgOperand(0)), 4372 SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4794 MachinePointerInfo(I.getArgOperand(1)))); 4373 false, isTC,
4374 MachinePointerInfo(I.getArgOperand(0)),
4375 MachinePointerInfo(I.getArgOperand(1)));
4376 updateDAGForMaybeTailCall(MC);
4795 return nullptr; 4377 return nullptr;
4796 } 4378 }
4797 case Intrinsic::memset: { 4379 case Intrinsic::memset: {
4798 // FIXME: this definition of "user defined address space" is x86-specific 4380 // FIXME: this definition of "user defined address space" is x86-specific
4799 // Assert for address < 256 since we support only user defined address 4381 // Assert for address < 256 since we support only user defined address
4806 SDValue Op3 = getValue(I.getArgOperand(2)); 4388 SDValue Op3 = getValue(I.getArgOperand(2));
4807 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4389 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4808 if (!Align) 4390 if (!Align)
4809 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 4391 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
4810 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4392 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4811 DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4393 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4812 MachinePointerInfo(I.getArgOperand(0)))); 4394 SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4395 isTC, MachinePointerInfo(I.getArgOperand(0)));
4396 updateDAGForMaybeTailCall(MS);
4813 return nullptr; 4397 return nullptr;
4814 } 4398 }
4815 case Intrinsic::memmove: { 4399 case Intrinsic::memmove: {
4816 // FIXME: this definition of "user defined address space" is x86-specific 4400 // FIXME: this definition of "user defined address space" is x86-specific
4817 // Assert for address < 256 since we support only user defined address 4401 // Assert for address < 256 since we support only user defined address
4826 SDValue Op3 = getValue(I.getArgOperand(2)); 4410 SDValue Op3 = getValue(I.getArgOperand(2));
4827 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4411 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4828 if (!Align) 4412 if (!Align)
4829 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 4413 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
4830 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4414 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4831 DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4415 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4832 MachinePointerInfo(I.getArgOperand(0)), 4416 SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4833 MachinePointerInfo(I.getArgOperand(1)))); 4417 isTC, MachinePointerInfo(I.getArgOperand(0)),
4418 MachinePointerInfo(I.getArgOperand(1)));
4419 updateDAGForMaybeTailCall(MM);
4834 return nullptr; 4420 return nullptr;
4835 } 4421 }
4836 case Intrinsic::dbg_declare: { 4422 case Intrinsic::dbg_declare: {
4837 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); 4423 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
4838 MDNode *Variable = DI.getVariable(); 4424 DILocalVariable *Variable = DI.getVariable();
4839 MDNode *Expression = DI.getExpression(); 4425 DIExpression *Expression = DI.getExpression();
4840 const Value *Address = DI.getAddress(); 4426 const Value *Address = DI.getAddress();
4841 DIVariable DIVar(Variable); 4427 assert(Variable && "Missing variable");
4842 assert((!DIVar || DIVar.isVariable()) && 4428 if (!Address) {
4843 "Variable in DbgDeclareInst should be either null or a DIVariable.");
4844 if (!Address || !DIVar) {
4845 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4429 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4846 return nullptr; 4430 return nullptr;
4847 } 4431 }
4848 4432
4849 // Check if address has undef value. 4433 // Check if address has undef value.
4860 SDDbgValue *SDV; 4444 SDDbgValue *SDV;
4861 if (N.getNode()) { 4445 if (N.getNode()) {
4862 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) 4446 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
4863 Address = BCI->getOperand(0); 4447 Address = BCI->getOperand(0);
4864 // Parameters are handled specially. 4448 // Parameters are handled specially.
4865 bool isParameter = 4449 bool isParameter = Variable->isParameter() || isa<Argument>(Address);
4866 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
4867 isa<Argument>(Address));
4868 4450
4869 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); 4451 const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
4870 4452
4871 if (isParameter && !AI) { 4453 if (isParameter && !AI) {
4872 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); 4454 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
4875 SDV = DAG.getFrameIndexDbgValue( 4457 SDV = DAG.getFrameIndexDbgValue(
4876 Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); 4458 Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
4877 else { 4459 else {
4878 // Address is an argument, so try to emit its dbg value using 4460 // Address is an argument, so try to emit its dbg value using
4879 // virtual register info from the FuncInfo.ValueMap. 4461 // virtual register info from the FuncInfo.ValueMap.
4880 EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); 4462 EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
4463 N);
4881 return nullptr; 4464 return nullptr;
4882 } 4465 }
4883 } else if (AI) 4466 } else {
4884 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), 4467 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
4885 true, 0, dl, SDNodeOrder); 4468 true, 0, dl, SDNodeOrder);
4886 else {
4887 // Can't do anything with other non-AI cases yet.
4888 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4889 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
4890 DEBUG(Address->dump());
4891 return nullptr;
4892 } 4469 }
4893 DAG.AddDbgValue(SDV, N.getNode(), isParameter); 4470 DAG.AddDbgValue(SDV, N.getNode(), isParameter);
4894 } else { 4471 } else {
4895 // If Address is an argument then try to emit its dbg value using 4472 // If Address is an argument then try to emit its dbg value using
4896 // virtual register info from the FuncInfo.ValueMap. 4473 // virtual register info from the FuncInfo.ValueMap.
4897 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, 4474 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
4898 N)) { 4475 N)) {
4899 // If variable is pinned by a alloca in dominating bb then 4476 // If variable is pinned by a alloca in dominating bb then
4900 // use StaticAllocaMap. 4477 // use StaticAllocaMap.
4901 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { 4478 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
4902 if (AI->getParent() != DI.getParent()) { 4479 if (AI->getParent() != DI.getParent()) {
4915 } 4492 }
4916 return nullptr; 4493 return nullptr;
4917 } 4494 }
4918 case Intrinsic::dbg_value: { 4495 case Intrinsic::dbg_value: {
4919 const DbgValueInst &DI = cast<DbgValueInst>(I); 4496 const DbgValueInst &DI = cast<DbgValueInst>(I);
4920 DIVariable DIVar(DI.getVariable()); 4497 assert(DI.getVariable() && "Missing variable");
4921 assert((!DIVar || DIVar.isVariable()) && 4498
4922 "Variable in DbgValueInst should be either null or a DIVariable."); 4499 DILocalVariable *Variable = DI.getVariable();
4923 if (!DIVar) 4500 DIExpression *Expression = DI.getExpression();
4924 return nullptr;
4925
4926 MDNode *Variable = DI.getVariable();
4927 MDNode *Expression = DI.getExpression();
4928 uint64_t Offset = DI.getOffset(); 4501 uint64_t Offset = DI.getOffset();
4929 const Value *V = DI.getValue(); 4502 const Value *V = DI.getValue();
4930 if (!V) 4503 if (!V)
4931 return nullptr; 4504 return nullptr;
4932 4505
4943 // Check unused arguments map. 4516 // Check unused arguments map.
4944 N = UnusedArgNodeMap[V]; 4517 N = UnusedArgNodeMap[V];
4945 if (N.getNode()) { 4518 if (N.getNode()) {
4946 // A dbg.value for an alloca is always indirect. 4519 // A dbg.value for an alloca is always indirect.
4947 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 4520 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
4948 if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, 4521 if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
4949 IsIndirect, N)) { 4522 IsIndirect, N)) {
4950 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), 4523 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
4951 IsIndirect, Offset, dl, SDNodeOrder); 4524 IsIndirect, Offset, dl, SDNodeOrder);
4952 DAG.AddDbgValue(SDV, N.getNode(), false); 4525 DAG.AddDbgValue(SDV, N.getNode(), false);
4953 } 4526 }
4982 4555
4983 case Intrinsic::eh_typeid_for: { 4556 case Intrinsic::eh_typeid_for: {
4984 // Find the type id for the given typeinfo. 4557 // Find the type id for the given typeinfo.
4985 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); 4558 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
4986 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); 4559 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
4987 Res = DAG.getConstant(TypeID, MVT::i32); 4560 Res = DAG.getConstant(TypeID, sdl, MVT::i32);
4988 setValue(&I, Res); 4561 setValue(&I, Res);
4989 return nullptr; 4562 return nullptr;
4990 } 4563 }
4991 4564
4992 case Intrinsic::eh_return_i32: 4565 case Intrinsic::eh_return_i32:
5001 case Intrinsic::eh_unwind_init: 4574 case Intrinsic::eh_unwind_init:
5002 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); 4575 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
5003 return nullptr; 4576 return nullptr;
5004 case Intrinsic::eh_dwarf_cfa: { 4577 case Intrinsic::eh_dwarf_cfa: {
5005 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, 4578 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
5006 TLI.getPointerTy()); 4579 TLI.getPointerTy(DAG.getDataLayout()));
5007 SDValue Offset = DAG.getNode(ISD::ADD, sdl, 4580 SDValue Offset = DAG.getNode(ISD::ADD, sdl,
5008 CfaArg.getValueType(), 4581 CfaArg.getValueType(),
5009 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, 4582 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
5010 CfaArg.getValueType()), 4583 CfaArg.getValueType()),
5011 CfaArg); 4584 CfaArg);
5012 SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), 4585 SDValue FA = DAG.getNode(
5013 DAG.getConstant(0, TLI.getPointerTy())); 4586 ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()),
4587 DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
5014 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), 4588 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
5015 FA, Offset)); 4589 FA, Offset));
5016 return nullptr; 4590 return nullptr;
5017 } 4591 }
5018 case Intrinsic::eh_sjlj_callsite: { 4592 case Intrinsic::eh_sjlj_callsite: {
5046 case Intrinsic::eh_sjlj_longjmp: { 4620 case Intrinsic::eh_sjlj_longjmp: {
5047 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, 4621 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
5048 getRoot(), getValue(I.getArgOperand(0)))); 4622 getRoot(), getValue(I.getArgOperand(0))));
5049 return nullptr; 4623 return nullptr;
5050 } 4624 }
5051 4625 case Intrinsic::eh_sjlj_setup_dispatch: {
4626 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
4627 getRoot()));
4628 return nullptr;
4629 }
4630
4631 case Intrinsic::masked_gather:
4632 visitMaskedGather(I);
4633 return nullptr;
5052 case Intrinsic::masked_load: 4634 case Intrinsic::masked_load:
5053 visitMaskedLoad(I); 4635 visitMaskedLoad(I);
4636 return nullptr;
4637 case Intrinsic::masked_scatter:
4638 visitMaskedScatter(I);
5054 return nullptr; 4639 return nullptr;
5055 case Intrinsic::masked_store: 4640 case Intrinsic::masked_store:
5056 visitMaskedStore(I); 4641 visitMaskedStore(I);
5057 return nullptr; 4642 return nullptr;
5058 case Intrinsic::x86_mmx_pslli_w: 4643 case Intrinsic::x86_mmx_pslli_w:
5102 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits 4687 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
5103 // to be zero. 4688 // to be zero.
5104 // We must do this early because v2i32 is not a legal type. 4689 // We must do this early because v2i32 is not a legal type.
5105 SDValue ShOps[2]; 4690 SDValue ShOps[2];
5106 ShOps[0] = ShAmt; 4691 ShOps[0] = ShAmt;
5107 ShOps[1] = DAG.getConstant(0, MVT::i32); 4692 ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
5108 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); 4693 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
5109 EVT DestVT = TLI.getValueType(I.getType()); 4694 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5110 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); 4695 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
5111 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, 4696 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
5112 DAG.getConstant(NewIntrinsic, MVT::i32), 4697 DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
5113 getValue(I.getArgOperand(0)), ShAmt); 4698 getValue(I.getArgOperand(0)), ShAmt);
5114 setValue(&I, Res);
5115 return nullptr;
5116 }
5117 case Intrinsic::x86_avx_vinsertf128_pd_256:
5118 case Intrinsic::x86_avx_vinsertf128_ps_256:
5119 case Intrinsic::x86_avx_vinsertf128_si_256:
5120 case Intrinsic::x86_avx2_vinserti128: {
5121 EVT DestVT = TLI.getValueType(I.getType());
5122 EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
5123 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
5124 ElVT.getVectorNumElements();
5125 Res =
5126 DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
5127 getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
5128 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
5129 setValue(&I, Res);
5130 return nullptr;
5131 }
5132 case Intrinsic::x86_avx_vextractf128_pd_256:
5133 case Intrinsic::x86_avx_vextractf128_ps_256:
5134 case Intrinsic::x86_avx_vextractf128_si_256:
5135 case Intrinsic::x86_avx2_vextracti128: {
5136 EVT DestVT = TLI.getValueType(I.getType());
5137 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
5138 DestVT.getVectorNumElements();
5139 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
5140 getValue(I.getArgOperand(0)),
5141 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
5142 setValue(&I, Res); 4699 setValue(&I, Res);
5143 return nullptr; 4700 return nullptr;
5144 } 4701 }
5145 case Intrinsic::convertff: 4702 case Intrinsic::convertff:
5146 case Intrinsic::convertfsi: 4703 case Intrinsic::convertfsi:
5162 case Intrinsic::convertss: Code = ISD::CVT_SS; break; 4719 case Intrinsic::convertss: Code = ISD::CVT_SS; break;
5163 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; 4720 case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
5164 case Intrinsic::convertus: Code = ISD::CVT_US; break; 4721 case Intrinsic::convertus: Code = ISD::CVT_US; break;
5165 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; 4722 case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
5166 } 4723 }
5167 EVT DestVT = TLI.getValueType(I.getType()); 4724 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5168 const Value *Op1 = I.getArgOperand(0); 4725 const Value *Op1 = I.getArgOperand(0);
5169 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), 4726 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
5170 DAG.getValueType(DestVT), 4727 DAG.getValueType(DestVT),
5171 DAG.getValueType(getValue(Op1).getValueType()), 4728 DAG.getValueType(getValue(Op1).getValueType()),
5172 getValue(I.getArgOperand(1)), 4729 getValue(I.getArgOperand(1)),
5252 getValue(I.getArgOperand(0)), 4809 getValue(I.getArgOperand(0)),
5253 getValue(I.getArgOperand(1)), 4810 getValue(I.getArgOperand(1)),
5254 getValue(I.getArgOperand(2)))); 4811 getValue(I.getArgOperand(2))));
5255 return nullptr; 4812 return nullptr;
5256 case Intrinsic::fmuladd: { 4813 case Intrinsic::fmuladd: {
5257 EVT VT = TLI.getValueType(I.getType()); 4814 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5258 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && 4815 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
5259 TLI.isFMAFasterThanFMulAndFAdd(VT)) { 4816 TLI.isFMAFasterThanFMulAndFAdd(VT)) {
5260 setValue(&I, DAG.getNode(ISD::FMA, sdl, 4817 setValue(&I, DAG.getNode(ISD::FMA, sdl,
5261 getValue(I.getArgOperand(0)).getValueType(), 4818 getValue(I.getArgOperand(0)).getValueType(),
5262 getValue(I.getArgOperand(0)), 4819 getValue(I.getArgOperand(0)),
5263 getValue(I.getArgOperand(1)), 4820 getValue(I.getArgOperand(1)),
5264 getValue(I.getArgOperand(2)))); 4821 getValue(I.getArgOperand(2))));
5265 } else { 4822 } else {
4823 // TODO: Intrinsic calls should have fast-math-flags.
5266 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, 4824 SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
5267 getValue(I.getArgOperand(0)).getValueType(), 4825 getValue(I.getArgOperand(0)).getValueType(),
5268 getValue(I.getArgOperand(0)), 4826 getValue(I.getArgOperand(0)),
5269 getValue(I.getArgOperand(1))); 4827 getValue(I.getArgOperand(1)));
5270 SDValue Add = DAG.getNode(ISD::FADD, sdl, 4828 SDValue Add = DAG.getNode(ISD::FADD, sdl,
5277 } 4835 }
5278 case Intrinsic::convert_to_fp16: 4836 case Intrinsic::convert_to_fp16:
5279 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, 4837 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
5280 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, 4838 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
5281 getValue(I.getArgOperand(0)), 4839 getValue(I.getArgOperand(0)),
5282 DAG.getTargetConstant(0, MVT::i32)))); 4840 DAG.getTargetConstant(0, sdl,
4841 MVT::i32))));
5283 return nullptr; 4842 return nullptr;
5284 case Intrinsic::convert_from_fp16: 4843 case Intrinsic::convert_from_fp16:
5285 setValue(&I, 4844 setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
5286 DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), 4845 TLI.getValueType(DAG.getDataLayout(), I.getType()),
5287 DAG.getNode(ISD::BITCAST, sdl, MVT::f16, 4846 DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
5288 getValue(I.getArgOperand(0))))); 4847 getValue(I.getArgOperand(0)))));
5289 return nullptr; 4848 return nullptr;
5290 case Intrinsic::pcmarker: { 4849 case Intrinsic::pcmarker: {
5291 SDValue Tmp = getValue(I.getArgOperand(0)); 4850 SDValue Tmp = getValue(I.getArgOperand(0));
5292 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); 4851 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
5293 return nullptr; 4852 return nullptr;
5302 } 4861 }
5303 case Intrinsic::bswap: 4862 case Intrinsic::bswap:
5304 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, 4863 setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
5305 getValue(I.getArgOperand(0)).getValueType(), 4864 getValue(I.getArgOperand(0)).getValueType(),
5306 getValue(I.getArgOperand(0)))); 4865 getValue(I.getArgOperand(0))));
4866 return nullptr;
4867 case Intrinsic::uabsdiff:
4868 setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
4869 getValue(I.getArgOperand(0)).getValueType(),
4870 getValue(I.getArgOperand(0)),
4871 getValue(I.getArgOperand(1))));
4872 return nullptr;
4873 case Intrinsic::sabsdiff:
4874 setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
4875 getValue(I.getArgOperand(0)).getValueType(),
4876 getValue(I.getArgOperand(0)),
4877 getValue(I.getArgOperand(1))));
5307 return nullptr; 4878 return nullptr;
5308 case Intrinsic::cttz: { 4879 case Intrinsic::cttz: {
5309 SDValue Arg = getValue(I.getArgOperand(0)); 4880 SDValue Arg = getValue(I.getArgOperand(0));
5310 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 4881 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5311 EVT Ty = Arg.getValueType(); 4882 EVT Ty = Arg.getValueType();
5327 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); 4898 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
5328 return nullptr; 4899 return nullptr;
5329 } 4900 }
5330 case Intrinsic::stacksave: { 4901 case Intrinsic::stacksave: {
5331 SDValue Op = getRoot(); 4902 SDValue Op = getRoot();
5332 Res = DAG.getNode(ISD::STACKSAVE, sdl, 4903 Res = DAG.getNode(
5333 DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); 4904 ISD::STACKSAVE, sdl,
4905 DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
5334 setValue(&I, Res); 4906 setValue(&I, Res);
5335 DAG.setRoot(Res.getValue(1)); 4907 DAG.setRoot(Res.getValue(1));
5336 return nullptr; 4908 return nullptr;
5337 } 4909 }
5338 case Intrinsic::stackrestore: { 4910 case Intrinsic::stackrestore: {
5342 } 4914 }
5343 case Intrinsic::stackprotector: { 4915 case Intrinsic::stackprotector: {
5344 // Emit code into the DAG to store the stack guard onto the stack. 4916 // Emit code into the DAG to store the stack guard onto the stack.
5345 MachineFunction &MF = DAG.getMachineFunction(); 4917 MachineFunction &MF = DAG.getMachineFunction();
5346 MachineFrameInfo *MFI = MF.getFrameInfo(); 4918 MachineFrameInfo *MFI = MF.getFrameInfo();
5347 EVT PtrTy = TLI.getPointerTy(); 4919 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5348 SDValue Src, Chain = getRoot(); 4920 SDValue Src, Chain = getRoot();
5349 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); 4921 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
5350 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); 4922 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
5351 4923
5352 // See if Ptr is a bitcast. If it is, look through it and see if we can get 4924 // See if Ptr is a bitcast. If it is, look through it and see if we can get
5388 MFI->setStackProtectorIndex(FI); 4960 MFI->setStackProtectorIndex(FI);
5389 4961
5390 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); 4962 SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
5391 4963
5392 // Store the stack protector onto the stack. 4964 // Store the stack protector onto the stack.
5393 Res = DAG.getStore(Chain, sdl, Src, FIN, 4965 Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
5394 MachinePointerInfo::getFixedStack(FI), 4966 DAG.getMachineFunction(), FI),
5395 true, false, 0); 4967 true, false, 0);
5396 setValue(&I, Res); 4968 setValue(&I, Res);
5397 DAG.setRoot(Res); 4969 DAG.setRoot(Res);
5398 return nullptr; 4970 return nullptr;
5399 } 4971 }
5405 4977
5406 SDValue Arg = getValue(I.getCalledValue()); 4978 SDValue Arg = getValue(I.getCalledValue());
5407 EVT Ty = Arg.getValueType(); 4979 EVT Ty = Arg.getValueType();
5408 4980
5409 if (CI->isZero()) 4981 if (CI->isZero())
5410 Res = DAG.getConstant(-1ULL, Ty); 4982 Res = DAG.getConstant(-1ULL, sdl, Ty);
5411 else 4983 else
5412 Res = DAG.getConstant(0, Ty); 4984 Res = DAG.getConstant(0, sdl, Ty);
5413 4985
5414 setValue(&I, Res); 4986 setValue(&I, Res);
5415 return nullptr; 4987 return nullptr;
5416 } 4988 }
5417 case Intrinsic::annotation: 4989 case Intrinsic::annotation:
5440 DAG.setRoot(Res); 5012 DAG.setRoot(Res);
5441 return nullptr; 5013 return nullptr;
5442 } 5014 }
5443 case Intrinsic::adjust_trampoline: { 5015 case Intrinsic::adjust_trampoline: {
5444 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, 5016 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
5445 TLI.getPointerTy(), 5017 TLI.getPointerTy(DAG.getDataLayout()),
5446 getValue(I.getArgOperand(0)))); 5018 getValue(I.getArgOperand(0))));
5447 return nullptr; 5019 return nullptr;
5448 } 5020 }
5449 case Intrinsic::gcroot: 5021 case Intrinsic::gcroot:
5450 if (GFI) { 5022 if (GFI) {
5468 return nullptr; 5040 return nullptr;
5469 } 5041 }
5470 5042
5471 case Intrinsic::debugtrap: 5043 case Intrinsic::debugtrap:
5472 case Intrinsic::trap: { 5044 case Intrinsic::trap: {
5473 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); 5045 StringRef TrapFuncName =
5046 I.getAttributes()
5047 .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
5048 .getValueAsString();
5474 if (TrapFuncName.empty()) { 5049 if (TrapFuncName.empty()) {
5475 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 5050 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
5476 ISD::TRAP : ISD::DEBUGTRAP; 5051 ISD::TRAP : ISD::DEBUGTRAP;
5477 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); 5052 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
5478 return nullptr; 5053 return nullptr;
5479 } 5054 }
5480 TargetLowering::ArgListTy Args; 5055 TargetLowering::ArgListTy Args;
5481 5056
5482 TargetLowering::CallLoweringInfo CLI(DAG); 5057 TargetLowering::CallLoweringInfo CLI(DAG);
5483 CLI.setDebugLoc(sdl).setChain(getRoot()) 5058 CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
5484 .setCallee(CallingConv::C, I.getType(), 5059 CallingConv::C, I.getType(),
5485 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), 5060 DAG.getExternalSymbol(TrapFuncName.data(),
5486 std::move(Args), 0); 5061 TLI.getPointerTy(DAG.getDataLayout())),
5062 std::move(Args), 0);
5487 5063
5488 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); 5064 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
5489 DAG.setRoot(Result.second); 5065 DAG.setRoot(Result.second);
5490 return nullptr; 5066 return nullptr;
5491 } 5067 }
5537 // Stack coloring is not enabled in O0, discard region information. 5113 // Stack coloring is not enabled in O0, discard region information.
5538 if (TM.getOptLevel() == CodeGenOpt::None) 5114 if (TM.getOptLevel() == CodeGenOpt::None)
5539 return nullptr; 5115 return nullptr;
5540 5116
5541 SmallVector<Value *, 4> Allocas; 5117 SmallVector<Value *, 4> Allocas;
5542 GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); 5118 GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
5543 5119
5544 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), 5120 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
5545 E = Allocas.end(); Object != E; ++Object) { 5121 E = Allocas.end(); Object != E; ++Object) {
5546 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); 5122 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
5547 5123
5557 5133
5558 int FI = SI->second; 5134 int FI = SI->second;
5559 5135
5560 SDValue Ops[2]; 5136 SDValue Ops[2];
5561 Ops[0] = getRoot(); 5137 Ops[0] = getRoot();
5562 Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); 5138 Ops[1] =
5139 DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
5563 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); 5140 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
5564 5141
5565 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); 5142 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
5566 DAG.setRoot(Res); 5143 DAG.setRoot(Res);
5567 } 5144 }
5568 return nullptr; 5145 return nullptr;
5569 } 5146 }
5570 case Intrinsic::invariant_start: 5147 case Intrinsic::invariant_start:
5571 // Discard region information. 5148 // Discard region information.
5572 setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); 5149 setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
5573 return nullptr; 5150 return nullptr;
5574 case Intrinsic::invariant_end: 5151 case Intrinsic::invariant_end:
5575 // Discard region information. 5152 // Discard region information.
5576 return nullptr; 5153 return nullptr;
5577 case Intrinsic::stackprotectorcheck: { 5154 case Intrinsic::stackprotectorcheck: {
5616 return nullptr; 5193 return nullptr;
5617 } 5194 }
5618 case Intrinsic::instrprof_increment: 5195 case Intrinsic::instrprof_increment:
5619 llvm_unreachable("instrprof failed to lower an increment"); 5196 llvm_unreachable("instrprof failed to lower an increment");
5620 5197
5621 case Intrinsic::frameallocate: { 5198 case Intrinsic::localescape: {
5622 MachineFunction &MF = DAG.getMachineFunction(); 5199 MachineFunction &MF = DAG.getMachineFunction();
5623 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); 5200 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5624 5201
5625 // Do the allocation and map it as a normal value. 5202 // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
5626 // FIXME: Maybe we should add this to the alloca map so that we don't have 5203 // is the same on all targets.
5627 // to register allocate it? 5204 for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
5628 uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); 5205 Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
5629 int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); 5206 if (isa<ConstantPointerNull>(Arg))
5630 MVT PtrVT = TLI.getPointerTy(0); 5207 continue; // Skip null pointers. They represent a hole in index space.
5631 SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); 5208 AllocaInst *Slot = cast<AllocaInst>(Arg);
5632 setValue(&I, FIVal); 5209 assert(FuncInfo.StaticAllocaMap.count(Slot) &&
5633 5210 "can only escape static allocas");
5634 // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is 5211 int FI = FuncInfo.StaticAllocaMap[Slot];
5635 // the same on all targets. 5212 MCSymbol *FrameAllocSym =
5213 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
5214 GlobalValue::getRealLinkageName(MF.getName()), Idx);
5215 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
5216 TII->get(TargetOpcode::LOCAL_ESCAPE))
5217 .addSym(FrameAllocSym)
5218 .addFrameIndex(FI);
5219 }
5220
5221 return nullptr;
5222 }
5223
5224 case Intrinsic::localrecover: {
5225 // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
5226 MachineFunction &MF = DAG.getMachineFunction();
5227 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
5228
5229 // Get the symbol that defines the frame offset.
5230 auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
5231 auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
5232 unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
5636 MCSymbol *FrameAllocSym = 5233 MCSymbol *FrameAllocSym =
5637 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); 5234 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
5638 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, 5235 GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
5639 TII->get(TargetOpcode::FRAME_ALLOC)) 5236
5640 .addSym(FrameAllocSym) 5237 // Create a MCSymbol for the label to avoid any target lowering
5641 .addFrameIndex(Alloc);
5642
5643 return nullptr;
5644 }
5645
5646 case Intrinsic::framerecover: {
5647 // i8* @llvm.framerecover(i8* %fn, i8* %fp)
5648 MachineFunction &MF = DAG.getMachineFunction();
5649 MVT PtrVT = TLI.getPointerTy(0);
5650
5651 // Get the symbol that defines the frame offset.
5652 Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
5653 MCSymbol *FrameAllocSym =
5654 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
5655
5656 // Create a TargetExternalSymbol for the label to avoid any target lowering
5657 // that would make this PC relative. 5238 // that would make this PC relative.
5658 StringRef Name = FrameAllocSym->getName(); 5239 SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
5659 assert(Name.size() == strlen(Name.data()) && "not null terminated");
5660 SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
5661 SDValue OffsetVal = 5240 SDValue OffsetVal =
5662 DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); 5241 DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
5663 5242
5664 // Add the offset to the FP. 5243 // Add the offset to the FP.
5665 Value *FP = I.getArgOperand(1); 5244 Value *FP = I.getArgOperand(1);
5666 SDValue FPVal = getValue(FP); 5245 SDValue FPVal = getValue(FP);
5667 SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); 5246 SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
5668 setValue(&I, Add); 5247 setValue(&I, Add);
5669 5248
5670 return nullptr; 5249 return nullptr;
5671 } 5250 }
5672 case Intrinsic::eh_begincatch: 5251
5673 case Intrinsic::eh_endcatch: 5252 case Intrinsic::eh_exceptionpointer:
5674 llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); 5253 case Intrinsic::eh_exceptioncode: {
5254 // Get the exception pointer vreg, copy from it, and resize it to fit.
5255 const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
5256 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
5257 const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
5258 unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
5259 SDValue N =
5260 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
5261 N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
5262 setValue(&I, N);
5263 return nullptr;
5264 }
5675 } 5265 }
5676 } 5266 }
5677 5267
5678 std::pair<SDValue, SDValue> 5268 std::pair<SDValue, SDValue>
5679 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, 5269 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
5680 MachineBasicBlock *LandingPad) { 5270 const BasicBlock *EHPadBB) {
5681 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 5271 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
5682 MCSymbol *BeginLabel = nullptr; 5272 MCSymbol *BeginLabel = nullptr;
5683 5273
5684 if (LandingPad) { 5274 if (EHPadBB) {
5685 // Insert a label before the invoke call to mark the try range. This can be 5275 // Insert a label before the invoke call to mark the try range. This can be
5686 // used to detect deletion of the invoke via the MachineModuleInfo. 5276 // used to detect deletion of the invoke via the MachineModuleInfo.
5687 BeginLabel = MMI.getContext().CreateTempSymbol(); 5277 BeginLabel = MMI.getContext().createTempSymbol();
5688 5278
5689 // For SjLj, keep track of which landing pads go with which invokes 5279 // For SjLj, keep track of which landing pads go with which invokes
5690 // so as to maintain the ordering of pads in the LSDA. 5280 // so as to maintain the ordering of pads in the LSDA.
5691 unsigned CallSiteIndex = MMI.getCurrentCallSite(); 5281 unsigned CallSiteIndex = MMI.getCurrentCallSite();
5692 if (CallSiteIndex) { 5282 if (CallSiteIndex) {
5693 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); 5283 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
5694 LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); 5284 LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
5695 5285
5696 // Now that the call site is handled, stop tracking it. 5286 // Now that the call site is handled, stop tracking it.
5697 MMI.setCurrentCallSite(0); 5287 MMI.setCurrentCallSite(0);
5698 } 5288 }
5699 5289
5722 PendingExports.clear(); 5312 PendingExports.clear();
5723 } else { 5313 } else {
5724 DAG.setRoot(Result.second); 5314 DAG.setRoot(Result.second);
5725 } 5315 }
5726 5316
5727 if (LandingPad) { 5317 if (EHPadBB) {
5728 // Insert a label at the end of the invoke call to mark the try range. This 5318 // Insert a label at the end of the invoke call to mark the try range. This
5729 // can be used to detect deletion of the invoke via the MachineModuleInfo. 5319 // can be used to detect deletion of the invoke via the MachineModuleInfo.
5730 MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); 5320 MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
5731 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); 5321 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
5732 5322
5733 // Inform MachineModuleInfo of range. 5323 // Inform MachineModuleInfo of range.
5734 MMI.addInvoke(LandingPad, BeginLabel, EndLabel); 5324 if (MMI.hasEHFunclets()) {
5325 WinEHFuncInfo &EHInfo =
5326 MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
5327 EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel);
5328 } else {
5329 MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
5330 }
5735 } 5331 }
5736 5332
5737 return Result; 5333 return Result;
5738 } 5334 }
5739 5335
5740 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, 5336 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
5741 bool isTailCall, 5337 bool isTailCall,
5742 MachineBasicBlock *LandingPad) { 5338 const BasicBlock *EHPadBB) {
5743 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 5339 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
5744 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 5340 FunctionType *FTy = cast<FunctionType>(PT->getElementType());
5745 Type *RetTy = FTy->getReturnType(); 5341 Type *RetTy = FTy->getReturnType();
5746 5342
5747 TargetLowering::ArgListTy Args; 5343 TargetLowering::ArgListTy Args;
5760 Entry.Node = ArgNode; Entry.Ty = V->getType(); 5356 Entry.Node = ArgNode; Entry.Ty = V->getType();
5761 5357
5762 // Skip the first return-type Attribute to get to params. 5358 // Skip the first return-type Attribute to get to params.
5763 Entry.setAttributes(&CS, i - CS.arg_begin() + 1); 5359 Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
5764 Args.push_back(Entry); 5360 Args.push_back(Entry);
5361
5362 // If we have an explicit sret argument that is an Instruction, (i.e., it
5363 // might point to function-local memory), we can't meaningfully tail-call.
5364 if (Entry.isSRet && isa<Instruction>(V))
5365 isTailCall = false;
5765 } 5366 }
5766 5367
5767 // Check if target-independent constraints permit a tail call here. 5368 // Check if target-independent constraints permit a tail call here.
5768 // Target-dependent constraints are checked within TLI->LowerCallTo. 5369 // Target-dependent constraints are checked within TLI->LowerCallTo.
5769 if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) 5370 if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
5798 else if (CS.getCalledValue()->getType()->isPointerTy()) // if it is a pointer access; ex) goto codesegmentPointer; 5399 else if (CS.getCalledValue()->getType()->isPointerTy()) // if it is a pointer access; ex) goto codesegmentPointer;
5799 DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() + 5400 DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() +
5800 " : Tail call elimination was failed on codesegment which is accessed by pointer!"); // we can't get name from Type... 5401 " : Tail call elimination was failed on codesegment which is accessed by pointer!"); // we can't get name from Type...
5801 } 5402 }
5802 #endif 5403 #endif
5803 std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); 5404 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
5804 5405
5805 if (Result.first.getNode()) 5406 if (Result.first.getNode())
5806 setValue(CS.getInstruction(), Result.first); 5407 setValue(CS.getInstruction(), Result.first);
5807 } 5408 }
5808 5409
5830 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { 5431 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
5831 // Cast pointer to the type we really want to load. 5432 // Cast pointer to the type we really want to load.
5832 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), 5433 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
5833 PointerType::getUnqual(LoadTy)); 5434 PointerType::getUnqual(LoadTy));
5834 5435
5835 if (const Constant *LoadCst = 5436 if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
5836 ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), 5437 const_cast<Constant *>(LoadInput), *Builder.DL))
5837 Builder.DL))
5838 return Builder.getValue(LoadCst); 5438 return Builder.getValue(LoadCst);
5839 } 5439 }
5840 5440
5841 // Otherwise, we have to emit the load. If the pointer is to unfoldable but 5441 // Otherwise, we have to emit the load. If the pointer is to unfoldable but
5842 // still constant memory, the input chain can be the entry node. 5442 // still constant memory, the input chain can be the entry node.
5867 /// processIntegerCallValue - Record the value for an instruction that 5467 /// processIntegerCallValue - Record the value for an instruction that
5868 /// produces an integer result, converting the type where necessary. 5468 /// produces an integer result, converting the type where necessary.
5869 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, 5469 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
5870 SDValue Value, 5470 SDValue Value,
5871 bool IsSigned) { 5471 bool IsSigned) {
5872 EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); 5472 EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
5473 I.getType(), true);
5873 if (IsSigned) 5474 if (IsSigned)
5874 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); 5475 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
5875 else 5476 else
5876 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); 5477 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
5877 setValue(&I, Value); 5478 setValue(&I, Value);
5892 return false; 5493 return false;
5893 5494
5894 const Value *Size = I.getArgOperand(2); 5495 const Value *Size = I.getArgOperand(2);
5895 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); 5496 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
5896 if (CSize && CSize->getZExtValue() == 0) { 5497 if (CSize && CSize->getZExtValue() == 0) {
5897 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); 5498 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
5898 setValue(&I, DAG.getConstant(0, CallVT)); 5499 I.getType(), true);
5500 setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
5899 return true; 5501 return true;
5900 } 5502 }
5901 5503
5902 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5504 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
5903 std::pair<SDValue, SDValue> Res = 5505 std::pair<SDValue, SDValue> Res =
6176 RenameFn = visitIntrinsicCall(I, IID); 5778 RenameFn = visitIntrinsicCall(I, IID);
6177 if (!RenameFn) 5779 if (!RenameFn)
6178 return; 5780 return;
6179 } 5781 }
6180 } 5782 }
6181 if (unsigned IID = F->getIntrinsicID()) { 5783 if (Intrinsic::ID IID = F->getIntrinsicID()) {
6182 RenameFn = visitIntrinsicCall(I, IID); 5784 RenameFn = visitIntrinsicCall(I, IID);
6183 if (!RenameFn) 5785 if (!RenameFn)
6184 return; 5786 return;
6185 } 5787 }
6186 } 5788 }
6329 5931
6330 SDValue Callee; 5932 SDValue Callee;
6331 if (!RenameFn) 5933 if (!RenameFn)
6332 Callee = getValue(I.getCalledValue()); 5934 Callee = getValue(I.getCalledValue());
6333 else 5935 else
6334 Callee = DAG.getExternalSymbol(RenameFn, 5936 Callee = DAG.getExternalSymbol(
6335 DAG.getTargetLoweringInfo().getPointerTy()); 5937 RenameFn,
5938 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
6336 5939
6337 // Check if we can potentially perform a tail call. More detailed checking is 5940 // Check if we can potentially perform a tail call. More detailed checking is
6338 // be done within LowerCallTo, after more information about the call is known. 5941 // be done within LowerCallTo, after more information about the call is known.
6339 LowerCallTo(&I, Callee, I.isTailCall()); 5942 LowerCallTo(&I, Callee, I.isTailCall());
6340 } 5943 }
6359 } 5962 }
6360 5963
6361 /// getCallOperandValEVT - Return the EVT of the Value* that this operand 5964 /// getCallOperandValEVT - Return the EVT of the Value* that this operand
6362 /// corresponds to. If there is no Value* for this operand, it returns 5965 /// corresponds to. If there is no Value* for this operand, it returns
6363 /// MVT::Other. 5966 /// MVT::Other.
6364 EVT getCallOperandValEVT(LLVMContext &Context, 5967 EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
6365 const TargetLowering &TLI, 5968 const DataLayout &DL) const {
6366 const DataLayout *DL) const {
6367 if (!CallOperandVal) return MVT::Other; 5969 if (!CallOperandVal) return MVT::Other;
6368 5970
6369 if (isa<BasicBlock>(CallOperandVal)) 5971 if (isa<BasicBlock>(CallOperandVal))
6370 return TLI.getPointerTy(); 5972 return TLI.getPointerTy(DL);
6371 5973
6372 llvm::Type *OpTy = CallOperandVal->getType(); 5974 llvm::Type *OpTy = CallOperandVal->getType();
6373 5975
6374 // FIXME: code duplicated from TargetLowering::ParseConstraints(). 5976 // FIXME: code duplicated from TargetLowering::ParseConstraints().
6375 // If this is an indirect operand, the operand is a pointer to the 5977 // If this is an indirect operand, the operand is a pointer to the
6387 OpTy = STy->getElementType(0); 5989 OpTy = STy->getElementType(0);
6388 5990
6389 // If OpTy is not a single value, it may be a struct/union that we 5991 // If OpTy is not a single value, it may be a struct/union that we
6390 // can tile with integers. 5992 // can tile with integers.
6391 if (!OpTy->isSingleValueType() && OpTy->isSized()) { 5993 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6392 unsigned BitSize = DL->getTypeSizeInBits(OpTy); 5994 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6393 switch (BitSize) { 5995 switch (BitSize) {
6394 default: break; 5996 default: break;
6395 case 1: 5997 case 1:
6396 case 8: 5998 case 8:
6397 case 16: 5999 case 16:
6401 OpTy = IntegerType::get(Context, BitSize); 6003 OpTy = IntegerType::get(Context, BitSize);
6402 break; 6004 break;
6403 } 6005 }
6404 } 6006 }
6405 6007
6406 return TLI.getValueType(OpTy, true); 6008 return TLI.getValueType(DL, OpTy, true);
6407 } 6009 }
6408 }; 6010 };
6409 6011
6410 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; 6012 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
6411 6013
6428 MachineFunction &MF = DAG.getMachineFunction(); 6030 MachineFunction &MF = DAG.getMachineFunction();
6429 SmallVector<unsigned, 4> Regs; 6031 SmallVector<unsigned, 4> Regs;
6430 6032
6431 // If this is a constraint for a single physreg, or a constraint for a 6033 // If this is a constraint for a single physreg, or a constraint for a
6432 // register class, find it. 6034 // register class, find it.
6433 std::pair<unsigned, const TargetRegisterClass*> PhysReg = 6035 std::pair<unsigned, const TargetRegisterClass *> PhysReg =
6434 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 6036 TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
6435 OpInfo.ConstraintVT); 6037 OpInfo.ConstraintCode,
6038 OpInfo.ConstraintVT);
6436 6039
6437 unsigned NumRegs = 1; 6040 unsigned NumRegs = 1;
6438 if (OpInfo.ConstraintVT != MVT::Other) { 6041 if (OpInfo.ConstraintVT != MVT::Other) {
6439 // If this is a FP input in an integer register (or visa versa) insert a bit 6042 // If this is a FP input in an integer register (or visa versa) insert a bit
6440 // cast of the input value. More generally, handle any case where the input 6043 // cast of the input value. More generally, handle any case where the input
6526 6129
6527 /// ConstraintOperands - Information about all of the constraints. 6130 /// ConstraintOperands - Information about all of the constraints.
6528 SDISelAsmOperandInfoVector ConstraintOperands; 6131 SDISelAsmOperandInfoVector ConstraintOperands;
6529 6132
6530 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6133 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6531 TargetLowering::AsmOperandInfoVector 6134 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
6532 TargetConstraints = TLI.ParseConstraints(CS); 6135 DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
6533 6136
6534 bool hasMemory = false; 6137 bool hasMemory = false;
6535 6138
6536 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. 6139 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6537 unsigned ResNo = 0; // ResNo - The result number of the next output. 6140 unsigned ResNo = 0; // ResNo - The result number of the next output.
6552 6155
6553 // The return value of the call is this value. As such, there is no 6156 // The return value of the call is this value. As such, there is no
6554 // corresponding argument. 6157 // corresponding argument.
6555 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 6158 assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
6556 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { 6159 if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
6557 OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); 6160 OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
6161 STy->getElementType(ResNo));
6558 } else { 6162 } else {
6559 assert(ResNo == 0 && "Asm only has one result!"); 6163 assert(ResNo == 0 && "Asm only has one result!");
6560 OpVT = TLI.getSimpleValueType(CS.getType()); 6164 OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
6561 } 6165 }
6562 ++ResNo; 6166 ++ResNo;
6563 break; 6167 break;
6564 case InlineAsm::isInput: 6168 case InlineAsm::isInput:
6565 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 6169 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
6576 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); 6180 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
6577 } else { 6181 } else {
6578 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); 6182 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
6579 } 6183 }
6580 6184
6581 OpVT = 6185 OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
6582 OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); 6186 DAG.getDataLayout()).getSimpleVT();
6583 } 6187 }
6584 6188
6585 OpInfo.ConstraintVT = OpVT; 6189 OpInfo.ConstraintVT = OpVT;
6586 6190
6587 // Indirect operand accesses access memory. 6191 // Indirect operand accesses access memory.
6619 // error. 6223 // error.
6620 if (OpInfo.hasMatchingInput()) { 6224 if (OpInfo.hasMatchingInput()) {
6621 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; 6225 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6622 6226
6623 if (OpInfo.ConstraintVT != Input.ConstraintVT) { 6227 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6624 std::pair<unsigned, const TargetRegisterClass*> MatchRC = 6228 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
6625 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 6229 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6626 OpInfo.ConstraintVT); 6230 TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6627 std::pair<unsigned, const TargetRegisterClass*> InputRC = 6231 OpInfo.ConstraintVT);
6628 TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, 6232 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6629 Input.ConstraintVT); 6233 TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6234 Input.ConstraintVT);
6630 if ((OpInfo.ConstraintVT.isInteger() != 6235 if ((OpInfo.ConstraintVT.isInteger() !=
6631 Input.ConstraintVT.isInteger()) || 6236 Input.ConstraintVT.isInteger()) ||
6632 (MatchRC.second != InputRC.second)) { 6237 (MatchRC.second != InputRC.second)) {
6633 report_fatal_error("Unsupported asm: input constraint" 6238 report_fatal_error("Unsupported asm: input constraint"
6634 " with a matching output constraint of" 6239 " with a matching output constraint of"
6664 // If the operand is a float, integer, or vector constant, spill to a 6269 // If the operand is a float, integer, or vector constant, spill to a
6665 // constant pool entry to get its address. 6270 // constant pool entry to get its address.
6666 const Value *OpVal = OpInfo.CallOperandVal; 6271 const Value *OpVal = OpInfo.CallOperandVal;
6667 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || 6272 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
6668 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { 6273 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
6669 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), 6274 OpInfo.CallOperand = DAG.getConstantPool(
6670 TLI.getPointerTy()); 6275 cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
6671 } else { 6276 } else {
6672 // Otherwise, create a stack slot and emit a store to it before the 6277 // Otherwise, create a stack slot and emit a store to it before the
6673 // asm. 6278 // asm.
6674 Type *Ty = OpVal->getType(); 6279 Type *Ty = OpVal->getType();
6675 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 6280 auto &DL = DAG.getDataLayout();
6676 unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); 6281 uint64_t TySize = DL.getTypeAllocSize(Ty);
6282 unsigned Align = DL.getPrefTypeAlignment(Ty);
6677 MachineFunction &MF = DAG.getMachineFunction(); 6283 MachineFunction &MF = DAG.getMachineFunction();
6678 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 6284 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
6679 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); 6285 SDValue StackSlot =
6680 Chain = DAG.getStore(Chain, getCurSDLoc(), 6286 DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
6681 OpInfo.CallOperand, StackSlot, 6287 Chain = DAG.getStore(
6682 MachinePointerInfo::getFixedStack(SSFI), 6288 Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
6683 false, false, 0); 6289 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
6290 false, false, 0);
6684 OpInfo.CallOperand = StackSlot; 6291 OpInfo.CallOperand = StackSlot;
6685 } 6292 }
6686 6293
6687 // There is no longer a Value* corresponding to this operand. 6294 // There is no longer a Value* corresponding to this operand.
6688 OpInfo.CallOperandVal = nullptr; 6295 OpInfo.CallOperandVal = nullptr;
6709 } 6316 }
6710 6317
6711 // AsmNodeOperands - The operands for the ISD::INLINEASM node. 6318 // AsmNodeOperands - The operands for the ISD::INLINEASM node.
6712 std::vector<SDValue> AsmNodeOperands; 6319 std::vector<SDValue> AsmNodeOperands;
6713 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain 6320 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
6714 AsmNodeOperands.push_back( 6321 AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
6715 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), 6322 IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
6716 TLI.getPointerTy()));
6717 6323
6718 // If we have a !srcloc metadata node associated with it, we want to attach 6324 // If we have a !srcloc metadata node associated with it, we want to attach
6719 // this to the ultimately generated inline asm machineinstr. To do this, we 6325 // this to the ultimately generated inline asm machineinstr. To do this, we
6720 // pass in the third operand as this (potentially null) inline asm MDNode. 6326 // pass in the third operand as this (potentially null) inline asm MDNode.
6721 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); 6327 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
6751 else if (OpInfo.Type == InlineAsm::isClobber) 6357 else if (OpInfo.Type == InlineAsm::isClobber)
6752 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); 6358 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
6753 } 6359 }
6754 } 6360 }
6755 6361
6756 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, 6362 AsmNodeOperands.push_back(DAG.getTargetConstant(
6757 TLI.getPointerTy())); 6363 ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
6758 6364
6759 // Loop over all of the inputs, copying the operand values into the 6365 // Loop over all of the inputs, copying the operand values into the
6760 // appropriate registers and processing the output regs. 6366 // appropriate registers and processing the output regs.
6761 RegsForValue RetValRegs; 6367 RegsForValue RetValRegs;
6762 6368
6771 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && 6377 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
6772 OpInfo.ConstraintType != TargetLowering::C_Register) { 6378 OpInfo.ConstraintType != TargetLowering::C_Register) {
6773 // Memory output, or 'other' output (e.g. 'X' constraint). 6379 // Memory output, or 'other' output (e.g. 'X' constraint).
6774 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); 6380 assert(OpInfo.isIndirect && "Memory output must be indirect operand");
6775 6381
6382 unsigned ConstraintID =
6383 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
6384 assert(ConstraintID != InlineAsm::Constraint_Unknown &&
6385 "Failed to convert memory constraint code to constraint id.");
6386
6776 // Add information to the INLINEASM node to know about this output. 6387 // Add information to the INLINEASM node to know about this output.
6777 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6388 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
6778 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, 6389 OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
6779 TLI.getPointerTy())); 6390 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
6391 MVT::i32));
6780 AsmNodeOperands.push_back(OpInfo.CallOperand); 6392 AsmNodeOperands.push_back(OpInfo.CallOperand);
6781 break; 6393 break;
6782 } 6394 }
6783 6395
6784 // Otherwise, this is a register or register class output. 6396 // Otherwise, this is a register or register class output.
6809 // set. 6421 // set.
6810 OpInfo.AssignedRegs 6422 OpInfo.AssignedRegs
6811 .AddInlineAsmOperands(OpInfo.isEarlyClobber 6423 .AddInlineAsmOperands(OpInfo.isEarlyClobber
6812 ? InlineAsm::Kind_RegDefEarlyClobber 6424 ? InlineAsm::Kind_RegDefEarlyClobber
6813 : InlineAsm::Kind_RegDef, 6425 : InlineAsm::Kind_RegDef,
6814 false, 0, DAG, AsmNodeOperands); 6426 false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
6815 break; 6427 break;
6816 } 6428 }
6817 case InlineAsm::isInput: { 6429 case InlineAsm::isInput: {
6818 SDValue InOperandVal = OpInfo.CallOperand; 6430 SDValue InOperandVal = OpInfo.CallOperand;
6819 6431
6864 "inline asm error: This value" 6476 "inline asm error: This value"
6865 " type register class is not natively supported!"); 6477 " type register class is not natively supported!");
6866 return; 6478 return;
6867 } 6479 }
6868 } 6480 }
6481 SDLoc dl = getCurSDLoc();
6869 // Use the produced MatchedRegs object to 6482 // Use the produced MatchedRegs object to
6870 MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6483 MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
6871 Chain, &Flag, CS.getInstruction()); 6484 Chain, &Flag, CS.getInstruction());
6872 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, 6485 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
6873 true, OpInfo.getMatchedOperand(), 6486 true, OpInfo.getMatchedOperand(), dl,
6874 DAG, AsmNodeOperands); 6487 DAG, AsmNodeOperands);
6875 break; 6488 break;
6876 } 6489 }
6877 6490
6878 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); 6491 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
6879 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && 6492 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
6880 "Unexpected number of operands"); 6493 "Unexpected number of operands");
6881 // Add information to the INLINEASM node to know about this input. 6494 // Add information to the INLINEASM node to know about this input.
6882 // See InlineAsm.h isUseOperandTiedToDef. 6495 // See InlineAsm.h isUseOperandTiedToDef.
6496 OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
6883 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, 6497 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
6884 OpInfo.getMatchedOperand()); 6498 OpInfo.getMatchedOperand());
6885 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, 6499 AsmNodeOperands.push_back(DAG.getTargetConstant(
6886 TLI.getPointerTy())); 6500 OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
6887 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); 6501 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
6888 break; 6502 break;
6889 } 6503 }
6890 6504
6891 // Treat indirect 'X' constraint as memory. 6505 // Treat indirect 'X' constraint as memory.
6906 } 6520 }
6907 6521
6908 // Add information to the INLINEASM node to know about this input. 6522 // Add information to the INLINEASM node to know about this input.
6909 unsigned ResOpType = 6523 unsigned ResOpType =
6910 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); 6524 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
6911 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 6525 AsmNodeOperands.push_back(DAG.getTargetConstant(
6912 TLI.getPointerTy())); 6526 ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
6913 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); 6527 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
6914 break; 6528 break;
6915 } 6529 }
6916 6530
6917 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { 6531 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
6918 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); 6532 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
6919 assert(InOperandVal.getValueType() == TLI.getPointerTy() && 6533 assert(InOperandVal.getValueType() ==
6534 TLI.getPointerTy(DAG.getDataLayout()) &&
6920 "Memory operands expect pointer values"); 6535 "Memory operands expect pointer values");
6536
6537 unsigned ConstraintID =
6538 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
6539 assert(ConstraintID != InlineAsm::Constraint_Unknown &&
6540 "Failed to convert memory constraint code to constraint id.");
6921 6541
6922 // Add information to the INLINEASM node to know about this input. 6542 // Add information to the INLINEASM node to know about this input.
6923 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6543 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
6544 ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
6924 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 6545 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
6925 TLI.getPointerTy())); 6546 getCurSDLoc(),
6547 MVT::i32));
6926 AsmNodeOperands.push_back(InOperandVal); 6548 AsmNodeOperands.push_back(InOperandVal);
6927 break; 6549 break;
6928 } 6550 }
6929 6551
6930 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || 6552 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
6948 "couldn't allocate input reg for constraint '" + 6570 "couldn't allocate input reg for constraint '" +
6949 Twine(OpInfo.ConstraintCode) + "'"); 6571 Twine(OpInfo.ConstraintCode) + "'");
6950 return; 6572 return;
6951 } 6573 }
6952 6574
6953 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6575 SDLoc dl = getCurSDLoc();
6576
6577 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
6954 Chain, &Flag, CS.getInstruction()); 6578 Chain, &Flag, CS.getInstruction());
6955 6579
6956 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, 6580 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
6957 DAG, AsmNodeOperands); 6581 dl, DAG, AsmNodeOperands);
6958 break; 6582 break;
6959 } 6583 }
6960 case InlineAsm::isClobber: { 6584 case InlineAsm::isClobber: {
6961 // Add the clobbered value to the operand list, so that the register 6585 // Add the clobbered value to the operand list, so that the register
6962 // allocator is aware that the physreg got clobbered. 6586 // allocator is aware that the physreg got clobbered.
6963 if (!OpInfo.AssignedRegs.Regs.empty()) 6587 if (!OpInfo.AssignedRegs.Regs.empty())
6964 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, 6588 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
6965 false, 0, DAG, 6589 false, 0, getCurSDLoc(), DAG,
6966 AsmNodeOperands); 6590 AsmNodeOperands);
6967 break; 6591 break;
6968 } 6592 }
6969 } 6593 }
6970 } 6594 }
6983 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 6607 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
6984 Chain, &Flag, CS.getInstruction()); 6608 Chain, &Flag, CS.getInstruction());
6985 6609
6986 // FIXME: Why don't we do this for inline asms with MRVs? 6610 // FIXME: Why don't we do this for inline asms with MRVs?
6987 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { 6611 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
6988 EVT ResultType = TLI.getValueType(CS.getType()); 6612 EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
6989 6613
6990 // If any of the results of the inline asm is a vector, it may have the 6614 // If any of the results of the inline asm is a vector, it may have the
6991 // wrong width/num elts. This can happen for register classes that can 6615 // wrong width/num elts. This can happen for register classes that can
6992 // contain multiple different value types. The preg or vreg allocated may 6616 // contain multiple different value types. The preg or vreg allocated may
6993 // not have the same VT as was expected. Convert it to the right type 6617 // not have the same VT as was expected. Convert it to the right type
7049 DAG.getSrcValue(I.getArgOperand(0)))); 6673 DAG.getSrcValue(I.getArgOperand(0))));
7050 } 6674 }
7051 6675
7052 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { 6676 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
7053 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6677 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7054 const DataLayout &DL = *TLI.getDataLayout(); 6678 const DataLayout &DL = DAG.getDataLayout();
7055 SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), 6679 SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
7056 getRoot(), getValue(I.getOperand(0)), 6680 getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
7057 DAG.getSrcValue(I.getOperand(0)), 6681 DAG.getSrcValue(I.getOperand(0)),
7058 DL.getABITypeAlignment(I.getType())); 6682 DL.getABITypeAlignment(I.getType()));
7059 setValue(&I, V); 6683 setValue(&I, V);
7060 DAG.setRoot(V.getValue(1)); 6684 DAG.setRoot(V.getValue(1));
7061 } 6685 }
7081 /// \return A tuple of <return-value, token-chain> 6705 /// \return A tuple of <return-value, token-chain>
7082 /// 6706 ///
7083 /// This is a helper for lowering intrinsics that follow a target calling 6707 /// This is a helper for lowering intrinsics that follow a target calling
7084 /// convention or require stack pointer adjustment. Only a subset of the 6708 /// convention or require stack pointer adjustment. Only a subset of the
7085 /// intrinsic's operands need to participate in the calling convention. 6709 /// intrinsic's operands need to participate in the calling convention.
7086 std::pair<SDValue, SDValue> 6710 std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
7087 SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, 6711 ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
7088 unsigned NumArgs, SDValue Callee, 6712 Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
7089 bool UseVoidTy,
7090 MachineBasicBlock *LandingPad,
7091 bool IsPatchPoint) {
7092 TargetLowering::ArgListTy Args; 6713 TargetLowering::ArgListTy Args;
7093 Args.reserve(NumArgs); 6714 Args.reserve(NumArgs);
7094 6715
7095 // Populate the argument list. 6716 // Populate the argument list.
7096 // Attributes for args start at offset 1, after the return attribute. 6717 // Attributes for args start at offset 1, after the return attribute.
7105 Entry.Ty = V->getType(); 6726 Entry.Ty = V->getType();
7106 Entry.setAttributes(&CS, AttrI); 6727 Entry.setAttributes(&CS, AttrI);
7107 Args.push_back(Entry); 6728 Args.push_back(Entry);
7108 } 6729 }
7109 6730
7110 Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
7111 TargetLowering::CallLoweringInfo CLI(DAG); 6731 TargetLowering::CallLoweringInfo CLI(DAG);
7112 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) 6732 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
7113 .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) 6733 .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
7114 .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); 6734 .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
7115 6735
7116 return lowerInvokable(CLI, LandingPad); 6736 return lowerInvokable(CLI, EHPadBB);
7117 } 6737 }
7118 6738
7119 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap 6739 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap
7120 /// or patchpoint target node's operand list. 6740 /// or patchpoint target node's operand list.
7121 /// 6741 ///
7132 /// location is valid at any point during execution (this is similar to the 6752 /// location is valid at any point during execution (this is similar to the
7133 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were 6753 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
7134 /// only available in a register, then the runtime would need to trap when 6754 /// only available in a register, then the runtime would need to trap when
7135 /// execution reaches the StackMap in order to read the alloca's location. 6755 /// execution reaches the StackMap in order to read the alloca's location.
7136 static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, 6756 static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
7137 SmallVectorImpl<SDValue> &Ops, 6757 SDLoc DL, SmallVectorImpl<SDValue> &Ops,
7138 SelectionDAGBuilder &Builder) { 6758 SelectionDAGBuilder &Builder) {
7139 for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { 6759 for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
7140 SDValue OpVal = Builder.getValue(CS.getArgument(i)); 6760 SDValue OpVal = Builder.getValue(CS.getArgument(i));
7141 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { 6761 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
7142 Ops.push_back( 6762 Ops.push_back(
7143 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); 6763 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
7144 Ops.push_back( 6764 Ops.push_back(
7145 Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); 6765 Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
7146 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { 6766 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
7147 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); 6767 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
7148 Ops.push_back( 6768 Ops.push_back(Builder.DAG.getTargetFrameIndex(
7149 Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); 6769 FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
7150 } else 6770 } else
7151 Ops.push_back(OpVal); 6771 Ops.push_back(OpVal);
7152 } 6772 }
7153 } 6773 }
7154 6774
7162 SDValue Chain, InFlag, Callee, NullPtr; 6782 SDValue Chain, InFlag, Callee, NullPtr;
7163 SmallVector<SDValue, 32> Ops; 6783 SmallVector<SDValue, 32> Ops;
7164 6784
7165 SDLoc DL = getCurSDLoc(); 6785 SDLoc DL = getCurSDLoc();
7166 Callee = getValue(CI.getCalledValue()); 6786 Callee = getValue(CI.getCalledValue());
7167 NullPtr = DAG.getIntPtrConstant(0, true); 6787 NullPtr = DAG.getIntPtrConstant(0, DL, true);
7168 6788
7169 // The stackmap intrinsic only records the live variables (the arguemnts 6789 // The stackmap intrinsic only records the live variables (the arguemnts
7170 // passed to it) and emits NOPS (if requested). Unlike the patchpoint 6790 // passed to it) and emits NOPS (if requested). Unlike the patchpoint
7171 // intrinsic, this won't be lowered to a function call. This means we don't 6791 // intrinsic, this won't be lowered to a function call. This means we don't
7172 // have to worry about calling conventions and target specific lowering code. 6792 // have to worry about calling conventions and target specific lowering code.
7180 InFlag = Chain.getValue(1); 6800 InFlag = Chain.getValue(1);
7181 6801
7182 // Add the <id> and <numBytes> constants. 6802 // Add the <id> and <numBytes> constants.
7183 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); 6803 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
7184 Ops.push_back(DAG.getTargetConstant( 6804 Ops.push_back(DAG.getTargetConstant(
7185 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); 6805 cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
7186 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); 6806 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
7187 Ops.push_back(DAG.getTargetConstant( 6807 Ops.push_back(DAG.getTargetConstant(
7188 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); 6808 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
6809 MVT::i32));
7189 6810
7190 // Push live variables for the stack map. 6811 // Push live variables for the stack map.
7191 addStackMapLiveVars(&CI, 2, Ops, *this); 6812 addStackMapLiveVars(&CI, 2, DL, Ops, *this);
7192 6813
7193 // We are not pushing any register mask info here on the operands list, 6814 // We are not pushing any register mask info here on the operands list,
7194 // because the stackmap doesn't clobber anything. 6815 // because the stackmap doesn't clobber anything.
7195 6816
7196 // Push the chain and the glue flag. 6817 // Push the chain and the glue flag.
7214 FuncInfo.MF->getFrameInfo()->setHasStackMap(); 6835 FuncInfo.MF->getFrameInfo()->setHasStackMap();
7215 } 6836 }
7216 6837
7217 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 6838 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
7218 void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, 6839 void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
7219 MachineBasicBlock *LandingPad) { 6840 const BasicBlock *EHPadBB) {
7220 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, 6841 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
7221 // i32 <numBytes>, 6842 // i32 <numBytes>,
7222 // i8* <target>, 6843 // i8* <target>,
7223 // i32 <numArgs>, 6844 // i32 <numArgs>,
7224 // [Args...], 6845 // [Args...],
7225 // [live variables...]) 6846 // [live variables...])
7226 6847
7227 CallingConv::ID CC = CS.getCallingConv(); 6848 CallingConv::ID CC = CS.getCallingConv();
7228 bool IsAnyRegCC = CC == CallingConv::AnyReg; 6849 bool IsAnyRegCC = CC == CallingConv::AnyReg;
7229 bool HasDef = !CS->getType()->isVoidTy(); 6850 bool HasDef = !CS->getType()->isVoidTy();
7230 SDValue Callee = getValue(CS->getOperand(2)); // <target> 6851 SDLoc dl = getCurSDLoc();
6852 SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
6853
6854 // Handle immediate and symbolic callees.
6855 if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
6856 Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
6857 /*isTarget=*/true);
6858 else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
6859 Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
6860 SDLoc(SymbolicCallee),
6861 SymbolicCallee->getValueType(0));
7231 6862
7232 // Get the real number of arguments participating in the call <numArgs> 6863 // Get the real number of arguments participating in the call <numArgs>
7233 SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); 6864 SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
7234 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); 6865 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
7235 6866
7239 assert(CS.arg_size() >= NumMetaOpers + NumArgs && 6870 assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
7240 "Not enough arguments provided to the patchpoint intrinsic"); 6871 "Not enough arguments provided to the patchpoint intrinsic");
7241 6872
7242 // For AnyRegCC the arguments are lowered later on manually. 6873 // For AnyRegCC the arguments are lowered later on manually.
7243 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; 6874 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
7244 std::pair<SDValue, SDValue> Result = 6875 Type *ReturnTy =
7245 lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, 6876 IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
7246 LandingPad, true); 6877 std::pair<SDValue, SDValue> Result = lowerCallOperands(
6878 CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
7247 6879
7248 SDNode *CallEnd = Result.second.getNode(); 6880 SDNode *CallEnd = Result.second.getNode();
7249 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) 6881 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
7250 CallEnd = CallEnd->getOperand(0).getNode(); 6882 CallEnd = CallEnd->getOperand(0).getNode();
7251 6883
7260 SmallVector<SDValue, 8> Ops; 6892 SmallVector<SDValue, 8> Ops;
7261 6893
7262 // Add the <id> and <numBytes> constants. 6894 // Add the <id> and <numBytes> constants.
7263 SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); 6895 SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
7264 Ops.push_back(DAG.getTargetConstant( 6896 Ops.push_back(DAG.getTargetConstant(
7265 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); 6897 cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
7266 SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); 6898 SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
7267 Ops.push_back(DAG.getTargetConstant( 6899 Ops.push_back(DAG.getTargetConstant(
7268 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); 6900 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
7269 6901 MVT::i32));
7270 // Assume that the Callee is a constant address. 6902
7271 // FIXME: handle function symbols in the future. 6903 // Add the callee.
7272 Ops.push_back( 6904 Ops.push_back(Callee);
7273 DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
7274 /*isTarget=*/true));
7275 6905
7276 // Adjust <numArgs> to account for any arguments that have been passed on the 6906 // Adjust <numArgs> to account for any arguments that have been passed on the
7277 // stack instead. 6907 // stack instead.
7278 // Call Node: Chain, Target, {Args}, RegMask, [Glue] 6908 // Call Node: Chain, Target, {Args}, RegMask, [Glue]
7279 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); 6909 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
7280 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; 6910 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
7281 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); 6911 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
7282 6912
7283 // Add the calling convention 6913 // Add the calling convention
7284 Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); 6914 Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
7285 6915
7286 // Add the arguments we omitted previously. The register allocator should 6916 // Add the arguments we omitted previously. The register allocator should
7287 // place these in any free register. 6917 // place these in any free register.
7288 if (IsAnyRegCC) 6918 if (IsAnyRegCC)
7289 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) 6919 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
7292 // Push the arguments from the call instruction up to the register mask. 6922 // Push the arguments from the call instruction up to the register mask.
7293 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; 6923 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
7294 Ops.append(Call->op_begin() + 2, e); 6924 Ops.append(Call->op_begin() + 2, e);
7295 6925
7296 // Push live variables for the stack map. 6926 // Push live variables for the stack map.
7297 addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); 6927 addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
7298 6928
7299 // Push the register mask info. 6929 // Push the register mask info.
7300 if (HasGlue) 6930 if (HasGlue)
7301 Ops.push_back(*(Call->op_end()-2)); 6931 Ops.push_back(*(Call->op_end()-2));
7302 else 6932 else
7313 SDVTList NodeTys; 6943 SDVTList NodeTys;
7314 if (IsAnyRegCC && HasDef) { 6944 if (IsAnyRegCC && HasDef) {
7315 // Create the return types based on the intrinsic definition 6945 // Create the return types based on the intrinsic definition
7316 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6946 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7317 SmallVector<EVT, 3> ValueVTs; 6947 SmallVector<EVT, 3> ValueVTs;
7318 ComputeValueVTs(TLI, CS->getType(), ValueVTs); 6948 ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
7319 assert(ValueVTs.size() == 1 && "Expected only one return value type."); 6949 assert(ValueVTs.size() == 1 && "Expected only one return value type.");
7320 6950
7321 // There is always a chain and a glue type at the end 6951 // There is always a chain and a glue type at the end
7322 ValueVTs.push_back(MVT::Other); 6952 ValueVTs.push_back(MVT::Other);
7323 ValueVTs.push_back(MVT::Glue); 6953 ValueVTs.push_back(MVT::Glue);
7325 } else 6955 } else
7326 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 6956 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7327 6957
7328 // Replace the target specific call node with a PATCHPOINT node. 6958 // Replace the target specific call node with a PATCHPOINT node.
7329 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, 6959 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
7330 getCurSDLoc(), NodeTys, Ops); 6960 dl, NodeTys, Ops);
7331 6961
7332 // Update the NodeMap. 6962 // Update the NodeMap.
7333 if (HasDef) { 6963 if (HasDef) {
7334 if (IsAnyRegCC) 6964 if (IsAnyRegCC)
7335 setValue(CS.getInstruction(), SDValue(MN, 0)); 6965 setValue(CS.getInstruction(), SDValue(MN, 0));
7377 // Handle the incoming return values from the call. 7007 // Handle the incoming return values from the call.
7378 CLI.Ins.clear(); 7008 CLI.Ins.clear();
7379 Type *OrigRetTy = CLI.RetTy; 7009 Type *OrigRetTy = CLI.RetTy;
7380 SmallVector<EVT, 4> RetTys; 7010 SmallVector<EVT, 4> RetTys;
7381 SmallVector<uint64_t, 4> Offsets; 7011 SmallVector<uint64_t, 4> Offsets;
7382 ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); 7012 auto &DL = CLI.DAG.getDataLayout();
7013 ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
7383 7014
7384 SmallVector<ISD::OutputArg, 4> Outs; 7015 SmallVector<ISD::OutputArg, 4> Outs;
7385 GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); 7016 GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
7386 7017
7387 bool CanLowerReturn = 7018 bool CanLowerReturn =
7388 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), 7019 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
7389 CLI.IsVarArg, Outs, CLI.RetTy->getContext()); 7020 CLI.IsVarArg, Outs, CLI.RetTy->getContext());
7390 7021
7392 int DemoteStackIdx = -100; 7023 int DemoteStackIdx = -100;
7393 if (!CanLowerReturn) { 7024 if (!CanLowerReturn) {
7394 // FIXME: equivalent assert? 7025 // FIXME: equivalent assert?
7395 // assert(!CS.hasInAllocaArgument() && 7026 // assert(!CS.hasInAllocaArgument() &&
7396 // "sret demotion is incompatible with inalloca"); 7027 // "sret demotion is incompatible with inalloca");
7397 uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); 7028 uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
7398 unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); 7029 unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
7399 MachineFunction &MF = CLI.DAG.getMachineFunction(); 7030 MachineFunction &MF = CLI.DAG.getMachineFunction();
7400 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 7031 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
7401 Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); 7032 Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
7402 7033
7403 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); 7034 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
7404 ArgListEntry Entry; 7035 ArgListEntry Entry;
7405 Entry.Node = DemoteStackSlot; 7036 Entry.Node = DemoteStackSlot;
7406 Entry.Ty = StackSlotPtrType; 7037 Entry.Ty = StackSlotPtrType;
7407 Entry.isSExt = false; 7038 Entry.isSExt = false;
7408 Entry.isZExt = false; 7039 Entry.isZExt = false;
7412 Entry.isByVal = false; 7043 Entry.isByVal = false;
7413 Entry.isReturned = false; 7044 Entry.isReturned = false;
7414 Entry.Alignment = Align; 7045 Entry.Alignment = Align;
7415 CLI.getArgs().insert(CLI.getArgs().begin(), Entry); 7046 CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
7416 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); 7047 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
7048
7049 // sret demotion isn't compatible with tail-calls, since the sret argument
7050 // points into the callers stack frame.
7051 CLI.IsTailCall = false;
7417 } else { 7052 } else {
7418 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { 7053 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
7419 EVT VT = RetTys[I]; 7054 EVT VT = RetTys[I];
7420 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); 7055 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
7421 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); 7056 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
7439 CLI.Outs.clear(); 7074 CLI.Outs.clear();
7440 CLI.OutVals.clear(); 7075 CLI.OutVals.clear();
7441 ArgListTy &Args = CLI.getArgs(); 7076 ArgListTy &Args = CLI.getArgs();
7442 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 7077 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
7443 SmallVector<EVT, 4> ValueVTs; 7078 SmallVector<EVT, 4> ValueVTs;
7444 ComputeValueVTs(*this, Args[i].Ty, ValueVTs); 7079 ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
7445 Type *FinalType = Args[i].Ty; 7080 Type *FinalType = Args[i].Ty;
7446 if (Args[i].isByVal) 7081 if (Args[i].isByVal)
7447 FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); 7082 FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
7448 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( 7083 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
7449 FinalType, CLI.CallConv, CLI.IsVarArg); 7084 FinalType, CLI.CallConv, CLI.IsVarArg);
7452 EVT VT = ValueVTs[Value]; 7087 EVT VT = ValueVTs[Value];
7453 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); 7088 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
7454 SDValue Op = SDValue(Args[i].Node.getNode(), 7089 SDValue Op = SDValue(Args[i].Node.getNode(),
7455 Args[i].Node.getResNo() + Value); 7090 Args[i].Node.getResNo() + Value);
7456 ISD::ArgFlagsTy Flags; 7091 ISD::ArgFlagsTy Flags;
7457 unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); 7092 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
7458 7093
7459 if (Args[i].isZExt) 7094 if (Args[i].isZExt)
7460 Flags.setZExt(); 7095 Flags.setZExt();
7461 if (Args[i].isSExt) 7096 if (Args[i].isSExt)
7462 Flags.setSExt(); 7097 Flags.setSExt();
7476 Flags.setByVal(); 7111 Flags.setByVal();
7477 } 7112 }
7478 if (Args[i].isByVal || Args[i].isInAlloca) { 7113 if (Args[i].isByVal || Args[i].isInAlloca) {
7479 PointerType *Ty = cast<PointerType>(Args[i].Ty); 7114 PointerType *Ty = cast<PointerType>(Args[i].Ty);
7480 Type *ElementTy = Ty->getElementType(); 7115 Type *ElementTy = Ty->getElementType();
7481 Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); 7116 Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
7482 // For ByVal, alignment should come from FE. BE will guess if this 7117 // For ByVal, alignment should come from FE. BE will guess if this
7483 // info is not there but there are cases it cannot get right. 7118 // info is not there but there are cases it cannot get right.
7484 unsigned FrameAlign; 7119 unsigned FrameAlign;
7485 if (Args[i].Alignment) 7120 if (Args[i].Alignment)
7486 FrameAlign = Args[i].Alignment; 7121 FrameAlign = Args[i].Alignment;
7487 else 7122 else
7488 FrameAlign = getByValTypeAlignment(ElementTy); 7123 FrameAlign = getByValTypeAlignment(ElementTy, DL);
7489 Flags.setByValAlign(FrameAlign); 7124 Flags.setByValAlign(FrameAlign);
7490 } 7125 }
7491 if (Args[i].isNest) 7126 if (Args[i].isNest)
7492 Flags.setNest(); 7127 Flags.setNest();
7493 if (NeedsRegBlock) { 7128 if (NeedsRegBlock)
7494 Flags.setInConsecutiveRegs(); 7129 Flags.setInConsecutiveRegs();
7495 if (Value == NumValues - 1)
7496 Flags.setInConsecutiveRegsLast();
7497 }
7498 Flags.setOrigAlign(OriginalAlignment); 7130 Flags.setOrigAlign(OriginalAlignment);
7499 7131
7500 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); 7132 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
7501 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); 7133 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
7502 SmallVector<SDValue, 4> Parts(NumParts); 7134 SmallVector<SDValue, 4> Parts(NumParts);
7541 MyFlags.Flags.setOrigAlign(1); 7173 MyFlags.Flags.setOrigAlign(1);
7542 7174
7543 CLI.Outs.push_back(MyFlags); 7175 CLI.Outs.push_back(MyFlags);
7544 CLI.OutVals.push_back(Parts[j]); 7176 CLI.OutVals.push_back(Parts[j]);
7545 } 7177 }
7178
7179 if (NeedsRegBlock && Value == NumValues - 1)
7180 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
7546 } 7181 }
7547 } 7182 }
7548 7183
7549 SmallVector<SDValue, 4> InVals; 7184 SmallVector<SDValue, 4> InVals;
7550 CLI.Chain = LowerCall(CLI, InVals); 7185 CLI.Chain = LowerCall(CLI, InVals);
7578 // The instruction result is the result of loading from the 7213 // The instruction result is the result of loading from the
7579 // hidden sret parameter. 7214 // hidden sret parameter.
7580 SmallVector<EVT, 1> PVTs; 7215 SmallVector<EVT, 1> PVTs;
7581 Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); 7216 Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
7582 7217
7583 ComputeValueVTs(*this, PtrRetTy, PVTs); 7218 ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
7584 assert(PVTs.size() == 1 && "Pointers should fit in one register"); 7219 assert(PVTs.size() == 1 && "Pointers should fit in one register");
7585 EVT PtrVT = PVTs[0]; 7220 EVT PtrVT = PVTs[0];
7586 7221
7587 unsigned NumValues = RetTys.size(); 7222 unsigned NumValues = RetTys.size();
7588 ReturnValues.resize(NumValues); 7223 ReturnValues.resize(NumValues);
7589 SmallVector<SDValue, 4> Chains(NumValues); 7224 SmallVector<SDValue, 4> Chains(NumValues);
7590 7225
7591 for (unsigned i = 0; i < NumValues; ++i) { 7226 for (unsigned i = 0; i < NumValues; ++i) {
7592 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, 7227 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
7593 CLI.DAG.getConstant(Offsets[i], PtrVT)); 7228 CLI.DAG.getConstant(Offsets[i], CLI.DL,
7229 PtrVT));
7594 SDValue L = CLI.DAG.getLoad( 7230 SDValue L = CLI.DAG.getLoad(
7595 RetTys[i], CLI.DL, CLI.Chain, Add, 7231 RetTys[i], CLI.DL, CLI.Chain, Add,
7596 MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, 7232 MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
7597 false, false, 1); 7233 DemoteStackIdx, Offsets[i]),
7234 false, false, false, 1);
7598 ReturnValues[i] = L; 7235 ReturnValues[i] = L;
7599 Chains[i] = L.getValue(1); 7236 Chains[i] = L.getValue(1);
7600 } 7237 }
7601 7238
7602 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); 7239 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
7651 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && 7288 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
7652 "Copy from a reg to the same reg!"); 7289 "Copy from a reg to the same reg!");
7653 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); 7290 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
7654 7291
7655 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7292 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7656 RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); 7293 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
7294 V->getType());
7657 SDValue Chain = DAG.getEntryNode(); 7295 SDValue Chain = DAG.getEntryNode();
7658 7296
7659 ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == 7297 ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
7660 FuncInfo.PreferredExtendType.end()) 7298 FuncInfo.PreferredExtendType.end())
7661 ? ISD::ANY_EXTEND 7299 ? ISD::ANY_EXTEND
7684 } 7322 }
7685 7323
7686 void SelectionDAGISel::LowerArguments(const Function &F) { 7324 void SelectionDAGISel::LowerArguments(const Function &F) {
7687 SelectionDAG &DAG = SDB->DAG; 7325 SelectionDAG &DAG = SDB->DAG;
7688 SDLoc dl = SDB->getCurSDLoc(); 7326 SDLoc dl = SDB->getCurSDLoc();
7689 const DataLayout *DL = TLI->getDataLayout(); 7327 const DataLayout &DL = DAG.getDataLayout();
7690 SmallVector<ISD::InputArg, 16> Ins; 7328 SmallVector<ISD::InputArg, 16> Ins;
7691 7329
7692 if (!FuncInfo->CanLowerReturn) { 7330 if (!FuncInfo->CanLowerReturn) {
7693 // Put in an sret pointer parameter before all the other parameters. 7331 // Put in an sret pointer parameter before all the other parameters.
7694 SmallVector<EVT, 1> ValueVTs; 7332 SmallVector<EVT, 1> ValueVTs;
7695 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); 7333 ComputeValueVTs(*TLI, DAG.getDataLayout(),
7334 PointerType::getUnqual(F.getReturnType()), ValueVTs);
7696 7335
7697 // NOTE: Assuming that a pointer will never break down to more than one VT 7336 // NOTE: Assuming that a pointer will never break down to more than one VT
7698 // or one register. 7337 // or one register.
7699 ISD::ArgFlagsTy Flags; 7338 ISD::ArgFlagsTy Flags;
7700 Flags.setSRet(); 7339 Flags.setSRet();
7707 // Set up the incoming argument description vector. 7346 // Set up the incoming argument description vector.
7708 unsigned Idx = 1; 7347 unsigned Idx = 1;
7709 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); 7348 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
7710 I != E; ++I, ++Idx) { 7349 I != E; ++I, ++Idx) {
7711 SmallVector<EVT, 4> ValueVTs; 7350 SmallVector<EVT, 4> ValueVTs;
7712 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 7351 ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
7713 bool isArgValueUsed = !I->use_empty(); 7352 bool isArgValueUsed = !I->use_empty();
7714 unsigned PartBase = 0; 7353 unsigned PartBase = 0;
7715 Type *FinalType = I->getType(); 7354 Type *FinalType = I->getType();
7716 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) 7355 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
7717 FinalType = cast<PointerType>(FinalType)->getElementType(); 7356 FinalType = cast<PointerType>(FinalType)->getElementType();
7720 for (unsigned Value = 0, NumValues = ValueVTs.size(); 7359 for (unsigned Value = 0, NumValues = ValueVTs.size();
7721 Value != NumValues; ++Value) { 7360 Value != NumValues; ++Value) {
7722 EVT VT = ValueVTs[Value]; 7361 EVT VT = ValueVTs[Value];
7723 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); 7362 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
7724 ISD::ArgFlagsTy Flags; 7363 ISD::ArgFlagsTy Flags;
7725 unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); 7364 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
7726 7365
7727 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) 7366 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
7728 Flags.setZExt(); 7367 Flags.setZExt();
7729 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) 7368 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
7730 Flags.setSExt(); 7369 Flags.setSExt();
7744 Flags.setByVal(); 7383 Flags.setByVal();
7745 } 7384 }
7746 if (Flags.isByVal() || Flags.isInAlloca()) { 7385 if (Flags.isByVal() || Flags.isInAlloca()) {
7747 PointerType *Ty = cast<PointerType>(I->getType()); 7386 PointerType *Ty = cast<PointerType>(I->getType());
7748 Type *ElementTy = Ty->getElementType(); 7387 Type *ElementTy = Ty->getElementType();
7749 Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); 7388 Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
7750 // For ByVal, alignment should be passed from FE. BE will guess if 7389 // For ByVal, alignment should be passed from FE. BE will guess if
7751 // this info is not there but there are cases it cannot get right. 7390 // this info is not there but there are cases it cannot get right.
7752 unsigned FrameAlign; 7391 unsigned FrameAlign;
7753 if (F.getParamAlignment(Idx)) 7392 if (F.getParamAlignment(Idx))
7754 FrameAlign = F.getParamAlignment(Idx); 7393 FrameAlign = F.getParamAlignment(Idx);
7755 else 7394 else
7756 FrameAlign = TLI->getByValTypeAlignment(ElementTy); 7395 FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
7757 Flags.setByValAlign(FrameAlign); 7396 Flags.setByValAlign(FrameAlign);
7758 } 7397 }
7759 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) 7398 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
7760 Flags.setNest(); 7399 Flags.setNest();
7761 if (NeedsRegBlock) { 7400 if (NeedsRegBlock)
7762 Flags.setInConsecutiveRegs(); 7401 Flags.setInConsecutiveRegs();
7763 if (Value == NumValues - 1)
7764 Flags.setInConsecutiveRegsLast();
7765 }
7766 Flags.setOrigAlign(OriginalAlignment); 7402 Flags.setOrigAlign(OriginalAlignment);
7767 7403
7768 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7404 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
7769 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); 7405 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
7770 for (unsigned i = 0; i != NumRegs; ++i) { 7406 for (unsigned i = 0; i != NumRegs; ++i) {
7775 // if it isn't first piece, alignment must be 1 7411 // if it isn't first piece, alignment must be 1
7776 else if (i > 0) 7412 else if (i > 0)
7777 MyFlags.Flags.setOrigAlign(1); 7413 MyFlags.Flags.setOrigAlign(1);
7778 Ins.push_back(MyFlags); 7414 Ins.push_back(MyFlags);
7779 } 7415 }
7416 if (NeedsRegBlock && Value == NumValues - 1)
7417 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
7780 PartBase += VT.getStoreSize(); 7418 PartBase += VT.getStoreSize();
7781 } 7419 }
7782 } 7420 }
7783 7421
7784 // Call the target to set up the argument values. 7422 // Call the target to set up the argument values.
7808 Idx = 1; 7446 Idx = 1;
7809 if (!FuncInfo->CanLowerReturn) { 7447 if (!FuncInfo->CanLowerReturn) {
7810 // Create a virtual register for the sret pointer, and put in a copy 7448 // Create a virtual register for the sret pointer, and put in a copy
7811 // from the sret argument into it. 7449 // from the sret argument into it.
7812 SmallVector<EVT, 1> ValueVTs; 7450 SmallVector<EVT, 1> ValueVTs;
7813 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); 7451 ComputeValueVTs(*TLI, DAG.getDataLayout(),
7452 PointerType::getUnqual(F.getReturnType()), ValueVTs);
7814 MVT VT = ValueVTs[0].getSimpleVT(); 7453 MVT VT = ValueVTs[0].getSimpleVT();
7815 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7454 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
7816 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7455 ISD::NodeType AssertOp = ISD::DELETED_NODE;
7817 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, 7456 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
7818 RegVT, VT, nullptr, AssertOp); 7457 RegVT, VT, nullptr, AssertOp);
7832 7471
7833 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; 7472 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
7834 ++I, ++Idx) { 7473 ++I, ++Idx) {
7835 SmallVector<SDValue, 4> ArgValues; 7474 SmallVector<SDValue, 4> ArgValues;
7836 SmallVector<EVT, 4> ValueVTs; 7475 SmallVector<EVT, 4> ValueVTs;
7837 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 7476 ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
7838 unsigned NumValues = ValueVTs.size(); 7477 unsigned NumValues = ValueVTs.size();
7839 7478
7840 // If this argument is unused then remember its value. It is used to generate 7479 // If this argument is unused then remember its value. It is used to generate
7841 // debugging information. 7480 // debugging information.
7842 if (I->use_empty() && NumValues) { 7481 if (I->use_empty() && NumValues) {
7909 } 7548 }
7910 7549
7911 assert(i == InVals.size() && "Argument register count mismatch!"); 7550 assert(i == InVals.size() && "Argument register count mismatch!");
7912 7551
7913 // Finally, if the target has anything special to do, allow it to do so. 7552 // Finally, if the target has anything special to do, allow it to do so.
7914 // FIXME: this should insert code into the DAG!
7915 EmitFunctionEntryCode(); 7553 EmitFunctionEntryCode();
7916 } 7554 }
7917 7555
7918 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to 7556 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
7919 /// ensure constants are generated when needed. Remember the virtual registers 7557 /// ensure constants are generated when needed. Remember the virtual registers
7926 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { 7564 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
7927 const TerminatorInst *TI = LLVMBB->getTerminator(); 7565 const TerminatorInst *TI = LLVMBB->getTerminator();
7928 7566
7929 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; 7567 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
7930 7568
7931 // Check successor nodes' PHI nodes that expect a constant to be available 7569 // Check PHI nodes in successors that expect a value to be available from this
7932 // from this block. 7570 // block.
7933 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { 7571 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
7934 const BasicBlock *SuccBB = TI->getSuccessor(succ); 7572 const BasicBlock *SuccBB = TI->getSuccessor(succ);
7935 if (!isa<PHINode>(SuccBB->begin())) continue; 7573 if (!isa<PHINode>(SuccBB->begin())) continue;
7936 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; 7574 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
7937 7575
7980 7618
7981 // Remember that this register needs to added to the machine PHI node as 7619 // Remember that this register needs to added to the machine PHI node as
7982 // the input for this MBB. 7620 // the input for this MBB.
7983 SmallVector<EVT, 4> ValueVTs; 7621 SmallVector<EVT, 4> ValueVTs;
7984 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7622 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7985 ComputeValueVTs(TLI, PN->getType(), ValueVTs); 7623 ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
7986 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { 7624 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
7987 EVT VT = ValueVTs[vti]; 7625 EVT VT = ValueVTs[vti];
7988 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); 7626 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
7989 for (unsigned i = 0, e = NumRegisters; i != e; ++i) 7627 for (unsigned i = 0, e = NumRegisters; i != e; ++i)
7990 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); 7628 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
8014 // Add it as a successor of ParentMBB. 7652 // Add it as a successor of ParentMBB.
8015 ParentMBB->addSuccessor( 7653 ParentMBB->addSuccessor(
8016 SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); 7654 SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
8017 return SuccMBB; 7655 return SuccMBB;
8018 } 7656 }
7657
7658 MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
7659 MachineFunction::iterator I = MBB;
7660 if (++I == FuncInfo.MF->end())
7661 return nullptr;
7662 return I;
7663 }
7664
7665 /// During lowering new call nodes can be created (such as memset, etc.).
7666 /// Those will become new roots of the current DAG, but complications arise
7667 /// when they are tail calls. In such cases, the call lowering will update
7668 /// the root, but the builder still needs to know that a tail call has been
7669 /// lowered in order to avoid generating an additional return.
7670 void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
7671 // If the node is null, we do have a tail call.
7672 if (MaybeTC.getNode() != nullptr)
7673 DAG.setRoot(MaybeTC);
7674 else
7675 HasTailCall = true;
7676 }
7677
7678 bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
7679 unsigned *TotalCases, unsigned First,
7680 unsigned Last) {
7681 assert(Last >= First);
7682 assert(TotalCases[Last] >= TotalCases[First]);
7683
7684 APInt LowCase = Clusters[First].Low->getValue();
7685 APInt HighCase = Clusters[Last].High->getValue();
7686 assert(LowCase.getBitWidth() == HighCase.getBitWidth());
7687
7688 // FIXME: A range of consecutive cases has 100% density, but only requires one
7689 // comparison to lower. We should discriminate against such consecutive ranges
7690 // in jump tables.
7691
7692 uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
7693 uint64_t Range = Diff + 1;
7694
7695 uint64_t NumCases =
7696 TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
7697
7698 assert(NumCases < UINT64_MAX / 100);
7699 assert(Range >= NumCases);
7700
7701 return NumCases * 100 >= Range * MinJumpTableDensity;
7702 }
7703
7704 static inline bool areJTsAllowed(const TargetLowering &TLI) {
7705 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
7706 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
7707 }
7708
7709 bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
7710 unsigned First, unsigned Last,
7711 const SwitchInst *SI,
7712 MachineBasicBlock *DefaultMBB,
7713 CaseCluster &JTCluster) {
7714 assert(First <= Last);
7715
7716 uint32_t Weight = 0;
7717 unsigned NumCmps = 0;
7718 std::vector<MachineBasicBlock*> Table;
7719 DenseMap<MachineBasicBlock*, uint32_t> JTWeights;
7720 for (unsigned I = First; I <= Last; ++I) {
7721 assert(Clusters[I].Kind == CC_Range);
7722 Weight += Clusters[I].Weight;
7723 assert(Weight >= Clusters[I].Weight && "Weight overflow!");
7724 APInt Low = Clusters[I].Low->getValue();
7725 APInt High = Clusters[I].High->getValue();
7726 NumCmps += (Low == High) ? 1 : 2;
7727 if (I != First) {
7728 // Fill the gap between this and the previous cluster.
7729 APInt PreviousHigh = Clusters[I - 1].High->getValue();
7730 assert(PreviousHigh.slt(Low));
7731 uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
7732 for (uint64_t J = 0; J < Gap; J++)
7733 Table.push_back(DefaultMBB);
7734 }
7735 uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
7736 for (uint64_t J = 0; J < ClusterSize; ++J)
7737 Table.push_back(Clusters[I].MBB);
7738 JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
7739 }
7740
7741 unsigned NumDests = JTWeights.size();
7742 if (isSuitableForBitTests(NumDests, NumCmps,
7743 Clusters[First].Low->getValue(),
7744 Clusters[Last].High->getValue())) {
7745 // Clusters[First..Last] should be lowered as bit tests instead.
7746 return false;
7747 }
7748
7749 // Create the MBB that will load from and jump through the table.
7750 // Note: We create it here, but it's not inserted into the function yet.
7751 MachineFunction *CurMF = FuncInfo.MF;
7752 MachineBasicBlock *JumpTableMBB =
7753 CurMF->CreateMachineBasicBlock(SI->getParent());
7754
7755 // Add successors. Note: use table order for determinism.
7756 SmallPtrSet<MachineBasicBlock *, 8> Done;
7757 for (MachineBasicBlock *Succ : Table) {
7758 if (Done.count(Succ))
7759 continue;
7760 addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
7761 Done.insert(Succ);
7762 }
7763
7764 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7765 unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
7766 ->createJumpTableIndex(Table);
7767
7768 // Set up the jump table info.
7769 JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
7770 JumpTableHeader JTH(Clusters[First].Low->getValue(),
7771 Clusters[Last].High->getValue(), SI->getCondition(),
7772 nullptr, false);
7773 JTCases.emplace_back(std::move(JTH), std::move(JT));
7774
7775 JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
7776 JTCases.size() - 1, Weight);
7777 return true;
7778 }
7779
7780 void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
7781 const SwitchInst *SI,
7782 MachineBasicBlock *DefaultMBB) {
7783 #ifndef NDEBUG
7784 // Clusters must be non-empty, sorted, and only contain Range clusters.
7785 assert(!Clusters.empty());
7786 for (CaseCluster &C : Clusters)
7787 assert(C.Kind == CC_Range);
7788 for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
7789 assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
7790 #endif
7791
7792 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7793 if (!areJTsAllowed(TLI))
7794 return;
7795
7796 const int64_t N = Clusters.size();
7797 const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
7798
7799 // TotalCases[i]: Total nbr of cases in Clusters[0..i].
7800 SmallVector<unsigned, 8> TotalCases(N);
7801
7802 for (unsigned i = 0; i < N; ++i) {
7803 APInt Hi = Clusters[i].High->getValue();
7804 APInt Lo = Clusters[i].Low->getValue();
7805 TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
7806 if (i != 0)
7807 TotalCases[i] += TotalCases[i - 1];
7808 }
7809
7810 if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) {
7811 // Cheap case: the whole range might be suitable for jump table.
7812 CaseCluster JTCluster;
7813 if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
7814 Clusters[0] = JTCluster;
7815 Clusters.resize(1);
7816 return;
7817 }
7818 }
7819
7820 // The algorithm below is not suitable for -O0.
7821 if (TM.getOptLevel() == CodeGenOpt::None)
7822 return;
7823
7824 // Split Clusters into minimum number of dense partitions. The algorithm uses
7825 // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
7826 // for the Case Statement'" (1994), but builds the MinPartitions array in
7827 // reverse order to make it easier to reconstruct the partitions in ascending
7828 // order. In the choice between two optimal partitionings, it picks the one
7829 // which yields more jump tables.
7830
7831 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
7832 SmallVector<unsigned, 8> MinPartitions(N);
7833 // LastElement[i] is the last element of the partition starting at i.
7834 SmallVector<unsigned, 8> LastElement(N);
7835 // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
7836 SmallVector<unsigned, 8> NumTables(N);
7837
7838 // Base case: There is only one way to partition Clusters[N-1].
7839 MinPartitions[N - 1] = 1;
7840 LastElement[N - 1] = N - 1;
7841 assert(MinJumpTableSize > 1);
7842 NumTables[N - 1] = 0;
7843
7844 // Note: loop indexes are signed to avoid underflow.
7845 for (int64_t i = N - 2; i >= 0; i--) {
7846 // Find optimal partitioning of Clusters[i..N-1].
7847 // Baseline: Put Clusters[i] into a partition on its own.
7848 MinPartitions[i] = MinPartitions[i + 1] + 1;
7849 LastElement[i] = i;
7850 NumTables[i] = NumTables[i + 1];
7851
7852 // Search for a solution that results in fewer partitions.
7853 for (int64_t j = N - 1; j > i; j--) {
7854 // Try building a partition from Clusters[i..j].
7855 if (isDense(Clusters, &TotalCases[0], i, j)) {
7856 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
7857 bool IsTable = j - i + 1 >= MinJumpTableSize;
7858 unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
7859
7860 // If this j leads to fewer partitions, or same number of partitions
7861 // with more lookup tables, it is a better partitioning.
7862 if (NumPartitions < MinPartitions[i] ||
7863 (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
7864 MinPartitions[i] = NumPartitions;
7865 LastElement[i] = j;
7866 NumTables[i] = Tables;
7867 }
7868 }
7869 }
7870 }
7871
7872 // Iterate over the partitions, replacing some with jump tables in-place.
7873 unsigned DstIndex = 0;
7874 for (unsigned First = 0, Last; First < N; First = Last + 1) {
7875 Last = LastElement[First];
7876 assert(Last >= First);
7877 assert(DstIndex <= First);
7878 unsigned NumClusters = Last - First + 1;
7879
7880 CaseCluster JTCluster;
7881 if (NumClusters >= MinJumpTableSize &&
7882 buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
7883 Clusters[DstIndex++] = JTCluster;
7884 } else {
7885 for (unsigned I = First; I <= Last; ++I)
7886 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
7887 }
7888 }
7889 Clusters.resize(DstIndex);
7890 }
7891
7892 bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
7893 // FIXME: Using the pointer type doesn't seem ideal.
7894 uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
7895 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
7896 return Range <= BW;
7897 }
7898
7899 bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
7900 unsigned NumCmps,
7901 const APInt &Low,
7902 const APInt &High) {
7903 // FIXME: I don't think NumCmps is the correct metric: a single case and a
7904 // range of cases both require only one branch to lower. Just looking at the
7905 // number of clusters and destinations should be enough to decide whether to
7906 // build bit tests.
7907
7908 // To lower a range with bit tests, the range must fit the bitwidth of a
7909 // machine word.
7910 if (!rangeFitsInWord(Low, High))
7911 return false;
7912
7913 // Decide whether it's profitable to lower this range with bit tests. Each
7914 // destination requires a bit test and branch, and there is an overall range
7915 // check branch. For a small number of clusters, separate comparisons might be
7916 // cheaper, and for many destinations, splitting the range might be better.
7917 return (NumDests == 1 && NumCmps >= 3) ||
7918 (NumDests == 2 && NumCmps >= 5) ||
7919 (NumDests == 3 && NumCmps >= 6);
7920 }
7921
/// Try to lower Clusters[First..Last] as a single bit-test cluster.
///
/// All clusters in the range must be CC_Range and sorted by case value. On
/// success a BitTestBlock is appended to BitTestCases, BTCluster is set to a
/// CC_BitTests cluster covering the whole range, and true is returned.
/// Returns false for a single cluster or when the heuristics in
/// isSuitableForBitTests deem bit tests unprofitable.
bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
                                        unsigned First, unsigned Last,
                                        const SwitchInst *SI,
                                        CaseCluster &BTCluster) {
  assert(First <= Last);
  // A lone cluster gains nothing from a bit test.
  if (First == Last)
    return false;

  // Count distinct destinations and the number of comparisons the range would
  // otherwise need (1 for a single value, 2 for a true low..high range).
  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
  unsigned NumCmps = 0;
  for (int64_t I = First; I <= Last; ++I) {
    assert(Clusters[I].Kind == CC_Range);
    Dests.set(Clusters[I].MBB->getNumber());
    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
  }
  unsigned NumDests = Dests.count();

  APInt Low = Clusters[First].Low->getValue();
  APInt High = Clusters[Last].High->getValue();
  assert(Low.slt(High));

  if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
    return false;

  APInt LowBound;
  APInt CmpRange;

  const int BitWidth = DAG.getTargetLoweringInfo()
                           .getPointerTy(DAG.getDataLayout())
                           .getSizeInBits();
  assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");

  // Check if the clusters cover a contiguous range such that no value in the
  // range will jump to the default statement.
  bool ContiguousRange = true;
  for (int64_t I = First + 1; I <= Last; ++I) {
    if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
      ContiguousRange = false;
      break;
    }
  }

  if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
    // Optimize the case where all the case values fit in a word without having
    // to subtract minValue. In this case, we can optimize away the subtraction.
    LowBound = APInt::getNullValue(Low.getBitWidth());
    CmpRange = High;
    // Note: the range [0, High] checked here is wider than the actual case
    // values, so the contiguous-range shortcut no longer applies.
    ContiguousRange = false;
  } else {
    LowBound = Low;
    CmpRange = High - Low;
  }

  // Build one CaseBits record per destination, merging the masks of all
  // clusters that branch there, and accumulate the total branch weight.
  CaseBitsVector CBV;
  uint32_t TotalWeight = 0;
  for (unsigned i = First; i <= Last; ++i) {
    // Find the CaseBits for this destination.
    unsigned j;
    for (j = 0; j < CBV.size(); ++j)
      if (CBV[j].BB == Clusters[i].MBB)
        break;
    if (j == CBV.size())
      CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
    CaseBits *CB = &CBV[j];

    // Update Mask, Bits and ExtraWeight. Positions are relative to LowBound;
    // the shifted all-ones pattern sets bits Lo..Hi inclusive.
    uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
    uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
    assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
    CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
    CB->Bits += Hi - Lo + 1;
    CB->ExtraWeight += Clusters[i].Weight;
    TotalWeight += Clusters[i].Weight;
    assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
  }

  BitTestInfo BTI;
  std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
    // Sort by weight first, number of bits second.
    if (a.ExtraWeight != b.ExtraWeight)
      return a.ExtraWeight > b.ExtraWeight;
    return a.Bits > b.Bits;
  });

  // Create one bit-test basic block per destination, in the sorted order.
  for (auto &CB : CBV) {
    MachineBasicBlock *BitTestBB =
        FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
  }
  BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
                            SI->getCondition(), -1U, MVT::Other, false,
                            ContiguousRange, nullptr, nullptr, std::move(BTI),
                            TotalWeight);

  BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
                                    BitTestCases.size() - 1, TotalWeight);
  return true;
}
8020
/// Scan Clusters (sorted CC_Range/CC_JumpTable clusters) for runs that can be
/// lowered as bit tests, and replace those runs in-place with CC_BitTests
/// clusters via buildBitTests. Uses a suffix dynamic program analogous to the
/// one in findJumpTables.
void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
                                              const SwitchInst *SI) {
  // Partition Clusters into as few subsets as possible, where each subset has a
  // range that fits in a machine word and has <= 3 unique destinations.

#ifndef NDEBUG
  // Clusters must be sorted and contain Range or JumpTable clusters.
  assert(!Clusters.empty());
  assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
  for (const CaseCluster &C : Clusters)
    assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
  for (unsigned i = 1; i < Clusters.size(); ++i)
    assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
#endif

  // The algorithm below is not suitable for -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  // If target does not have legal shift left, do not emit bit tests at all.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
  if (!TLI.isOperationLegal(ISD::SHL, PTy))
    return;

  int BitWidth = PTy.getSizeInBits();
  const int64_t N = Clusters.size();

  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
  SmallVector<unsigned, 8> MinPartitions(N);
  // LastElement[i] is the last element of the partition starting at i.
  SmallVector<unsigned, 8> LastElement(N);

  // FIXME: This might not be the best algorithm for finding bit test clusters.

  // Base case: There is only one way to partition Clusters[N-1].
  MinPartitions[N - 1] = 1;
  LastElement[N - 1] = N - 1;

  // Note: loop indexes are signed to avoid underflow.
  for (int64_t i = N - 2; i >= 0; --i) {
    // Find optimal partitioning of Clusters[i..N-1].
    // Baseline: Put Clusters[i] into a partition on its own.
    MinPartitions[i] = MinPartitions[i + 1] + 1;
    LastElement[i] = i;

    // Search for a solution that results in fewer partitions.
    // Note: the search is limited by BitWidth, reducing time complexity.
    for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
      // Try building a partition from Clusters[i..j].

      // Check the range.
      if (!rangeFitsInWord(Clusters[i].Low->getValue(),
                           Clusters[j].High->getValue()))
        continue;

      // Check nbr of destinations and cluster types.
      // FIXME: This works, but doesn't seem very efficient.
      bool RangesOnly = true;
      BitVector Dests(FuncInfo.MF->getNumBlockIDs());
      for (int64_t k = i; k <= j; k++) {
        if (Clusters[k].Kind != CC_Range) {
          RangesOnly = false;
          break;
        }
        Dests.set(Clusters[k].MBB->getNumber());
      }
      // Note: this break leaves the j-loop entirely; smaller j values are
      // not reconsidered for this i.
      if (!RangesOnly || Dests.count() > 3)
        break;

      // Check if it's a better partition.
      unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
      if (NumPartitions < MinPartitions[i]) {
        // Found a better partition.
        MinPartitions[i] = NumPartitions;
        LastElement[i] = j;
      }
    }
  }

  // Iterate over the partitions, replacing with bit-test clusters in-place.
  unsigned DstIndex = 0;
  for (unsigned First = 0, Last; First < N; First = Last + 1) {
    Last = LastElement[First];
    assert(First <= Last);
    assert(DstIndex <= First);

    CaseCluster BitTestCluster;
    if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
      // The whole partition collapses into one CC_BitTests cluster.
      Clusters[DstIndex++] = BitTestCluster;
    } else {
      // Keep the partition's clusters unchanged, compacting them leftward.
      size_t NumClusters = Last - First + 1;
      std::memmove(&Clusters[DstIndex], &Clusters[First],
                   sizeof(Clusters[0]) * NumClusters);
      DstIndex += NumClusters;
    }
  }
  Clusters.resize(DstIndex);
}
8120
/// Lower the clusters [W.FirstCluster, W.LastCluster] of a switch work item as
/// a linear sequence: each cluster is emitted as a comparison, jump table, or
/// bit-test header that falls through to the next cluster's block, and the
/// final cluster falls through to DefaultMBB.
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                                        MachineBasicBlock *SwitchMBB,
                                        MachineBasicBlock *DefaultMBB) {
  MachineFunction *CurMF = FuncInfo.MF;
  // NextMBB is W.MBB's layout successor; used below to pick a cluster that
  // can fall through without an extra branch.
  MachineBasicBlock *NextMBB = nullptr;
  MachineFunction::iterator BBI = W.MBB;
  if (++BBI != FuncInfo.MF->end())
    NextMBB = BBI;

  unsigned Size = W.LastCluster - W.FirstCluster + 1;

  BranchProbabilityInfo *BPI = FuncInfo.BPI;

  if (Size == 2 && W.MBB == SwitchMBB) {
    // If any two of the cases has the same destination, and if one value
    // is the same as the other, but has one bit unset that the other has set,
    // use bit manipulation to do two compares at once. For example:
    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
    // TODO: This could be extended to merge any 2 cases in switches with 3
    // cases.
    // TODO: Handle cases where W.CaseBB != SwitchBB.
    CaseCluster &Small = *W.FirstCluster;
    CaseCluster &Big = *W.LastCluster;

    if (Small.Low == Small.High && Big.Low == Big.High &&
        Small.MBB == Big.MBB) {
      const APInt &SmallValue = Small.Low->getValue();
      const APInt &BigValue = Big.Low->getValue();

      // Check that there is only one bit different.
      APInt CommonBit = BigValue ^ SmallValue;
      if (CommonBit.isPowerOf2()) {
        SDValue CondLHS = getValue(Cond);
        EVT VT = CondLHS.getValueType();
        SDLoc DL = getCurSDLoc();

        // (Cond | CommonBit) == (BigValue | SmallValue) holds exactly when
        // Cond equals SmallValue or BigValue.
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
                                 DAG.getConstant(CommonBit, DL, VT));
        SDValue Cond = DAG.getSetCC(
            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
            ISD::SETEQ);

        // Update successor info.
        // Both Small and Big will jump to Small.BB, so we sum up the weights.
        addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
        addSuccessorWithWeight(
            SwitchMBB, DefaultMBB,
            // The default destination is the first successor in IR.
            BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
                : 0);

        // Insert the true branch.
        SDValue BrCond =
            DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
                        DAG.getBasicBlock(Small.MBB));
        // Insert the false branch.
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                             DAG.getBasicBlock(DefaultMBB));

        DAG.setRoot(BrCond);
        return;
      }
    }
  }

  if (TM.getOptLevel() != CodeGenOpt::None) {
    // Order cases by weight so the most likely case will be checked first.
    std::sort(W.FirstCluster, W.LastCluster + 1,
              [](const CaseCluster &a, const CaseCluster &b) {
                return a.Weight > b.Weight;
              });

    // Rearrange the case blocks so that the last one falls through if possible
    // without changing the order of weights.
    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
      --I;
      if (I->Weight > W.LastCluster->Weight)
        break;
      if (I->Kind == CC_Range && I->MBB == NextMBB) {
        std::swap(*I, *W.LastCluster);
        break;
      }
    }
  }

  // Compute total weight.
  uint32_t DefaultWeight = W.DefaultWeight;
  uint32_t UnhandledWeights = DefaultWeight;
  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
    UnhandledWeights += I->Weight;
    assert(UnhandledWeights >= I->Weight && "Weight overflow!");
  }

  // Emit the clusters in order; each one falls through to the next (and the
  // last one to DefaultMBB). UnhandledWeights shrinks as clusters are handled.
  MachineBasicBlock *CurMBB = W.MBB;
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
    MachineBasicBlock *Fallthrough;
    if (I == W.LastCluster) {
      // For the last cluster, fall through to the default destination.
      Fallthrough = DefaultMBB;
    } else {
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
      CurMF->insert(BBI, Fallthrough);
      // Put Cond in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(Cond);
    }
    UnhandledWeights -= I->Weight;

    switch (I->Kind) {
      case CC_JumpTable: {
        // FIXME: Optimize away range check based on pivot comparisons.
        JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
        JumpTable *JT = &JTCases[I->JTCasesIndex].second;

        // The jump block hasn't been inserted yet; insert it here.
        MachineBasicBlock *JumpMBB = JT->MBB;
        CurMF->insert(BBI, JumpMBB);

        uint32_t JumpWeight = I->Weight;
        uint32_t FallthroughWeight = UnhandledWeights;

        // If the default statement is a target of the jump table, we evenly
        // distribute the default weight to successors of CurMBB. Also update
        // the weight on the edge from JumpMBB to Fallthrough.
        for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
                                              SE = JumpMBB->succ_end();
             SI != SE; ++SI) {
          if (*SI == DefaultMBB) {
            JumpWeight += DefaultWeight / 2;
            FallthroughWeight -= DefaultWeight / 2;
            JumpMBB->setSuccWeight(SI, DefaultWeight / 2);
            break;
          }
        }

        addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight);
        addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight);

        // The jump table header will be inserted in our current block, do the
        // range check, and fall through to our fallthrough block.
        JTH->HeaderBB = CurMBB;
        JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.

        // If we're in the right place, emit the jump table header right now.
        if (CurMBB == SwitchMBB) {
          visitJumpTableHeader(*JT, *JTH, SwitchMBB);
          JTH->Emitted = true;
        }
        break;
      }
      case CC_BitTests: {
        // FIXME: Optimize away range check based on pivot comparisons.
        BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];

        // The bit test blocks haven't been inserted yet; insert them here.
        for (BitTestCase &BTC : BTB->Cases)
          CurMF->insert(BBI, BTC.ThisBB);

        // Fill in fields of the BitTestBlock.
        BTB->Parent = CurMBB;
        BTB->Default = Fallthrough;

        BTB->DefaultWeight = UnhandledWeights;
        // If the cases in bit test don't form a contiguous range, we evenly
        // distribute the weight on the edge to Fallthrough to two successors
        // of CurMBB.
        if (!BTB->ContiguousRange) {
          BTB->Weight += DefaultWeight / 2;
          BTB->DefaultWeight -= DefaultWeight / 2;
        }

        // If we're in the right place, emit the bit test header right now.
        if (CurMBB == SwitchMBB) {
          visitBitTestHeader(*BTB, SwitchMBB);
          BTB->Emitted = true;
        }
        break;
      }
      case CC_Range: {
        const Value *RHS, *LHS, *MHS;
        ISD::CondCode CC;
        if (I->Low == I->High) {
          // Check Cond == I->Low.
          CC = ISD::SETEQ;
          LHS = Cond;
          RHS=I->Low;
          MHS = nullptr;
        } else {
          // Check I->Low <= Cond <= I->High.
          CC = ISD::SETLE;
          LHS = I->Low;
          MHS = Cond;
          RHS = I->High;
        }

        // The false weight is the sum of all unhandled cases.
        CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
                     UnhandledWeights);

        if (CurMBB == SwitchMBB)
          visitSwitchCase(CB, SwitchMBB);
        else
          SwitchCases.push_back(CB);

        break;
      }
    }
    CurMBB = Fallthrough;
  }
}
8330
8331 unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
8332 CaseClusterIt First,
8333 CaseClusterIt Last) {
8334 return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
8335 if (X.Weight != CC.Weight)
8336 return X.Weight > CC.Weight;
8337
8338 // Ties are broken by comparing the case value.
8339 return X.Low->getValue().slt(CC.Low->getValue());
8340 });
8341 }
8342
/// Split the cluster range of work item W around a weight-balanced pivot,
/// emit the pivot's less-than comparison, and push the left and right halves
/// onto WorkList for further lowering.
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
                                        const SwitchWorkListItem &W,
                                        Value *Cond,
                                        MachineBasicBlock *SwitchMBB) {
  assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
         "Clusters not sorted?");

  assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");

  // Balance the tree based on branch weights to create a near-optimal (in terms
  // of search time given key frequency) binary search tree. See e.g. Kurt
  // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
  CaseClusterIt LastLeft = W.FirstCluster;
  CaseClusterIt FirstRight = W.LastCluster;
  // Each side starts with half the default weight so the default edge is
  // accounted for in the balance.
  uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2;
  uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2;

  // Move LastLeft and FirstRight towards each other from opposite directions to
  // find a partitioning of the clusters which balances the weight on both
  // sides. If LeftWeight and RightWeight are equal, alternate which side is
  // taken to ensure 0-weight nodes are distributed evenly.
  unsigned I = 0;
  while (LastLeft + 1 < FirstRight) {
    if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
      LeftWeight += (++LastLeft)->Weight;
    else
      RightWeight += (--FirstRight)->Weight;
    I++;
  }

  for (;;) {
    // Our binary search tree differs from a typical BST in that ours can have up
    // to three values in each leaf. The pivot selection above doesn't take that
    // into account, which means the tree might require more nodes and be less
    // efficient. We compensate for this here.

    unsigned NumLeft = LastLeft - W.FirstCluster + 1;
    unsigned NumRight = W.LastCluster - FirstRight + 1;

    if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
      // If one side has less than 3 clusters, and the other has more than 3,
      // consider taking a cluster from the other side.

      if (NumLeft < NumRight) {
        // Consider moving the first cluster on the right to the left side.
        CaseCluster &CC = *FirstRight;
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
        if (LeftSideRank <= RightSideRank) {
          // Moving the cluster to the left does not demote it.
          ++LastLeft;
          ++FirstRight;
          continue;
        }
      } else {
        assert(NumRight < NumLeft);
        // Consider moving the last element on the left to the right side.
        CaseCluster &CC = *LastLeft;
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
        if (RightSideRank <= LeftSideRank) {
          // Moving the cluster to the right does not demote it.
          --LastLeft;
          --FirstRight;
          continue;
        }
      }
    }
    break;
  }

  assert(LastLeft + 1 == FirstRight);
  assert(LastLeft >= W.FirstCluster);
  assert(FirstRight <= W.LastCluster);

  // Use the first element on the right as pivot since we will make less-than
  // comparisons against it.
  CaseClusterIt PivotCluster = FirstRight;
  assert(PivotCluster > W.FirstCluster);
  assert(PivotCluster <= W.LastCluster);

  CaseClusterIt FirstLeft = W.FirstCluster;
  CaseClusterIt LastRight = W.LastCluster;

  const ConstantInt *Pivot = PivotCluster->Low;

  // New blocks will be inserted immediately after the current one.
  MachineFunction::iterator BBI = W.MBB;
  ++BBI;

  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
  // we can branch to its destination directly if it's squeezed exactly in
  // between the known lower bound and Pivot - 1.
  MachineBasicBlock *LeftMBB;
  if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
      FirstLeft->Low == W.GE &&
      (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
    LeftMBB = FirstLeft->MBB;
  } else {
    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
    FuncInfo.MF->insert(BBI, LeftMBB);
    WorkList.push_back(
        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2});
    // Put Cond in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(Cond);
  }

  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
  // single cluster, RHS.Low == Pivot, and we can branch to its destination
  // directly if RHS.High equals the current upper bound.
  MachineBasicBlock *RightMBB;
  if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
      W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
    RightMBB = FirstRight->MBB;
  } else {
    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
    FuncInfo.MF->insert(BBI, RightMBB);
    WorkList.push_back(
        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2});
    // Put Cond in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(Cond);
  }

  // Create the CaseBlock record that will be used to lower the branch.
  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
               LeftWeight, RightWeight);

  if (W.MBB == SwitchMBB)
    visitSwitchCase(CB, SwitchMBB);
  else
    SwitchCases.push_back(CB);
}
8475
/// Lower a SwitchInst: extract case clusters, merge adjacent cases with the
/// same destination, optionally replace an unreachable default with the most
/// popular destination, form jump tables and bit tests, then lower the
/// remaining clusters through a work list (large ranges are split into a
/// balanced binary search tree at -O1 and above).
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  // Extract cases from the switch.
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  CaseClusterVector Clusters;
  Clusters.reserve(SI.getNumCases());
  for (auto I : SI.cases()) {
    MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
    const ConstantInt *CaseVal = I.getCaseValue();
    // Without BPI (e.g. at -O0) all edges get weight 0.
    uint32_t Weight =
        BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
  }

  MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];

  // Cluster adjacent cases with the same destination. We do this at all
  // optimization levels because it's cheap to do and will make codegen faster
  // if there are many clusters.
  sortAndRangeify(Clusters);

  if (TM.getOptLevel() != CodeGenOpt::None) {
    // Replace an unreachable default with the most popular destination.
    // FIXME: Exploit unreachable default more aggressively.
    bool UnreachableDefault =
        isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
    if (UnreachableDefault && !Clusters.empty()) {
      DenseMap<const BasicBlock *, unsigned> Popularity;
      unsigned MaxPop = 0;
      const BasicBlock *MaxBB = nullptr;
      for (auto I : SI.cases()) {
        const BasicBlock *BB = I.getCaseSuccessor();
        if (++Popularity[BB] > MaxPop) {
          MaxPop = Popularity[BB];
          MaxBB = BB;
        }
      }
      // Set new default.
      assert(MaxPop > 0 && MaxBB);
      DefaultMBB = FuncInfo.MBBMap[MaxBB];

      // Remove cases that were pointing to the destination that is now the
      // default.
      CaseClusterVector New;
      New.reserve(Clusters.size());
      for (CaseCluster &CC : Clusters) {
        if (CC.MBB != DefaultMBB)
          New.push_back(CC);
      }
      Clusters = std::move(New);
    }
  }

  // If there is only the default destination, jump there directly.
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  if (Clusters.empty()) {
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != NextBlock(SwitchMBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
    }
    return;
  }

  // Try to coalesce runs of clusters into jump tables and bit tests; both
  // passes rewrite Clusters in place.
  findJumpTables(Clusters, &SI, DefaultMBB);
  findBitTestClusters(Clusters, &SI);

  DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable) dbgs() << "JT:";
      if (C.Kind == CC_BitTests) dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB);
  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight});

  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.back();
    WorkList.pop_back();
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;

    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
      // For optimized builds, lower large range as a balanced binary tree.
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;
    }

    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
  }
}