Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @ 97:b0dd3743370f
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Oct 2015 19:39:58 +0900 |
parents | d52ff4b80465 afa8332a0e37 |
children | 57be027de0f4 |
comparison
equal
deleted
inserted
replaced
94:d52ff4b80465 | 97:b0dd3743370f |
---|---|
20 #include "llvm/Analysis/AliasAnalysis.h" | 20 #include "llvm/Analysis/AliasAnalysis.h" |
21 #include "llvm/Analysis/BranchProbabilityInfo.h" | 21 #include "llvm/Analysis/BranchProbabilityInfo.h" |
22 #include "llvm/Analysis/ConstantFolding.h" | 22 #include "llvm/Analysis/ConstantFolding.h" |
23 #include "llvm/Analysis/TargetLibraryInfo.h" | 23 #include "llvm/Analysis/TargetLibraryInfo.h" |
24 #include "llvm/Analysis/ValueTracking.h" | 24 #include "llvm/Analysis/ValueTracking.h" |
25 #include "llvm/CodeGen/Analysis.h" | 25 #include "llvm/Analysis/VectorUtils.h" |
26 #include "llvm/CodeGen/FastISel.h" | 26 #include "llvm/CodeGen/FastISel.h" |
27 #include "llvm/CodeGen/FunctionLoweringInfo.h" | 27 #include "llvm/CodeGen/FunctionLoweringInfo.h" |
28 #include "llvm/CodeGen/GCMetadata.h" | 28 #include "llvm/CodeGen/GCMetadata.h" |
29 #include "llvm/CodeGen/GCStrategy.h" | 29 #include "llvm/CodeGen/GCStrategy.h" |
30 #include "llvm/CodeGen/MachineFrameInfo.h" | 30 #include "llvm/CodeGen/MachineFrameInfo.h" |
33 #include "llvm/CodeGen/MachineJumpTableInfo.h" | 33 #include "llvm/CodeGen/MachineJumpTableInfo.h" |
34 #include "llvm/CodeGen/MachineModuleInfo.h" | 34 #include "llvm/CodeGen/MachineModuleInfo.h" |
35 #include "llvm/CodeGen/MachineRegisterInfo.h" | 35 #include "llvm/CodeGen/MachineRegisterInfo.h" |
36 #include "llvm/CodeGen/SelectionDAG.h" | 36 #include "llvm/CodeGen/SelectionDAG.h" |
37 #include "llvm/CodeGen/StackMaps.h" | 37 #include "llvm/CodeGen/StackMaps.h" |
38 #include "llvm/CodeGen/WinEHFuncInfo.h" | |
38 #include "llvm/IR/CallingConv.h" | 39 #include "llvm/IR/CallingConv.h" |
39 #include "llvm/IR/Constants.h" | 40 #include "llvm/IR/Constants.h" |
40 #include "llvm/IR/DataLayout.h" | 41 #include "llvm/IR/DataLayout.h" |
41 #include "llvm/IR/DebugInfo.h" | 42 #include "llvm/IR/DebugInfo.h" |
42 #include "llvm/IR/DerivedTypes.h" | 43 #include "llvm/IR/DerivedTypes.h" |
61 #include "llvm/Target/TargetLowering.h" | 62 #include "llvm/Target/TargetLowering.h" |
62 #include "llvm/Target/TargetOptions.h" | 63 #include "llvm/Target/TargetOptions.h" |
63 #include "llvm/Target/TargetSelectionDAGInfo.h" | 64 #include "llvm/Target/TargetSelectionDAGInfo.h" |
64 #include "llvm/Target/TargetSubtargetInfo.h" | 65 #include "llvm/Target/TargetSubtargetInfo.h" |
65 #include <algorithm> | 66 #include <algorithm> |
67 #include <utility> | |
66 using namespace llvm; | 68 using namespace llvm; |
67 | 69 |
68 #define DEBUG_TYPE "isel" | 70 #define DEBUG_TYPE "isel" |
69 | 71 |
70 /// LimitFloatPrecision - Generate low-precision inline sequences for | 72 /// LimitFloatPrecision - Generate low-precision inline sequences for |
76 cl::desc("Generate low-precision inline sequences " | 78 cl::desc("Generate low-precision inline sequences " |
77 "for some float libcalls"), | 79 "for some float libcalls"), |
78 cl::location(LimitFloatPrecision), | 80 cl::location(LimitFloatPrecision), |
79 cl::init(0)); | 81 cl::init(0)); |
80 | 82 |
83 static cl::opt<bool> | |
84 EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, | |
85 cl::desc("Enable fast-math-flags for DAG nodes")); | |
86 | |
81 // Limit the width of DAG chains. This is important in general to prevent | 87 // Limit the width of DAG chains. This is important in general to prevent |
82 // prevent DAG-based analysis from blowing up. For example, alias analysis and | 88 // DAG-based analysis from blowing up. For example, alias analysis and |
83 // load clustering may not complete in reasonable time. It is difficult to | 89 // load clustering may not complete in reasonable time. It is difficult to |
84 // recognize and avoid this situation within each individual analysis, and | 90 // recognize and avoid this situation within each individual analysis, and |
85 // future analyses are likely to have the same behavior. Limiting DAG width is | 91 // future analyses are likely to have the same behavior. Limiting DAG width is |
86 // the safe approach, and will be especially important with global DAGs. | 92 // the safe approach and will be especially important with global DAGs. |
87 // | 93 // |
88 // MaxParallelChains default is arbitrarily high to avoid affecting | 94 // MaxParallelChains default is arbitrarily high to avoid affecting |
89 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st | 95 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st |
90 // sequence over this should have been converted to llvm.memcpy by the | 96 // sequence over this should have been converted to llvm.memcpy by the |
91 // frontend. It easy to induce this behavior with .ll code such as: | 97 // frontend. It easy to induce this behavior with .ll code such as: |
140 } else { | 146 } else { |
141 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); | 147 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); |
142 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); | 148 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); |
143 } | 149 } |
144 | 150 |
145 if (TLI.isBigEndian()) | 151 if (DAG.getDataLayout().isBigEndian()) |
146 std::swap(Lo, Hi); | 152 std::swap(Lo, Hi); |
147 | 153 |
148 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); | 154 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); |
149 | 155 |
150 if (RoundParts < NumParts) { | 156 if (RoundParts < NumParts) { |
154 Hi = getCopyFromParts(DAG, DL, | 160 Hi = getCopyFromParts(DAG, DL, |
155 Parts + RoundParts, OddParts, PartVT, OddVT, V); | 161 Parts + RoundParts, OddParts, PartVT, OddVT, V); |
156 | 162 |
157 // Combine the round and odd parts. | 163 // Combine the round and odd parts. |
158 Lo = Val; | 164 Lo = Val; |
159 if (TLI.isBigEndian()) | 165 if (DAG.getDataLayout().isBigEndian()) |
160 std::swap(Lo, Hi); | 166 std::swap(Lo, Hi); |
161 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); | 167 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); |
162 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); | 168 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); |
163 Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, | 169 Hi = |
164 DAG.getConstant(Lo.getValueType().getSizeInBits(), | 170 DAG.getNode(ISD::SHL, DL, TotalVT, Hi, |
165 TLI.getPointerTy())); | 171 DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, |
172 TLI.getPointerTy(DAG.getDataLayout()))); | |
166 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); | 173 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); |
167 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); | 174 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); |
168 } | 175 } |
169 } else if (PartVT.isFloatingPoint()) { | 176 } else if (PartVT.isFloatingPoint()) { |
170 // FP split into multiple FP parts (for ppcf128) | 177 // FP split into multiple FP parts (for ppcf128) |
171 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && | 178 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && |
172 "Unexpected split"); | 179 "Unexpected split"); |
173 SDValue Lo, Hi; | 180 SDValue Lo, Hi; |
174 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); | 181 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); |
175 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); | 182 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); |
176 if (TLI.hasBigEndianPartOrdering(ValueVT)) | 183 if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) |
177 std::swap(Lo, Hi); | 184 std::swap(Lo, Hi); |
178 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); | 185 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); |
179 } else { | 186 } else { |
180 // FP split into integer parts (soft fp) | 187 // FP split into integer parts (soft fp) |
181 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && | 188 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && |
205 } | 212 } |
206 | 213 |
207 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { | 214 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { |
208 // FP_ROUND's are always exact here. | 215 // FP_ROUND's are always exact here. |
209 if (ValueVT.bitsLT(Val.getValueType())) | 216 if (ValueVT.bitsLT(Val.getValueType())) |
210 return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, | 217 return DAG.getNode( |
211 DAG.getTargetConstant(1, TLI.getPointerTy())); | 218 ISD::FP_ROUND, DL, ValueVT, Val, |
219 DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); | |
212 | 220 |
213 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); | 221 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); |
214 } | 222 } |
215 | 223 |
216 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) | 224 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) |
255 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, | 263 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, |
256 NumIntermediates, RegisterVT); | 264 NumIntermediates, RegisterVT); |
257 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); | 265 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); |
258 NumParts = NumRegs; // Silence a compiler warning. | 266 NumParts = NumRegs; // Silence a compiler warning. |
259 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); | 267 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); |
260 assert(RegisterVT == Parts[0].getSimpleValueType() && | 268 assert(RegisterVT.getSizeInBits() == |
261 "Part type doesn't match part!"); | 269 Parts[0].getSimpleValueType().getSizeInBits() && |
270 "Part type sizes don't match!"); | |
262 | 271 |
263 // Assemble the parts into intermediate operands. | 272 // Assemble the parts into intermediate operands. |
264 SmallVector<SDValue, 8> Ops(NumIntermediates); | 273 SmallVector<SDValue, 8> Ops(NumIntermediates); |
265 if (NumIntermediates == NumParts) { | 274 if (NumIntermediates == NumParts) { |
266 // If the register was not expanded, truncate or copy the value, | 275 // If the register was not expanded, truncate or copy the value, |
298 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the | 307 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the |
299 // elements we want. | 308 // elements we want. |
300 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { | 309 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { |
301 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && | 310 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && |
302 "Cannot narrow, it would be a lossy transformation"); | 311 "Cannot narrow, it would be a lossy transformation"); |
303 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, | 312 return DAG.getNode( |
304 DAG.getConstant(0, TLI.getVectorIdxTy())); | 313 ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, |
314 DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); | |
305 } | 315 } |
306 | 316 |
307 // Vector/Vector bitcast. | 317 // Vector/Vector bitcast. |
308 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) | 318 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) |
309 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); | 319 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); |
310 | 320 |
311 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && | 321 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && |
312 "Cannot handle this kind of promotion"); | 322 "Cannot handle this kind of promotion"); |
313 // Promoted vector extract | 323 // Promoted vector extract |
314 bool Smaller = ValueVT.bitsLE(PartEVT); | 324 return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); |
315 return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), | |
316 DL, ValueVT, Val); | |
317 | 325 |
318 } | 326 } |
319 | 327 |
320 // Trivial bitcast if the types are the same size and the destination | 328 // Trivial bitcast if the types are the same size and the destination |
321 // vector type is legal. | 329 // vector type is legal. |
329 "non-trivial scalar-to-vector conversion"); | 337 "non-trivial scalar-to-vector conversion"); |
330 return DAG.getUNDEF(ValueVT); | 338 return DAG.getUNDEF(ValueVT); |
331 } | 339 } |
332 | 340 |
333 if (ValueVT.getVectorNumElements() == 1 && | 341 if (ValueVT.getVectorNumElements() == 1 && |
334 ValueVT.getVectorElementType() != PartEVT) { | 342 ValueVT.getVectorElementType() != PartEVT) |
335 bool Smaller = ValueVT.bitsLE(PartEVT); | 343 Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); |
336 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), | |
337 DL, ValueVT.getScalarType(), Val); | |
338 } | |
339 | 344 |
340 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); | 345 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); |
341 } | 346 } |
342 | 347 |
343 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, | 348 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, |
355 | 360 |
356 // Handle the vector case separately. | 361 // Handle the vector case separately. |
357 if (ValueVT.isVector()) | 362 if (ValueVT.isVector()) |
358 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); | 363 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); |
359 | 364 |
360 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
361 unsigned PartBits = PartVT.getSizeInBits(); | 365 unsigned PartBits = PartVT.getSizeInBits(); |
362 unsigned OrigNumParts = NumParts; | 366 unsigned OrigNumParts = NumParts; |
363 assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); | 367 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && |
368 "Copying to an illegal type!"); | |
364 | 369 |
365 if (NumParts == 0) | 370 if (NumParts == 0) |
366 return; | 371 return; |
367 | 372 |
368 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); | 373 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); |
423 "Do not know what to expand to!"); | 428 "Do not know what to expand to!"); |
424 unsigned RoundParts = 1 << Log2_32(NumParts); | 429 unsigned RoundParts = 1 << Log2_32(NumParts); |
425 unsigned RoundBits = RoundParts * PartBits; | 430 unsigned RoundBits = RoundParts * PartBits; |
426 unsigned OddParts = NumParts - RoundParts; | 431 unsigned OddParts = NumParts - RoundParts; |
427 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, | 432 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, |
428 DAG.getIntPtrConstant(RoundBits)); | 433 DAG.getIntPtrConstant(RoundBits, DL)); |
429 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); | 434 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); |
430 | 435 |
431 if (TLI.isBigEndian()) | 436 if (DAG.getDataLayout().isBigEndian()) |
432 // The odd parts were reversed by getCopyToParts - unreverse them. | 437 // The odd parts were reversed by getCopyToParts - unreverse them. |
433 std::reverse(Parts + RoundParts, Parts + NumParts); | 438 std::reverse(Parts + RoundParts, Parts + NumParts); |
434 | 439 |
435 NumParts = RoundParts; | 440 NumParts = RoundParts; |
436 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); | 441 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); |
450 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); | 455 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); |
451 SDValue &Part0 = Parts[i]; | 456 SDValue &Part0 = Parts[i]; |
452 SDValue &Part1 = Parts[i+StepSize/2]; | 457 SDValue &Part1 = Parts[i+StepSize/2]; |
453 | 458 |
454 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, | 459 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, |
455 ThisVT, Part0, DAG.getIntPtrConstant(1)); | 460 ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); |
456 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, | 461 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, |
457 ThisVT, Part0, DAG.getIntPtrConstant(0)); | 462 ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); |
458 | 463 |
459 if (ThisBits == PartBits && ThisVT != PartVT) { | 464 if (ThisBits == PartBits && ThisVT != PartVT) { |
460 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); | 465 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); |
461 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); | 466 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); |
462 } | 467 } |
463 } | 468 } |
464 } | 469 } |
465 | 470 |
466 if (TLI.isBigEndian()) | 471 if (DAG.getDataLayout().isBigEndian()) |
467 std::reverse(Parts, Parts + OrigNumParts); | 472 std::reverse(Parts, Parts + OrigNumParts); |
468 } | 473 } |
469 | 474 |
470 | 475 |
471 /// getCopyToPartsVector - Create a series of nodes that contain the specified | 476 /// getCopyToPartsVector - Create a series of nodes that contain the specified |
490 EVT ElementVT = PartVT.getVectorElementType(); | 495 EVT ElementVT = PartVT.getVectorElementType(); |
491 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in | 496 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in |
492 // undef elements. | 497 // undef elements. |
493 SmallVector<SDValue, 16> Ops; | 498 SmallVector<SDValue, 16> Ops; |
494 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) | 499 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) |
495 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, | 500 Ops.push_back(DAG.getNode( |
496 ElementVT, Val, DAG.getConstant(i, | 501 ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, |
497 TLI.getVectorIdxTy()))); | 502 DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); |
498 | 503 |
499 for (unsigned i = ValueVT.getVectorNumElements(), | 504 for (unsigned i = ValueVT.getVectorNumElements(), |
500 e = PartVT.getVectorNumElements(); i != e; ++i) | 505 e = PartVT.getVectorNumElements(); i != e; ++i) |
501 Ops.push_back(DAG.getUNDEF(ElementVT)); | 506 Ops.push_back(DAG.getUNDEF(ElementVT)); |
502 | 507 |
510 PartEVT.getVectorElementType().bitsGE( | 515 PartEVT.getVectorElementType().bitsGE( |
511 ValueVT.getVectorElementType()) && | 516 ValueVT.getVectorElementType()) && |
512 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { | 517 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { |
513 | 518 |
514 // Promoted vector extract | 519 // Promoted vector extract |
515 bool Smaller = PartEVT.bitsLE(ValueVT); | 520 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); |
516 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), | |
517 DL, PartVT, Val); | |
518 } else{ | 521 } else{ |
519 // Vector -> scalar conversion. | 522 // Vector -> scalar conversion. |
520 assert(ValueVT.getVectorNumElements() == 1 && | 523 assert(ValueVT.getVectorNumElements() == 1 && |
521 "Only trivial vector-to-scalar conversions should get here!"); | 524 "Only trivial vector-to-scalar conversions should get here!"); |
522 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, | 525 Val = DAG.getNode( |
523 PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); | 526 ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, |
524 | 527 DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
525 bool Smaller = ValueVT.bitsLE(PartVT); | 528 |
526 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), | 529 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); |
527 DL, PartVT, Val); | |
528 } | 530 } |
529 | 531 |
530 Parts[0] = Val; | 532 Parts[0] = Val; |
531 return; | 533 return; |
532 } | 534 } |
546 | 548 |
547 // Split the vector into intermediate operands. | 549 // Split the vector into intermediate operands. |
548 SmallVector<SDValue, 8> Ops(NumIntermediates); | 550 SmallVector<SDValue, 8> Ops(NumIntermediates); |
549 for (unsigned i = 0; i != NumIntermediates; ++i) { | 551 for (unsigned i = 0; i != NumIntermediates; ++i) { |
550 if (IntermediateVT.isVector()) | 552 if (IntermediateVT.isVector()) |
551 Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, | 553 Ops[i] = |
552 IntermediateVT, Val, | 554 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, |
553 DAG.getConstant(i * (NumElements / NumIntermediates), | 555 DAG.getConstant(i * (NumElements / NumIntermediates), DL, |
554 TLI.getVectorIdxTy())); | 556 TLI.getVectorIdxTy(DAG.getDataLayout()))); |
555 else | 557 else |
556 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, | 558 Ops[i] = DAG.getNode( |
557 IntermediateVT, Val, | 559 ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, |
558 DAG.getConstant(i, TLI.getVectorIdxTy())); | 560 DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
559 } | 561 } |
560 | 562 |
561 // Split the intermediate operands into legal parts. | 563 // Split the intermediate operands into legal parts. |
562 if (NumParts == NumIntermediates) { | 564 if (NumParts == NumIntermediates) { |
563 // If the register was not expanded, promote or copy the value, | 565 // If the register was not expanded, promote or copy the value, |
574 for (unsigned i = 0; i != NumIntermediates; ++i) | 576 for (unsigned i = 0; i != NumIntermediates; ++i) |
575 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); | 577 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); |
576 } | 578 } |
577 } | 579 } |
578 | 580 |
579 namespace { | 581 RegsForValue::RegsForValue() {} |
580 /// RegsForValue - This struct represents the registers (physical or virtual) | 582 |
581 /// that a particular set of values is assigned, and the type information | 583 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, |
582 /// about the value. The most common situation is to represent one value at a | 584 EVT valuevt) |
583 /// time, but struct or array values are handled element-wise as multiple | 585 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} |
584 /// values. The splitting of aggregates is performed recursively, so that we | 586 |
585 /// never have aggregate-typed registers. The values at this point do not | 587 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, |
586 /// necessarily have legal types, so each value may require one or more | 588 const DataLayout &DL, unsigned Reg, Type *Ty) { |
587 /// registers of some legal type. | 589 ComputeValueVTs(TLI, DL, Ty, ValueVTs); |
588 /// | 590 |
589 struct RegsForValue { | 591 for (EVT ValueVT : ValueVTs) { |
590 /// ValueVTs - The value types of the values, which may not be legal, and | 592 unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); |
591 /// may need be promoted or synthesized from one or more registers. | 593 MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); |
592 /// | 594 for (unsigned i = 0; i != NumRegs; ++i) |
593 SmallVector<EVT, 4> ValueVTs; | 595 Regs.push_back(Reg + i); |
594 | 596 RegVTs.push_back(RegisterVT); |
595 /// RegVTs - The value types of the registers. This is the same size as | 597 Reg += NumRegs; |
596 /// ValueVTs and it records, for each value, what the type of the assigned | 598 } |
597 /// register or registers are. (Individual values are never synthesized | |
598 /// from more than one type of register.) | |
599 /// | |
600 /// With virtual registers, the contents of RegVTs is redundant with TLI's | |
601 /// getRegisterType member function, however when with physical registers | |
602 /// it is necessary to have a separate record of the types. | |
603 /// | |
604 SmallVector<MVT, 4> RegVTs; | |
605 | |
606 /// Regs - This list holds the registers assigned to the values. | |
607 /// Each legal or promoted value requires one register, and each | |
608 /// expanded value requires multiple registers. | |
609 /// | |
610 SmallVector<unsigned, 4> Regs; | |
611 | |
612 RegsForValue() {} | |
613 | |
614 RegsForValue(const SmallVector<unsigned, 4> &regs, | |
615 MVT regvt, EVT valuevt) | |
616 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} | |
617 | |
618 RegsForValue(LLVMContext &Context, const TargetLowering &tli, | |
619 unsigned Reg, Type *Ty) { | |
620 ComputeValueVTs(tli, Ty, ValueVTs); | |
621 | |
622 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { | |
623 EVT ValueVT = ValueVTs[Value]; | |
624 unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); | |
625 MVT RegisterVT = tli.getRegisterType(Context, ValueVT); | |
626 for (unsigned i = 0; i != NumRegs; ++i) | |
627 Regs.push_back(Reg + i); | |
628 RegVTs.push_back(RegisterVT); | |
629 Reg += NumRegs; | |
630 } | |
631 } | |
632 | |
633 /// append - Add the specified values to this one. | |
634 void append(const RegsForValue &RHS) { | |
635 ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); | |
636 RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); | |
637 Regs.append(RHS.Regs.begin(), RHS.Regs.end()); | |
638 } | |
639 | |
640 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from | |
641 /// this value and returns the result as a ValueVTs value. This uses | |
642 /// Chain/Flag as the input and updates them for the output Chain/Flag. | |
643 /// If the Flag pointer is NULL, no flag is used. | |
644 SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, | |
645 SDLoc dl, | |
646 SDValue &Chain, SDValue *Flag, | |
647 const Value *V = nullptr) const; | |
648 | |
649 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the | |
650 /// specified value into the registers specified by this object. This uses | |
651 /// Chain/Flag as the input and updates them for the output Chain/Flag. | |
652 /// If the Flag pointer is NULL, no flag is used. | |
653 void | |
654 getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, | |
655 SDValue *Flag, const Value *V, | |
656 ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; | |
657 | |
658 /// AddInlineAsmOperands - Add this value to the specified inlineasm node | |
659 /// operand list. This adds the code marker, matching input operand index | |
660 /// (if applicable), and includes the number of values added into it. | |
661 void AddInlineAsmOperands(unsigned Kind, | |
662 bool HasMatching, unsigned MatchingIdx, | |
663 SelectionDAG &DAG, | |
664 std::vector<SDValue> &Ops) const; | |
665 }; | |
666 } | 599 } |
667 | 600 |
668 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from | 601 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from |
669 /// this value and returns the result as a ValueVT value. This uses | 602 /// this value and returns the result as a ValueVT value. This uses |
670 /// Chain/Flag as the input and updates them for the output Chain/Flag. | 603 /// Chain/Flag as the input and updates them for the output Chain/Flag. |
719 | 652 |
720 if (NumZeroBits == RegSize) { | 653 if (NumZeroBits == RegSize) { |
721 // The current value is a zero. | 654 // The current value is a zero. |
722 // Explicitly express that as it would be easier for | 655 // Explicitly express that as it would be easier for |
723 // optimizations to kick in. | 656 // optimizations to kick in. |
724 Parts[i] = DAG.getConstant(0, RegisterVT); | 657 Parts[i] = DAG.getConstant(0, dl, RegisterVT); |
725 continue; | 658 continue; |
726 } | 659 } |
727 | 660 |
728 // FIXME: We capture more information than the dag can represent. For | 661 // FIXME: We capture more information than the dag can represent. For |
729 // now, just use the tightest assertzext/assertsext possible. | 662 // now, just use the tightest assertzext/assertsext possible. |
821 | 754 |
822 /// AddInlineAsmOperands - Add this value to the specified inlineasm node | 755 /// AddInlineAsmOperands - Add this value to the specified inlineasm node |
823 /// operand list. This adds the code marker and includes the number of | 756 /// operand list. This adds the code marker and includes the number of |
824 /// values added into it. | 757 /// values added into it. |
825 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, | 758 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, |
826 unsigned MatchingIdx, | 759 unsigned MatchingIdx, SDLoc dl, |
827 SelectionDAG &DAG, | 760 SelectionDAG &DAG, |
828 std::vector<SDValue> &Ops) const { | 761 std::vector<SDValue> &Ops) const { |
829 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 762 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
830 | 763 |
831 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); | 764 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); |
841 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); | 774 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); |
842 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); | 775 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); |
843 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); | 776 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); |
844 } | 777 } |
845 | 778 |
846 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); | 779 SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); |
847 Ops.push_back(Res); | 780 Ops.push_back(Res); |
848 | 781 |
849 unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); | 782 unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); |
850 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { | 783 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { |
851 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); | 784 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); |
856 Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); | 789 Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); |
857 | 790 |
858 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { | 791 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { |
859 // If we clobbered the stack pointer, MFI should know about it. | 792 // If we clobbered the stack pointer, MFI should know about it. |
860 assert(DAG.getMachineFunction().getFrameInfo()-> | 793 assert(DAG.getMachineFunction().getFrameInfo()-> |
861 hasInlineAsmWithSPAdjust()); | 794 hasOpaqueSPAdjustment()); |
862 } | 795 } |
863 } | 796 } |
864 } | 797 } |
865 } | 798 } |
866 | 799 |
867 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, | 800 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, |
868 const TargetLibraryInfo *li) { | 801 const TargetLibraryInfo *li) { |
869 AA = &aa; | 802 AA = &aa; |
870 GFI = gfi; | 803 GFI = gfi; |
871 LibInfo = li; | 804 LibInfo = li; |
872 DL = DAG.getTarget().getDataLayout(); | 805 DL = &DAG.getDataLayout(); |
873 Context = DAG.getContext(); | 806 Context = DAG.getContext(); |
874 LPadToCallSiteMap.clear(); | 807 LPadToCallSiteMap.clear(); |
875 } | 808 } |
876 | 809 |
877 /// clear - Clear out the current SelectionDAG and the associated | 810 /// clear - Clear out the current SelectionDAG and the associated |
995 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; | 928 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; |
996 if (DDI.getDI()) { | 929 if (DDI.getDI()) { |
997 const DbgValueInst *DI = DDI.getDI(); | 930 const DbgValueInst *DI = DDI.getDI(); |
998 DebugLoc dl = DDI.getdl(); | 931 DebugLoc dl = DDI.getdl(); |
999 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); | 932 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); |
1000 MDNode *Variable = DI->getVariable(); | 933 DILocalVariable *Variable = DI->getVariable(); |
1001 MDNode *Expr = DI->getExpression(); | 934 DIExpression *Expr = DI->getExpression(); |
935 assert(Variable->isValidLocationForIntrinsic(dl) && | |
936 "Expected inlined-at fields to agree"); | |
1002 uint64_t Offset = DI->getOffset(); | 937 uint64_t Offset = DI->getOffset(); |
1003 // A dbg.value for an alloca is always indirect. | 938 // A dbg.value for an alloca is always indirect. |
1004 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; | 939 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; |
1005 SDDbgValue *SDV; | 940 SDDbgValue *SDV; |
1006 if (Val.getNode()) { | 941 if (Val.getNode()) { |
1007 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, | 942 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, |
1008 Val)) { | 943 Val)) { |
1009 SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), | 944 SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), |
1010 IsIndirect, Offset, dl, DbgSDNodeOrder); | 945 IsIndirect, Offset, dl, DbgSDNodeOrder); |
1011 DAG.AddDbgValue(SDV, Val.getNode(), false); | 946 DAG.AddDbgValue(SDV, Val.getNode(), false); |
1012 } | 947 } |
1014 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); | 949 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); |
1015 DanglingDebugInfoMap[V] = DanglingDebugInfo(); | 950 DanglingDebugInfoMap[V] = DanglingDebugInfo(); |
1016 } | 951 } |
1017 } | 952 } |
1018 | 953 |
954 /// getCopyFromRegs - If there was virtual register allocated for the value V | |
955 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. | |
956 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { | |
957 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); | |
958 SDValue Result; | |
959 | |
960 if (It != FuncInfo.ValueMap.end()) { | |
961 unsigned InReg = It->second; | |
962 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), | |
963 DAG.getDataLayout(), InReg, Ty); | |
964 SDValue Chain = DAG.getEntryNode(); | |
965 Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); | |
966 resolveDanglingDebugInfo(V, Result); | |
967 } | |
968 | |
969 return Result; | |
970 } | |
971 | |
1019 /// getValue - Return an SDValue for the given Value. | 972 /// getValue - Return an SDValue for the given Value. |
1020 SDValue SelectionDAGBuilder::getValue(const Value *V) { | 973 SDValue SelectionDAGBuilder::getValue(const Value *V) { |
1021 // If we already have an SDValue for this value, use it. It's important | 974 // If we already have an SDValue for this value, use it. It's important |
1022 // to do this first, so that we don't create a CopyFromReg if we already | 975 // to do this first, so that we don't create a CopyFromReg if we already |
1023 // have a regular SDValue. | 976 // have a regular SDValue. |
1024 SDValue &N = NodeMap[V]; | 977 SDValue &N = NodeMap[V]; |
1025 if (N.getNode()) return N; | 978 if (N.getNode()) return N; |
1026 | 979 |
1027 // If there's a virtual register allocated and initialized for this | 980 // If there's a virtual register allocated and initialized for this |
1028 // value, use it. | 981 // value, use it. |
1029 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); | 982 SDValue copyFromReg = getCopyFromRegs(V, V->getType()); |
1030 if (It != FuncInfo.ValueMap.end()) { | 983 if (copyFromReg.getNode()) { |
1031 unsigned InReg = It->second; | 984 return copyFromReg; |
1032 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, | |
1033 V->getType()); | |
1034 SDValue Chain = DAG.getEntryNode(); | |
1035 N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); | |
1036 resolveDanglingDebugInfo(V, N); | |
1037 return N; | |
1038 } | 985 } |
1039 | 986 |
1040 // Otherwise create a new SDValue and remember it. | 987 // Otherwise create a new SDValue and remember it. |
1041 SDValue Val = getValueImpl(V); | 988 SDValue Val = getValueImpl(V); |
1042 NodeMap[V] = Val; | 989 NodeMap[V] = Val; |
1043 resolveDanglingDebugInfo(V, Val); | 990 resolveDanglingDebugInfo(V, Val); |
1044 return Val; | 991 return Val; |
1045 } | 992 } |
1046 | 993 |
994 // Return true if SDValue exists for the given Value | |
995 bool SelectionDAGBuilder::findValue(const Value *V) const { | |
996 return (NodeMap.find(V) != NodeMap.end()) || | |
997 (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); | |
998 } | |
999 | |
1047 /// getNonRegisterValue - Return an SDValue for the given Value, but | 1000 /// getNonRegisterValue - Return an SDValue for the given Value, but |
1048 /// don't look in FuncInfo.ValueMap for a virtual register. | 1001 /// don't look in FuncInfo.ValueMap for a virtual register. |
1049 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { | 1002 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { |
1050 // If we already have an SDValue for this value, use it. | 1003 // If we already have an SDValue for this value, use it. |
1051 SDValue &N = NodeMap[V]; | 1004 SDValue &N = NodeMap[V]; |
1052 if (N.getNode()) return N; | 1005 if (N.getNode()) { |
1006 if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) { | |
1007 // Remove the debug location from the node as the node is about to be used | |
1008 // in a location which may differ from the original debug location. This | |
1009 // is relevant to Constant and ConstantFP nodes because they can appear | |
1010 // as constant expressions inside PHI nodes. | |
1011 N->setDebugLoc(DebugLoc()); | |
1012 } | |
1013 return N; | |
1014 } | |
1053 | 1015 |
1054 // Otherwise create a new SDValue and remember it. | 1016 // Otherwise create a new SDValue and remember it. |
1055 SDValue Val = getValueImpl(V); | 1017 SDValue Val = getValueImpl(V); |
1056 NodeMap[V] = Val; | 1018 NodeMap[V] = Val; |
1057 resolveDanglingDebugInfo(V, Val); | 1019 resolveDanglingDebugInfo(V, Val); |
1062 /// Create an SDValue for the given value. | 1024 /// Create an SDValue for the given value. |
1063 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { | 1025 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { |
1064 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 1026 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1065 | 1027 |
1066 if (const Constant *C = dyn_cast<Constant>(V)) { | 1028 if (const Constant *C = dyn_cast<Constant>(V)) { |
1067 EVT VT = TLI.getValueType(V->getType(), true); | 1029 EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); |
1068 | 1030 |
1069 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) | 1031 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) |
1070 return DAG.getConstant(*CI, VT); | 1032 return DAG.getConstant(*CI, getCurSDLoc(), VT); |
1071 | 1033 |
1072 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) | 1034 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) |
1073 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); | 1035 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); |
1074 | 1036 |
1075 if (isa<ConstantPointerNull>(C)) { | 1037 if (isa<ConstantPointerNull>(C)) { |
1076 unsigned AS = V->getType()->getPointerAddressSpace(); | 1038 unsigned AS = V->getType()->getPointerAddressSpace(); |
1077 return DAG.getConstant(0, TLI.getPointerTy(AS)); | 1039 return DAG.getConstant(0, getCurSDLoc(), |
1040 TLI.getPointerTy(DAG.getDataLayout(), AS)); | |
1078 } | 1041 } |
1079 | 1042 |
1080 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) | 1043 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) |
1081 return DAG.getConstantFP(*CFP, VT); | 1044 return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); |
1082 | 1045 |
1083 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) | 1046 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) |
1084 return DAG.getUNDEF(VT); | 1047 return DAG.getUNDEF(VT); |
1085 | 1048 |
1086 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { | 1049 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { |
1126 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { | 1089 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { |
1127 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && | 1090 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && |
1128 "Unknown struct or array constant!"); | 1091 "Unknown struct or array constant!"); |
1129 | 1092 |
1130 SmallVector<EVT, 4> ValueVTs; | 1093 SmallVector<EVT, 4> ValueVTs; |
1131 ComputeValueVTs(TLI, C->getType(), ValueVTs); | 1094 ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); |
1132 unsigned NumElts = ValueVTs.size(); | 1095 unsigned NumElts = ValueVTs.size(); |
1133 if (NumElts == 0) | 1096 if (NumElts == 0) |
1134 return SDValue(); // empty struct | 1097 return SDValue(); // empty struct |
1135 SmallVector<SDValue, 4> Constants(NumElts); | 1098 SmallVector<SDValue, 4> Constants(NumElts); |
1136 for (unsigned i = 0; i != NumElts; ++i) { | 1099 for (unsigned i = 0; i != NumElts; ++i) { |
1137 EVT EltVT = ValueVTs[i]; | 1100 EVT EltVT = ValueVTs[i]; |
1138 if (isa<UndefValue>(C)) | 1101 if (isa<UndefValue>(C)) |
1139 Constants[i] = DAG.getUNDEF(EltVT); | 1102 Constants[i] = DAG.getUNDEF(EltVT); |
1140 else if (EltVT.isFloatingPoint()) | 1103 else if (EltVT.isFloatingPoint()) |
1141 Constants[i] = DAG.getConstantFP(0, EltVT); | 1104 Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); |
1142 else | 1105 else |
1143 Constants[i] = DAG.getConstant(0, EltVT); | 1106 Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); |
1144 } | 1107 } |
1145 | 1108 |
1146 return DAG.getMergeValues(Constants, getCurSDLoc()); | 1109 return DAG.getMergeValues(Constants, getCurSDLoc()); |
1147 } | 1110 } |
1148 | 1111 |
1158 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { | 1121 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { |
1159 for (unsigned i = 0; i != NumElements; ++i) | 1122 for (unsigned i = 0; i != NumElements; ++i) |
1160 Ops.push_back(getValue(CV->getOperand(i))); | 1123 Ops.push_back(getValue(CV->getOperand(i))); |
1161 } else { | 1124 } else { |
1162 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); | 1125 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); |
1163 EVT EltVT = TLI.getValueType(VecTy->getElementType()); | 1126 EVT EltVT = |
1127 TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); | |
1164 | 1128 |
1165 SDValue Op; | 1129 SDValue Op; |
1166 if (EltVT.isFloatingPoint()) | 1130 if (EltVT.isFloatingPoint()) |
1167 Op = DAG.getConstantFP(0, EltVT); | 1131 Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); |
1168 else | 1132 else |
1169 Op = DAG.getConstant(0, EltVT); | 1133 Op = DAG.getConstant(0, getCurSDLoc(), EltVT); |
1170 Ops.assign(NumElements, Op); | 1134 Ops.assign(NumElements, Op); |
1171 } | 1135 } |
1172 | 1136 |
1173 // Create a BUILD_VECTOR node. | 1137 // Create a BUILD_VECTOR node. |
1174 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); | 1138 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); |
1178 // computation. | 1142 // computation. |
1179 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { | 1143 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { |
1180 DenseMap<const AllocaInst*, int>::iterator SI = | 1144 DenseMap<const AllocaInst*, int>::iterator SI = |
1181 FuncInfo.StaticAllocaMap.find(AI); | 1145 FuncInfo.StaticAllocaMap.find(AI); |
1182 if (SI != FuncInfo.StaticAllocaMap.end()) | 1146 if (SI != FuncInfo.StaticAllocaMap.end()) |
1183 return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); | 1147 return DAG.getFrameIndex(SI->second, |
1148 TLI.getPointerTy(DAG.getDataLayout())); | |
1184 } | 1149 } |
1185 | 1150 |
1186 // If this is an instruction which fast-isel has deferred, select it now. | 1151 // If this is an instruction which fast-isel has deferred, select it now. |
1187 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { | 1152 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { |
1188 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); | 1153 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); |
1189 RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); | 1154 RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, |
1155 Inst->getType()); | |
1190 SDValue Chain = DAG.getEntryNode(); | 1156 SDValue Chain = DAG.getEntryNode(); |
1191 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); | 1157 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); |
1192 } | 1158 } |
1193 | 1159 |
1194 llvm_unreachable("Can't get register for value!"); | 1160 llvm_unreachable("Can't get register for value!"); |
1195 } | 1161 } |
1196 | 1162 |
1163 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { | |
1164 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); | |
1165 bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; | |
1166 bool IsSEH = isAsynchronousEHPersonality(Pers); | |
1167 bool IsCoreCLR = Pers == EHPersonality::CoreCLR; | |
1168 MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; | |
1169 // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. | |
1170 if (IsMSVCCXX || IsCoreCLR) | |
1171 CatchPadMBB->setIsEHFuncletEntry(); | |
1172 | |
1173 MachineBasicBlock *NormalDestMBB = FuncInfo.MBBMap[I.getNormalDest()]; | |
1174 | |
1175 // Update machine-CFG edge. | |
1176 FuncInfo.MBB->addSuccessor(NormalDestMBB); | |
1177 | |
1178 // CatchPads in SEH are not funclets, they are merely markers which indicate | |
1179 // where to insert register restoration code. | |
1180 if (IsSEH) { | |
1181 DAG.setRoot(DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, | |
1182 getControlRoot(), DAG.getBasicBlock(NormalDestMBB), | |
1183 DAG.getBasicBlock(FuncInfo.MF->begin()))); | |
1184 return; | |
1185 } | |
1186 | |
1187 // If this is not a fall-through branch or optimizations are switched off, | |
1188 // emit the branch. | |
1189 if (NormalDestMBB != NextBlock(CatchPadMBB) || | |
1190 TM.getOptLevel() == CodeGenOpt::None) | |
1191 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, | |
1192 getControlRoot(), | |
1193 DAG.getBasicBlock(NormalDestMBB))); | |
1194 } | |
1195 | |
1196 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { | |
1197 // Update machine-CFG edge. | |
1198 MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; | |
1199 FuncInfo.MBB->addSuccessor(TargetMBB); | |
1200 | |
1201 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); | |
1202 bool IsSEH = isAsynchronousEHPersonality(Pers); | |
1203 if (IsSEH) { | |
1204 // If this is not a fall-through branch or optimizations are switched off, | |
1205 // emit the branch. | |
1206 if (TargetMBB != NextBlock(FuncInfo.MBB) || | |
1207 TM.getOptLevel() == CodeGenOpt::None) | |
1208 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, | |
1209 getControlRoot(), DAG.getBasicBlock(TargetMBB))); | |
1210 return; | |
1211 } | |
1212 | |
1213 // Figure out the funclet membership for the catchret's successor. | |
1214 // This will be used by the FuncletLayout pass to determine how to order the | |
1215 // BB's. | |
1216 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); | |
1217 WinEHFuncInfo &EHInfo = | |
1218 MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction()); | |
1219 const BasicBlock *SuccessorColor = EHInfo.CatchRetSuccessorColorMap[&I]; | |
1220 assert(SuccessorColor && "No parent funclet for catchret!"); | |
1221 MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; | |
1222 assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); | |
1223 | |
1224 // Create the terminator node. | |
1225 SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, | |
1226 getControlRoot(), DAG.getBasicBlock(TargetMBB), | |
1227 DAG.getBasicBlock(SuccessorColorMBB)); | |
1228 DAG.setRoot(Ret); | |
1229 } | |
1230 | |
1231 void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) { | |
1232 llvm_unreachable("should never codegen catchendpads"); | |
1233 } | |
1234 | |
1235 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { | |
1236 // Don't emit any special code for the cleanuppad instruction. It just marks | |
1237 // the start of a funclet. | |
1238 FuncInfo.MBB->setIsEHFuncletEntry(); | |
1239 FuncInfo.MBB->setIsCleanupFuncletEntry(); | |
1240 } | |
1241 | |
1242 /// When an invoke or a cleanupret unwinds to the next EH pad, there are | |
1243 /// many places it could ultimately go. In the IR, we have a single unwind | |
1244 /// destination, but in the machine CFG, we enumerate all the possible blocks. | |
1245 /// This function skips over imaginary basic blocks that hold catchpad, | |
1246 /// terminatepad, or catchendpad instructions, and finds all the "real" machine | |
1247 /// basic block destinations. As those destinations may not be successors of | |
1248 /// EHPadBB, here we also calculate the edge weight to those destinations. The | |
1249 /// passed-in Weight is the edge weight to EHPadBB. | |
1250 static void findUnwindDestinations( | |
1251 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, uint32_t Weight, | |
1252 SmallVectorImpl<std::pair<MachineBasicBlock *, uint32_t>> &UnwindDests) { | |
1253 EHPersonality Personality = | |
1254 classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); | |
1255 bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; | |
1256 bool IsCoreCLR = Personality == EHPersonality::CoreCLR; | |
1257 | |
1258 while (EHPadBB) { | |
1259 const Instruction *Pad = EHPadBB->getFirstNonPHI(); | |
1260 BasicBlock *NewEHPadBB = nullptr; | |
1261 if (isa<LandingPadInst>(Pad)) { | |
1262 // Stop on landingpads. They are not funclets. | |
1263 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight); | |
1264 break; | |
1265 } else if (isa<CleanupPadInst>(Pad)) { | |
1266 // Stop on cleanup pads. Cleanups are always funclet entries for all known | |
1267 // personalities. | |
1268 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight); | |
1269 UnwindDests.back().first->setIsEHFuncletEntry(); | |
1270 break; | |
1271 } else if (const auto *CPI = dyn_cast<CatchPadInst>(Pad)) { | |
1272 // Add the catchpad handler to the possible destinations. | |
1273 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Weight); | |
1274 // In MSVC C++, catchblocks are funclets and need prologues. | |
1275 if (IsMSVCCXX || IsCoreCLR) | |
1276 UnwindDests.back().first->setIsEHFuncletEntry(); | |
1277 NewEHPadBB = CPI->getUnwindDest(); | |
1278 } else if (const auto *CEPI = dyn_cast<CatchEndPadInst>(Pad)) | |
1279 NewEHPadBB = CEPI->getUnwindDest(); | |
1280 else if (const auto *CEPI = dyn_cast<CleanupEndPadInst>(Pad)) | |
1281 NewEHPadBB = CEPI->getUnwindDest(); | |
1282 else | |
1283 continue; | |
1284 | |
1285 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
1286 if (BPI && NewEHPadBB) { | |
1287 // When BPI is available, the calculated weight cannot be zero as zero | |
1288 // will be turned to a default weight in MachineBlockFrequencyInfo. | |
1289 Weight = std::max<uint32_t>( | |
1290 BPI->getEdgeProbability(EHPadBB, NewEHPadBB).scale(Weight), 1); | |
1291 } | |
1292 EHPadBB = NewEHPadBB; | |
1293 } | |
1294 } | |
1295 | |
1296 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { | |
1297 // Update successor info. | |
1298 SmallVector<std::pair<MachineBasicBlock *, uint32_t>, 1> UnwindDests; | |
1299 auto UnwindDest = I.getUnwindDest(); | |
1300 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
1301 uint32_t UnwindDestWeight = | |
1302 BPI ? BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), UnwindDest) : 0; | |
1303 findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestWeight, UnwindDests); | |
1304 for (auto &UnwindDest : UnwindDests) { | |
1305 UnwindDest.first->setIsEHPad(); | |
1306 addSuccessorWithWeight(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); | |
1307 } | |
1308 | |
1309 // Create the terminator node. | |
1310 SDValue Ret = | |
1311 DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); | |
1312 DAG.setRoot(Ret); | |
1313 } | |
1314 | |
1315 void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) { | |
1316 report_fatal_error("visitCleanupEndPad not yet implemented!"); | |
1317 } | |
1318 | |
1319 void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) { | |
1320 report_fatal_error("visitTerminatePad not yet implemented!"); | |
1321 } | |
1322 | |
1197 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { | 1323 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { |
1198 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 1324 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1325 auto &DL = DAG.getDataLayout(); | |
1199 SDValue Chain = getControlRoot(); | 1326 SDValue Chain = getControlRoot(); |
1200 SmallVector<ISD::OutputArg, 8> Outs; | 1327 SmallVector<ISD::OutputArg, 8> Outs; |
1201 SmallVector<SDValue, 8> OutVals; | 1328 SmallVector<SDValue, 8> OutVals; |
1202 | 1329 |
1203 if (!FuncInfo.CanLowerReturn) { | 1330 if (!FuncInfo.CanLowerReturn) { |
1206 | 1333 |
1207 // Emit a store of the return value through the virtual register. | 1334 // Emit a store of the return value through the virtual register. |
1208 // Leave Outs empty so that LowerReturn won't try to load return | 1335 // Leave Outs empty so that LowerReturn won't try to load return |
1209 // registers the usual way. | 1336 // registers the usual way. |
1210 SmallVector<EVT, 1> PtrValueVTs; | 1337 SmallVector<EVT, 1> PtrValueVTs; |
1211 ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), | 1338 ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), |
1212 PtrValueVTs); | 1339 PtrValueVTs); |
1213 | 1340 |
1214 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); | 1341 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); |
1215 SDValue RetOp = getValue(I.getOperand(0)); | 1342 SDValue RetOp = getValue(I.getOperand(0)); |
1216 | 1343 |
1217 SmallVector<EVT, 4> ValueVTs; | 1344 SmallVector<EVT, 4> ValueVTs; |
1218 SmallVector<uint64_t, 4> Offsets; | 1345 SmallVector<uint64_t, 4> Offsets; |
1219 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); | 1346 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); |
1220 unsigned NumValues = ValueVTs.size(); | 1347 unsigned NumValues = ValueVTs.size(); |
1221 | 1348 |
1222 SmallVector<SDValue, 4> Chains(NumValues); | 1349 SmallVector<SDValue, 4> Chains(NumValues); |
1223 for (unsigned i = 0; i != NumValues; ++i) { | 1350 for (unsigned i = 0; i != NumValues; ++i) { |
1224 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), | 1351 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), |
1225 RetPtr.getValueType(), RetPtr, | 1352 RetPtr.getValueType(), RetPtr, |
1226 DAG.getIntPtrConstant(Offsets[i])); | 1353 DAG.getIntPtrConstant(Offsets[i], |
1354 getCurSDLoc())); | |
1227 Chains[i] = | 1355 Chains[i] = |
1228 DAG.getStore(Chain, getCurSDLoc(), | 1356 DAG.getStore(Chain, getCurSDLoc(), |
1229 SDValue(RetOp.getNode(), RetOp.getResNo() + i), | 1357 SDValue(RetOp.getNode(), RetOp.getResNo() + i), |
1230 // FIXME: better loc info would be nice. | 1358 // FIXME: better loc info would be nice. |
1231 Add, MachinePointerInfo(), false, false, 0); | 1359 Add, MachinePointerInfo(), false, false, 0); |
1233 | 1361 |
1234 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), | 1362 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), |
1235 MVT::Other, Chains); | 1363 MVT::Other, Chains); |
1236 } else if (I.getNumOperands() != 0) { | 1364 } else if (I.getNumOperands() != 0) { |
1237 SmallVector<EVT, 4> ValueVTs; | 1365 SmallVector<EVT, 4> ValueVTs; |
1238 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); | 1366 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); |
1239 unsigned NumValues = ValueVTs.size(); | 1367 unsigned NumValues = ValueVTs.size(); |
1240 if (NumValues) { | 1368 if (NumValues) { |
1241 SDValue RetOp = getValue(I.getOperand(0)); | 1369 SDValue RetOp = getValue(I.getOperand(0)); |
1242 | 1370 |
1243 const Function *F = I.getParent()->getParent(); | 1371 const Function *F = I.getParent()->getParent(); |
1407 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && | 1535 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && |
1408 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { | 1536 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { |
1409 ISD::CondCode Condition; | 1537 ISD::CondCode Condition; |
1410 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { | 1538 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { |
1411 Condition = getICmpCondCode(IC->getPredicate()); | 1539 Condition = getICmpCondCode(IC->getPredicate()); |
1412 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { | 1540 } else { |
1541 const FCmpInst *FC = cast<FCmpInst>(Cond); | |
1413 Condition = getFCmpCondCode(FC->getPredicate()); | 1542 Condition = getFCmpCondCode(FC->getPredicate()); |
1414 if (TM.Options.NoNaNsFPMath) | 1543 if (TM.Options.NoNaNsFPMath) |
1415 Condition = getFCmpCodeWithoutNaN(Condition); | 1544 Condition = getFCmpCodeWithoutNaN(Condition); |
1416 } else { | |
1417 (void)Condition; // silence warning. | |
1418 llvm_unreachable("Unknown compare instruction"); | |
1419 } | 1545 } |
1420 | 1546 |
1421 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, | 1547 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, |
1422 TBB, FBB, CurBB, TWeight, FWeight); | 1548 TBB, FBB, CurBB, TWeight, FWeight); |
1423 SwitchCases.push_back(CB); | 1549 SwitchCases.push_back(CB); |
1443 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, | 1569 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, |
1444 MachineBasicBlock *TBB, | 1570 MachineBasicBlock *TBB, |
1445 MachineBasicBlock *FBB, | 1571 MachineBasicBlock *FBB, |
1446 MachineBasicBlock *CurBB, | 1572 MachineBasicBlock *CurBB, |
1447 MachineBasicBlock *SwitchBB, | 1573 MachineBasicBlock *SwitchBB, |
1448 unsigned Opc, uint32_t TWeight, | 1574 Instruction::BinaryOps Opc, |
1575 uint32_t TWeight, | |
1449 uint32_t FWeight) { | 1576 uint32_t FWeight) { |
1450 // If this node is not part of the or/and tree, emit it as a branch. | 1577 // If this node is not part of the or/and tree, emit it as a branch. |
1451 const Instruction *BOp = dyn_cast<Instruction>(Cond); | 1578 const Instruction *BOp = dyn_cast<Instruction>(Cond); |
1452 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || | 1579 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || |
1453 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || | 1580 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || |
1476 // | 1603 // |
1477 | 1604 |
1478 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. | 1605 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
1479 // The requirement is that | 1606 // The requirement is that |
1480 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) | 1607 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) |
1481 // = TrueProb for orignal BB. | 1608 // = TrueProb for original BB. |
1482 // Assuming the orignal weights are A and B, one choice is to set BB1's | 1609 // Assuming the original weights are A and B, one choice is to set BB1's |
1483 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice | 1610 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice |
1484 // assumes that | 1611 // assumes that |
1485 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. | 1612 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. |
1486 // Another choice is to assume TrueProb for BB1 equals to TrueProb for | 1613 // Another choice is to assume TrueProb for BB1 equals to TrueProb for |
1487 // TmpBB, but the math is more complicated. | 1614 // TmpBB, but the math is more complicated. |
1512 // This requires creation of TmpBB after CurBB. | 1639 // This requires creation of TmpBB after CurBB. |
1513 | 1640 |
1514 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. | 1641 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
1515 // The requirement is that | 1642 // The requirement is that |
1516 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) | 1643 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) |
1517 // = FalseProb for orignal BB. | 1644 // = FalseProb for original BB. |
1518 // Assuming the orignal weights are A and B, one choice is to set BB1's | 1645 // Assuming the original weights are A and B, one choice is to set BB1's |
1519 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice | 1646 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice |
1520 // assumes that | 1647 // assumes that |
1521 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. | 1648 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. |
1522 | 1649 |
1523 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; | 1650 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; |
1571 MachineBasicBlock *BrMBB = FuncInfo.MBB; | 1698 MachineBasicBlock *BrMBB = FuncInfo.MBB; |
1572 | 1699 |
1573 // Update machine-CFG edges. | 1700 // Update machine-CFG edges. |
1574 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; | 1701 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; |
1575 | 1702 |
1576 // Figure out which block is immediately after the current one. | |
1577 MachineBasicBlock *NextBlock = nullptr; | |
1578 MachineFunction::iterator BBI = BrMBB; | |
1579 if (++BBI != FuncInfo.MF->end()) | |
1580 NextBlock = BBI; | |
1581 | |
1582 if (I.isUnconditional()) { | 1703 if (I.isUnconditional()) { |
1583 // Update machine-CFG edges. | 1704 // Update machine-CFG edges. |
1584 BrMBB->addSuccessor(Succ0MBB); | 1705 BrMBB->addSuccessor(Succ0MBB); |
1585 | 1706 |
1586 // If this is not a fall-through branch or optimizations are switched off, | 1707 // If this is not a fall-through branch or optimizations are switched off, |
1587 // emit the branch. | 1708 // emit the branch. |
1588 if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) | 1709 if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) |
1589 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), | 1710 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), |
1590 MVT::Other, getControlRoot(), | 1711 MVT::Other, getControlRoot(), |
1591 DAG.getBasicBlock(Succ0MBB))); | 1712 DAG.getBasicBlock(Succ0MBB))); |
1592 | 1713 |
1593 return; | 1714 return; |
1613 // je foo | 1734 // je foo |
1614 // cmp D, E | 1735 // cmp D, E |
1615 // jle foo | 1736 // jle foo |
1616 // | 1737 // |
1617 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { | 1738 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { |
1618 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && | 1739 Instruction::BinaryOps Opcode = BOp->getOpcode(); |
1619 BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || | 1740 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && |
1620 BOp->getOpcode() == Instruction::Or)) { | 1741 !I.getMetadata(LLVMContext::MD_unpredictable) && |
1742 (Opcode == Instruction::And || Opcode == Instruction::Or)) { | |
1621 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, | 1743 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, |
1622 BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), | 1744 Opcode, getEdgeWeight(BrMBB, Succ0MBB), |
1623 getEdgeWeight(BrMBB, Succ1MBB)); | 1745 getEdgeWeight(BrMBB, Succ1MBB)); |
1624 // If the compares in later blocks need to use values not currently | 1746 // If the compares in later blocks need to use values not currently |
1625 // exported from this block, export them now. This block should always | 1747 // exported from this block, export them now. This block should always |
1626 // be the first entry. | 1748 // be the first entry. |
1627 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); | 1749 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); |
1672 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && | 1794 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && |
1673 CB.CC == ISD::SETEQ) | 1795 CB.CC == ISD::SETEQ) |
1674 Cond = CondLHS; | 1796 Cond = CondLHS; |
1675 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && | 1797 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && |
1676 CB.CC == ISD::SETEQ) { | 1798 CB.CC == ISD::SETEQ) { |
1677 SDValue True = DAG.getConstant(1, CondLHS.getValueType()); | 1799 SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); |
1678 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); | 1800 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); |
1679 } else | 1801 } else |
1680 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); | 1802 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); |
1681 } else { | 1803 } else { |
1682 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); | 1804 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); |
1683 | 1805 |
1684 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); | 1806 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); |
1685 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); | 1807 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); |
1686 | 1808 |
1687 SDValue CmpOp = getValue(CB.CmpMHS); | 1809 SDValue CmpOp = getValue(CB.CmpMHS); |
1688 EVT VT = CmpOp.getValueType(); | 1810 EVT VT = CmpOp.getValueType(); |
1689 | 1811 |
1690 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { | 1812 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { |
1691 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), | 1813 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), |
1692 ISD::SETLE); | 1814 ISD::SETLE); |
1693 } else { | 1815 } else { |
1694 SDValue SUB = DAG.getNode(ISD::SUB, dl, | 1816 SDValue SUB = DAG.getNode(ISD::SUB, dl, |
1695 VT, CmpOp, DAG.getConstant(Low, VT)); | 1817 VT, CmpOp, DAG.getConstant(Low, dl, VT)); |
1696 Cond = DAG.getSetCC(dl, MVT::i1, SUB, | 1818 Cond = DAG.getSetCC(dl, MVT::i1, SUB, |
1697 DAG.getConstant(High-Low, VT), ISD::SETULE); | 1819 DAG.getConstant(High-Low, dl, VT), ISD::SETULE); |
1698 } | 1820 } |
1699 } | 1821 } |
1700 | 1822 |
1701 // Update successor info | 1823 // Update successor info |
1702 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); | 1824 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); |
1703 // TrueBB and FalseBB are always different unless the incoming IR is | 1825 // TrueBB and FalseBB are always different unless the incoming IR is |
1704 // degenerate. This only happens when running llc on weird IR. | 1826 // degenerate. This only happens when running llc on weird IR. |
1705 if (CB.TrueBB != CB.FalseBB) | 1827 if (CB.TrueBB != CB.FalseBB) |
1706 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); | 1828 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); |
1707 | 1829 |
1708 // Set NextBlock to be the MBB immediately after the current one, if any. | |
1709 // This is used to avoid emitting unnecessary branches to the next block. | |
1710 MachineBasicBlock *NextBlock = nullptr; | |
1711 MachineFunction::iterator BBI = SwitchBB; | |
1712 if (++BBI != FuncInfo.MF->end()) | |
1713 NextBlock = BBI; | |
1714 | |
1715 // If the lhs block is the next block, invert the condition so that we can | 1830 // If the lhs block is the next block, invert the condition so that we can |
1716 // fall through to the lhs instead of the rhs block. | 1831 // fall through to the lhs instead of the rhs block. |
1717 if (CB.TrueBB == NextBlock) { | 1832 if (CB.TrueBB == NextBlock(SwitchBB)) { |
1718 std::swap(CB.TrueBB, CB.FalseBB); | 1833 std::swap(CB.TrueBB, CB.FalseBB); |
1719 SDValue True = DAG.getConstant(1, Cond.getValueType()); | 1834 SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); |
1720 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); | 1835 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); |
1721 } | 1836 } |
1722 | 1837 |
1723 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, | 1838 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, |
1724 MVT::Other, getControlRoot(), Cond, | 1839 MVT::Other, getControlRoot(), Cond, |
1735 | 1850 |
1736 /// visitJumpTable - Emit JumpTable node in the current MBB | 1851 /// visitJumpTable - Emit JumpTable node in the current MBB |
1737 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { | 1852 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { |
1738 // Emit the code for the jump table | 1853 // Emit the code for the jump table |
1739 assert(JT.Reg != -1U && "Should lower JT Header first!"); | 1854 assert(JT.Reg != -1U && "Should lower JT Header first!"); |
1740 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); | 1855 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
1741 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), | 1856 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), |
1742 JT.Reg, PTy); | 1857 JT.Reg, PTy); |
1743 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); | 1858 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); |
1744 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), | 1859 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), |
1745 MVT::Other, Index.getValue(1), | 1860 MVT::Other, Index.getValue(1), |
1750 /// visitJumpTableHeader - This function emits necessary code to produce index | 1865 /// visitJumpTableHeader - This function emits necessary code to produce index |
1751 /// in the JumpTable from switch case. | 1866 /// in the JumpTable from switch case. |
1752 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, | 1867 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, |
1753 JumpTableHeader &JTH, | 1868 JumpTableHeader &JTH, |
1754 MachineBasicBlock *SwitchBB) { | 1869 MachineBasicBlock *SwitchBB) { |
1870 SDLoc dl = getCurSDLoc(); | |
1871 | |
1755 // Subtract the lowest switch case value from the value being switched on and | 1872 // Subtract the lowest switch case value from the value being switched on and |
1756 // conditional branch to default mbb if the result is greater than the | 1873 // conditional branch to default mbb if the result is greater than the |
1757 // difference between smallest and largest cases. | 1874 // difference between smallest and largest cases. |
1758 SDValue SwitchOp = getValue(JTH.SValue); | 1875 SDValue SwitchOp = getValue(JTH.SValue); |
1759 EVT VT = SwitchOp.getValueType(); | 1876 EVT VT = SwitchOp.getValueType(); |
1760 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, | 1877 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, |
1761 DAG.getConstant(JTH.First, VT)); | 1878 DAG.getConstant(JTH.First, dl, VT)); |
1762 | 1879 |
1763 // The SDNode we just created, which holds the value being switched on minus | 1880 // The SDNode we just created, which holds the value being switched on minus |
1764 // the smallest case value, needs to be copied to a virtual register so it | 1881 // the smallest case value, needs to be copied to a virtual register so it |
1765 // can be used as an index into the jump table in a subsequent basic block. | 1882 // can be used as an index into the jump table in a subsequent basic block. |
1766 // This value may be smaller or larger than the target's pointer type, and | 1883 // This value may be smaller or larger than the target's pointer type, and |
1767 // therefore require extension or truncating. | 1884 // therefore require extension or truncating. |
1768 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 1885 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1769 SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); | 1886 SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); |
1770 | 1887 |
1771 unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); | 1888 unsigned JumpTableReg = |
1772 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), | 1889 FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); |
1890 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, | |
1773 JumpTableReg, SwitchOp); | 1891 JumpTableReg, SwitchOp); |
1774 JT.Reg = JumpTableReg; | 1892 JT.Reg = JumpTableReg; |
1775 | 1893 |
1776 // Emit the range check for the jump table, and branch to the default block | 1894 // Emit the range check for the jump table, and branch to the default block |
1777 // for the switch statement if the value being switched on exceeds the largest | 1895 // for the switch statement if the value being switched on exceeds the largest |
1778 // case in the switch. | 1896 // case in the switch. |
1779 SDValue CMP = | 1897 SDValue CMP = DAG.getSetCC( |
1780 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), | 1898 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), |
1781 Sub.getValueType()), | 1899 Sub.getValueType()), |
1782 Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); | 1900 Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); |
1783 | 1901 |
1784 // Set NextBlock to be the MBB immediately after the current one, if any. | 1902 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, |
1785 // This is used to avoid emitting unnecessary branches to the next block. | |
1786 MachineBasicBlock *NextBlock = nullptr; | |
1787 MachineFunction::iterator BBI = SwitchBB; | |
1788 | |
1789 if (++BBI != FuncInfo.MF->end()) | |
1790 NextBlock = BBI; | |
1791 | |
1792 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), | |
1793 MVT::Other, CopyTo, CMP, | 1903 MVT::Other, CopyTo, CMP, |
1794 DAG.getBasicBlock(JT.Default)); | 1904 DAG.getBasicBlock(JT.Default)); |
1795 | 1905 |
1796 if (JT.MBB != NextBlock) | 1906 // Avoid emitting unnecessary branches to the next block. |
1797 BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, | 1907 if (JT.MBB != NextBlock(SwitchBB)) |
1908 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, | |
1798 DAG.getBasicBlock(JT.MBB)); | 1909 DAG.getBasicBlock(JT.MBB)); |
1799 | 1910 |
1800 DAG.setRoot(BrCond); | 1911 DAG.setRoot(BrCond); |
1801 } | 1912 } |
1802 | 1913 |
1809 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, | 1920 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, |
1810 MachineBasicBlock *ParentBB) { | 1921 MachineBasicBlock *ParentBB) { |
1811 | 1922 |
1812 // First create the loads to the guard/stack slot for the comparison. | 1923 // First create the loads to the guard/stack slot for the comparison. |
1813 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 1924 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1814 EVT PtrTy = TLI.getPointerTy(); | 1925 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); |
1815 | 1926 |
1816 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); | 1927 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); |
1817 int FI = MFI->getStackProtectorIndex(); | 1928 int FI = MFI->getStackProtectorIndex(); |
1818 | 1929 |
1819 const Value *IRGuard = SPD.getGuard(); | 1930 const Value *IRGuard = SPD.getGuard(); |
1820 SDValue GuardPtr = getValue(IRGuard); | 1931 SDValue GuardPtr = getValue(IRGuard); |
1821 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); | 1932 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); |
1822 | 1933 |
1823 unsigned Align = | 1934 unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); |
1824 TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); | |
1825 | 1935 |
1826 SDValue Guard; | 1936 SDValue Guard; |
1937 SDLoc dl = getCurSDLoc(); | |
1827 | 1938 |
1828 // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the | 1939 // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the |
1829 // guard value from the virtual register holding the value. Otherwise, emit a | 1940 // guard value from the virtual register holding the value. Otherwise, emit a |
1830 // volatile load to retrieve the stack guard value. | 1941 // volatile load to retrieve the stack guard value. |
1831 unsigned GuardReg = SPD.getGuardReg(); | 1942 unsigned GuardReg = SPD.getGuardReg(); |
1832 | 1943 |
1833 if (GuardReg && TLI.useLoadStackGuardNode()) | 1944 if (GuardReg && TLI.useLoadStackGuardNode()) |
1834 Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, | 1945 Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, |
1835 PtrTy); | 1946 PtrTy); |
1836 else | 1947 else |
1837 Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), | 1948 Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), |
1838 GuardPtr, MachinePointerInfo(IRGuard, 0), | 1949 GuardPtr, MachinePointerInfo(IRGuard, 0), |
1839 true, false, false, Align); | 1950 true, false, false, Align); |
1840 | 1951 |
1841 SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), | 1952 SDValue StackSlot = DAG.getLoad( |
1842 StackSlotPtr, | 1953 PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, |
1843 MachinePointerInfo::getFixedStack(FI), | 1954 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, |
1844 true, false, false, Align); | 1955 false, false, Align); |
1845 | 1956 |
1846 // Perform the comparison via a subtract/getsetcc. | 1957 // Perform the comparison via a subtract/getsetcc. |
1847 EVT VT = Guard.getValueType(); | 1958 EVT VT = Guard.getValueType(); |
1848 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); | 1959 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); |
1849 | 1960 |
1850 SDValue Cmp = | 1961 SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), |
1851 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), | 1962 *DAG.getContext(), |
1852 Sub.getValueType()), | 1963 Sub.getValueType()), |
1853 Sub, DAG.getConstant(0, VT), ISD::SETNE); | 1964 Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); |
1854 | 1965 |
1855 // If the sub is not 0, then we know the guard/stackslot do not equal, so | 1966 // If the sub is not 0, then we know the guard/stackslot do not equal, so |
1856 // branch to failure MBB. | 1967 // branch to failure MBB. |
1857 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), | 1968 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, |
1858 MVT::Other, StackSlot.getOperand(0), | 1969 MVT::Other, StackSlot.getOperand(0), |
1859 Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); | 1970 Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); |
1860 // Otherwise branch to success MBB. | 1971 // Otherwise branch to success MBB. |
1861 SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), | 1972 SDValue Br = DAG.getNode(ISD::BR, dl, |
1862 MVT::Other, BrCond, | 1973 MVT::Other, BrCond, |
1863 DAG.getBasicBlock(SPD.getSuccessMBB())); | 1974 DAG.getBasicBlock(SPD.getSuccessMBB())); |
1864 | 1975 |
1865 DAG.setRoot(Br); | 1976 DAG.setRoot(Br); |
1866 } | 1977 } |
1884 | 1995 |
1885 /// visitBitTestHeader - This function emits necessary code to produce value | 1996 /// visitBitTestHeader - This function emits necessary code to produce value |
1886 /// suitable for "bit tests" | 1997 /// suitable for "bit tests" |
1887 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, | 1998 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, |
1888 MachineBasicBlock *SwitchBB) { | 1999 MachineBasicBlock *SwitchBB) { |
2000 SDLoc dl = getCurSDLoc(); | |
2001 | |
1889 // Subtract the minimum value | 2002 // Subtract the minimum value |
1890 SDValue SwitchOp = getValue(B.SValue); | 2003 SDValue SwitchOp = getValue(B.SValue); |
1891 EVT VT = SwitchOp.getValueType(); | 2004 EVT VT = SwitchOp.getValueType(); |
1892 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, | 2005 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, |
1893 DAG.getConstant(B.First, VT)); | 2006 DAG.getConstant(B.First, dl, VT)); |
1894 | 2007 |
1895 // Check range | 2008 // Check range |
1896 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2009 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1897 SDValue RangeCmp = | 2010 SDValue RangeCmp = DAG.getSetCC( |
1898 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), | 2011 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), |
1899 Sub.getValueType()), | 2012 Sub.getValueType()), |
1900 Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); | 2013 Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); |
1901 | 2014 |
1902 // Determine the type of the test operands. | 2015 // Determine the type of the test operands. |
1903 bool UsePtrType = false; | 2016 bool UsePtrType = false; |
1904 if (!TLI.isTypeLegal(VT)) | 2017 if (!TLI.isTypeLegal(VT)) |
1905 UsePtrType = true; | 2018 UsePtrType = true; |
1911 UsePtrType = true; | 2024 UsePtrType = true; |
1912 break; | 2025 break; |
1913 } | 2026 } |
1914 } | 2027 } |
1915 if (UsePtrType) { | 2028 if (UsePtrType) { |
1916 VT = TLI.getPointerTy(); | 2029 VT = TLI.getPointerTy(DAG.getDataLayout()); |
1917 Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); | 2030 Sub = DAG.getZExtOrTrunc(Sub, dl, VT); |
1918 } | 2031 } |
1919 | 2032 |
1920 B.RegVT = VT.getSimpleVT(); | 2033 B.RegVT = VT.getSimpleVT(); |
1921 B.Reg = FuncInfo.CreateReg(B.RegVT); | 2034 B.Reg = FuncInfo.CreateReg(B.RegVT); |
1922 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), | 2035 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); |
1923 B.Reg, Sub); | |
1924 | |
1925 // Set NextBlock to be the MBB immediately after the current one, if any. | |
1926 // This is used to avoid emitting unnecessary branches to the next block. | |
1927 MachineBasicBlock *NextBlock = nullptr; | |
1928 MachineFunction::iterator BBI = SwitchBB; | |
1929 if (++BBI != FuncInfo.MF->end()) | |
1930 NextBlock = BBI; | |
1931 | 2036 |
1932 MachineBasicBlock* MBB = B.Cases[0].ThisBB; | 2037 MachineBasicBlock* MBB = B.Cases[0].ThisBB; |
1933 | 2038 |
1934 addSuccessorWithWeight(SwitchBB, B.Default); | 2039 addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight); |
1935 addSuccessorWithWeight(SwitchBB, MBB); | 2040 addSuccessorWithWeight(SwitchBB, MBB, B.Weight); |
1936 | 2041 |
1937 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), | 2042 SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, |
1938 MVT::Other, CopyTo, RangeCmp, | 2043 MVT::Other, CopyTo, RangeCmp, |
1939 DAG.getBasicBlock(B.Default)); | 2044 DAG.getBasicBlock(B.Default)); |
1940 | 2045 |
1941 if (MBB != NextBlock) | 2046 // Avoid emitting unnecessary branches to the next block. |
1942 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, | 2047 if (MBB != NextBlock(SwitchBB)) |
2048 BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, | |
1943 DAG.getBasicBlock(MBB)); | 2049 DAG.getBasicBlock(MBB)); |
1944 | 2050 |
1945 DAG.setRoot(BrRange); | 2051 DAG.setRoot(BrRange); |
1946 } | 2052 } |
1947 | 2053 |
1950 MachineBasicBlock* NextMBB, | 2056 MachineBasicBlock* NextMBB, |
1951 uint32_t BranchWeightToNext, | 2057 uint32_t BranchWeightToNext, |
1952 unsigned Reg, | 2058 unsigned Reg, |
1953 BitTestCase &B, | 2059 BitTestCase &B, |
1954 MachineBasicBlock *SwitchBB) { | 2060 MachineBasicBlock *SwitchBB) { |
2061 SDLoc dl = getCurSDLoc(); | |
1955 MVT VT = BB.RegVT; | 2062 MVT VT = BB.RegVT; |
1956 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), | 2063 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); |
1957 Reg, VT); | |
1958 SDValue Cmp; | 2064 SDValue Cmp; |
1959 unsigned PopCount = countPopulation(B.Mask); | 2065 unsigned PopCount = countPopulation(B.Mask); |
1960 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2066 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1961 if (PopCount == 1) { | 2067 if (PopCount == 1) { |
1962 // Testing for a single bit; just compare the shift count with what it | 2068 // Testing for a single bit; just compare the shift count with what it |
1963 // would need to be to shift a 1 bit in that position. | 2069 // would need to be to shift a 1 bit in that position. |
1964 Cmp = DAG.getSetCC( | 2070 Cmp = DAG.getSetCC( |
1965 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, | 2071 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), |
1966 DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); | 2072 ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), |
2073 ISD::SETEQ); | |
1967 } else if (PopCount == BB.Range) { | 2074 } else if (PopCount == BB.Range) { |
1968 // There is only one zero bit in the range, test for it directly. | 2075 // There is only one zero bit in the range, test for it directly. |
1969 Cmp = DAG.getSetCC( | 2076 Cmp = DAG.getSetCC( |
1970 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, | 2077 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), |
1971 DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); | 2078 ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), |
2079 ISD::SETNE); | |
1972 } else { | 2080 } else { |
1973 // Make desired shift | 2081 // Make desired shift |
1974 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, | 2082 SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, |
1975 DAG.getConstant(1, VT), ShiftOp); | 2083 DAG.getConstant(1, dl, VT), ShiftOp); |
1976 | 2084 |
1977 // Emit bit tests and jumps | 2085 // Emit bit tests and jumps |
1978 SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), | 2086 SDValue AndOp = DAG.getNode(ISD::AND, dl, |
1979 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); | 2087 VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); |
1980 Cmp = DAG.getSetCC(getCurSDLoc(), | 2088 Cmp = DAG.getSetCC( |
1981 TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, | 2089 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), |
1982 DAG.getConstant(0, VT), ISD::SETNE); | 2090 AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); |
1983 } | 2091 } |
1984 | 2092 |
1985 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. | 2093 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. |
1986 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); | 2094 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); |
1987 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. | 2095 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. |
1988 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); | 2096 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); |
1989 | 2097 |
1990 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), | 2098 SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, |
1991 MVT::Other, getControlRoot(), | 2099 MVT::Other, getControlRoot(), |
1992 Cmp, DAG.getBasicBlock(B.TargetBB)); | 2100 Cmp, DAG.getBasicBlock(B.TargetBB)); |
1993 | 2101 |
1994 // Set NextBlock to be the MBB immediately after the current one, if any. | 2102 // Avoid emitting unnecessary branches to the next block. |
1995 // This is used to avoid emitting unnecessary branches to the next block. | 2103 if (NextMBB != NextBlock(SwitchBB)) |
1996 MachineBasicBlock *NextBlock = nullptr; | 2104 BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, |
1997 MachineFunction::iterator BBI = SwitchBB; | |
1998 if (++BBI != FuncInfo.MF->end()) | |
1999 NextBlock = BBI; | |
2000 | |
2001 if (NextMBB != NextBlock) | |
2002 BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, | |
2003 DAG.getBasicBlock(NextMBB)); | 2105 DAG.getBasicBlock(NextMBB)); |
2004 | 2106 |
2005 DAG.setRoot(BrAnd); | 2107 DAG.setRoot(BrAnd); |
2006 } | 2108 } |
2007 | 2109 |
2008 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { | 2110 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { |
2009 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; | 2111 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; |
2010 | 2112 |
2011 // Retrieve successors. | 2113 // Retrieve successors. Look through artificial IR level blocks like catchpads |
2114 // and catchendpads for successors. | |
2012 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; | 2115 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; |
2013 MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; | 2116 const BasicBlock *EHPadBB = I.getSuccessor(1); |
2014 | 2117 |
2015 const Value *Callee(I.getCalledValue()); | 2118 const Value *Callee(I.getCalledValue()); |
2016 const Function *Fn = dyn_cast<Function>(Callee); | 2119 const Function *Fn = dyn_cast<Function>(Callee); |
2017 if (isa<InlineAsm>(Callee)) | 2120 if (isa<InlineAsm>(Callee)) |
2018 visitInlineAsm(&I); | 2121 visitInlineAsm(&I); |
2023 case Intrinsic::donothing: | 2126 case Intrinsic::donothing: |
2024 // Ignore invokes to @llvm.donothing: jump directly to the next BB. | 2127 // Ignore invokes to @llvm.donothing: jump directly to the next BB. |
2025 break; | 2128 break; |
2026 case Intrinsic::experimental_patchpoint_void: | 2129 case Intrinsic::experimental_patchpoint_void: |
2027 case Intrinsic::experimental_patchpoint_i64: | 2130 case Intrinsic::experimental_patchpoint_i64: |
2028 visitPatchpoint(&I, LandingPad); | 2131 visitPatchpoint(&I, EHPadBB); |
2029 break; | 2132 break; |
2133 case Intrinsic::experimental_gc_statepoint: | |
2134 LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); | |
2135 break; | |
2030 } | 2136 } |
2031 } else | 2137 } else |
2032 LowerCallTo(&I, getValue(Callee), false, LandingPad); | 2138 LowerCallTo(&I, getValue(Callee), false, EHPadBB); |
2033 | 2139 |
2034 // If the value of the invoke is used outside of its defining block, make it | 2140 // If the value of the invoke is used outside of its defining block, make it |
2035 // available as a virtual register. | 2141 // available as a virtual register. |
2036 CopyToExportRegsIfNeeded(&I); | 2142 // We already took care of the exported value for the statepoint instruction |
2037 | 2143 // during call to the LowerStatepoint. |
2038 // Update successor info | 2144 if (!isStatepoint(I)) { |
2145 CopyToExportRegsIfNeeded(&I); | |
2146 } | |
2147 | |
2148 SmallVector<std::pair<MachineBasicBlock *, uint32_t>, 1> UnwindDests; | |
2149 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
2150 uint32_t EHPadBBWeight = | |
2151 BPI ? BPI->getEdgeWeight(InvokeMBB->getBasicBlock(), EHPadBB) : 0; | |
2152 findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBWeight, UnwindDests); | |
2153 | |
2154 // Update successor info. | |
2039 addSuccessorWithWeight(InvokeMBB, Return); | 2155 addSuccessorWithWeight(InvokeMBB, Return); |
2040 addSuccessorWithWeight(InvokeMBB, LandingPad); | 2156 for (auto &UnwindDest : UnwindDests) { |
2157 UnwindDest.first->setIsEHPad(); | |
2158 addSuccessorWithWeight(InvokeMBB, UnwindDest.first, UnwindDest.second); | |
2159 } | |
2041 | 2160 |
2042 // Drop into normal successor. | 2161 // Drop into normal successor. |
2043 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), | 2162 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), |
2044 MVT::Other, getControlRoot(), | 2163 MVT::Other, getControlRoot(), |
2045 DAG.getBasicBlock(Return))); | 2164 DAG.getBasicBlock(Return))); |
2048 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { | 2167 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { |
2049 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); | 2168 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); |
2050 } | 2169 } |
2051 | 2170 |
2052 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { | 2171 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { |
2053 assert(FuncInfo.MBB->isLandingPad() && | 2172 assert(FuncInfo.MBB->isEHPad() && |
2054 "Call to landingpad not in landing pad!"); | 2173 "Call to landingpad not in landing pad!"); |
2055 | 2174 |
2056 MachineBasicBlock *MBB = FuncInfo.MBB; | 2175 MachineBasicBlock *MBB = FuncInfo.MBB; |
2057 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); | 2176 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); |
2058 AddLandingPadInfo(LP, MMI, MBB); | 2177 AddLandingPadInfo(LP, MMI, MBB); |
2063 if (TLI.getExceptionPointerRegister() == 0 && | 2182 if (TLI.getExceptionPointerRegister() == 0 && |
2064 TLI.getExceptionSelectorRegister() == 0) | 2183 TLI.getExceptionSelectorRegister() == 0) |
2065 return; | 2184 return; |
2066 | 2185 |
2067 SmallVector<EVT, 2> ValueVTs; | 2186 SmallVector<EVT, 2> ValueVTs; |
2068 ComputeValueVTs(TLI, LP.getType(), ValueVTs); | 2187 SDLoc dl = getCurSDLoc(); |
2188 ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); | |
2069 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); | 2189 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); |
2070 | 2190 |
2071 // Get the two live-in registers as SDValues. The physregs have already been | 2191 // Get the two live-in registers as SDValues. The physregs have already been |
2072 // copied into virtual registers. | 2192 // copied into virtual registers. |
2073 SDValue Ops[2]; | 2193 SDValue Ops[2]; |
2074 if (FuncInfo.ExceptionPointerVirtReg) { | 2194 if (FuncInfo.ExceptionPointerVirtReg) { |
2075 Ops[0] = DAG.getZExtOrTrunc( | 2195 Ops[0] = DAG.getZExtOrTrunc( |
2076 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), | 2196 DAG.getCopyFromReg(DAG.getEntryNode(), dl, |
2077 FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), | 2197 FuncInfo.ExceptionPointerVirtReg, |
2078 getCurSDLoc(), ValueVTs[0]); | 2198 TLI.getPointerTy(DAG.getDataLayout())), |
2199 dl, ValueVTs[0]); | |
2079 } else { | 2200 } else { |
2080 Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); | 2201 Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); |
2081 } | 2202 } |
2082 Ops[1] = DAG.getZExtOrTrunc( | 2203 Ops[1] = DAG.getZExtOrTrunc( |
2083 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), | 2204 DAG.getCopyFromReg(DAG.getEntryNode(), dl, |
2084 FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), | 2205 FuncInfo.ExceptionSelectorVirtReg, |
2085 getCurSDLoc(), ValueVTs[1]); | 2206 TLI.getPointerTy(DAG.getDataLayout())), |
2207 dl, ValueVTs[1]); | |
2086 | 2208 |
2087 // Merge into one. | 2209 // Merge into one. |
2088 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), | 2210 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, |
2089 DAG.getVTList(ValueVTs), Ops); | 2211 DAG.getVTList(ValueVTs), Ops); |
2090 setValue(&LP, Res); | 2212 setValue(&LP, Res); |
2091 } | 2213 } |
2092 | 2214 |
2093 unsigned | 2215 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { |
2094 SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, | 2216 #ifndef NDEBUG |
2095 MachineBasicBlock *LPadBB) { | 2217 for (const CaseCluster &CC : Clusters) |
2096 SDValue Chain = getControlRoot(); | 2218 assert(CC.Low == CC.High && "Input clusters must be single-case"); |
2097 | 2219 #endif |
2098 // Get the typeid that we will dispatch on later. | 2220 |
2099 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2221 std::sort(Clusters.begin(), Clusters.end(), |
2100 const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); | 2222 [](const CaseCluster &a, const CaseCluster &b) { |
2101 unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); | 2223 return a.Low->getValue().slt(b.Low->getValue()); |
2102 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); | 2224 }); |
2103 SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); | 2225 |
2104 Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); | 2226 // Merge adjacent clusters with the same destination. |
2105 | 2227 const unsigned N = Clusters.size(); |
2106 // Branch to the main landing pad block. | 2228 unsigned DstIndex = 0; |
2107 MachineBasicBlock *ClauseMBB = FuncInfo.MBB; | 2229 for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { |
2108 ClauseMBB->addSuccessor(LPadBB); | 2230 CaseCluster &CC = Clusters[SrcIndex]; |
2109 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, | 2231 const ConstantInt *CaseVal = CC.Low; |
2110 DAG.getBasicBlock(LPadBB))); | 2232 MachineBasicBlock *Succ = CC.MBB; |
2111 return VReg; | 2233 |
2112 } | 2234 if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && |
2113 | 2235 (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { |
2114 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for | 2236 // If this case has the same successor and is a neighbour, merge it into |
2115 /// small case ranges). | 2237 // the previous cluster. |
2116 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, | 2238 Clusters[DstIndex - 1].High = CaseVal; |
2117 CaseRecVector& WorkList, | 2239 Clusters[DstIndex - 1].Weight += CC.Weight; |
2118 const Value* SV, | 2240 assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); |
2119 MachineBasicBlock *Default, | |
2120 MachineBasicBlock *SwitchBB) { | |
2121 // Size is the number of Cases represented by this range. | |
2122 size_t Size = CR.Range.second - CR.Range.first; | |
2123 if (Size > 3) | |
2124 return false; | |
2125 | |
2126 // Get the MachineFunction which holds the current MBB. This is used when | |
2127 // inserting any additional MBBs necessary to represent the switch. | |
2128 MachineFunction *CurMF = FuncInfo.MF; | |
2129 | |
2130 // Figure out which block is immediately after the current one. | |
2131 MachineBasicBlock *NextBlock = nullptr; | |
2132 MachineFunction::iterator BBI = CR.CaseBB; | |
2133 | |
2134 if (++BBI != FuncInfo.MF->end()) | |
2135 NextBlock = BBI; | |
2136 | |
2137 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
2138 // If any two of the cases has the same destination, and if one value | |
2139 // is the same as the other, but has one bit unset that the other has set, | |
2140 // use bit manipulation to do two compares at once. For example: | |
2141 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" | |
2142 // TODO: This could be extended to merge any 2 cases in switches with 3 cases. | |
2143 // TODO: Handle cases where CR.CaseBB != SwitchBB. | |
2144 if (Size == 2 && CR.CaseBB == SwitchBB) { | |
2145 Case &Small = *CR.Range.first; | |
2146 Case &Big = *(CR.Range.second-1); | |
2147 | |
2148 if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { | |
2149 const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); | |
2150 const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); | |
2151 | |
2152 // Check that there is only one bit different. | |
2153 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && | |
2154 (SmallValue | BigValue) == BigValue) { | |
2155 // Isolate the common bit. | |
2156 APInt CommonBit = BigValue & ~SmallValue; | |
2157 assert((SmallValue | CommonBit) == BigValue && | |
2158 CommonBit.countPopulation() == 1 && "Not a common bit?"); | |
2159 | |
2160 SDValue CondLHS = getValue(SV); | |
2161 EVT VT = CondLHS.getValueType(); | |
2162 SDLoc DL = getCurSDLoc(); | |
2163 | |
2164 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, | |
2165 DAG.getConstant(CommonBit, VT)); | |
2166 SDValue Cond = DAG.getSetCC(DL, MVT::i1, | |
2167 Or, DAG.getConstant(BigValue, VT), | |
2168 ISD::SETEQ); | |
2169 | |
2170 // Update successor info. | |
2171 // Both Small and Big will jump to Small.BB, so we sum up the weights. | |
2172 addSuccessorWithWeight(SwitchBB, Small.BB, | |
2173 Small.ExtraWeight + Big.ExtraWeight); | |
2174 addSuccessorWithWeight(SwitchBB, Default, | |
2175 // The default destination is the first successor in IR. | |
2176 BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); | |
2177 | |
2178 // Insert the true branch. | |
2179 SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, | |
2180 getControlRoot(), Cond, | |
2181 DAG.getBasicBlock(Small.BB)); | |
2182 | |
2183 // Insert the false branch. | |
2184 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, | |
2185 DAG.getBasicBlock(Default)); | |
2186 | |
2187 DAG.setRoot(BrCond); | |
2188 return true; | |
2189 } | |
2190 } | |
2191 } | |
2192 | |
2193 // Order cases by weight so the most likely case will be checked first. | |
2194 uint32_t UnhandledWeights = 0; | |
2195 if (BPI) { | |
2196 for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { | |
2197 uint32_t IWeight = I->ExtraWeight; | |
2198 UnhandledWeights += IWeight; | |
2199 for (CaseItr J = CR.Range.first; J < I; ++J) { | |
2200 uint32_t JWeight = J->ExtraWeight; | |
2201 if (IWeight > JWeight) | |
2202 std::swap(*I, *J); | |
2203 } | |
2204 } | |
2205 } | |
2206 // Rearrange the case blocks so that the last one falls through if possible. | |
2207 Case &BackCase = *(CR.Range.second-1); | |
2208 if (Size > 1 && | |
2209 NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { | |
2210 // The last case block won't fall through into 'NextBlock' if we emit the | |
2211 // branches in this order. See if rearranging a case value would help. | |
2212 // We start at the bottom as it's the case with the least weight. | |
2213 for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) | |
2214 if (I->BB == NextBlock) { | |
2215 std::swap(*I, BackCase); | |
2216 break; | |
2217 } | |
2218 } | |
2219 | |
2220 // Create a CaseBlock record representing a conditional branch to | |
2221 // the Case's target mbb if the value being switched on SV is equal | |
2222 // to C. | |
2223 MachineBasicBlock *CurBlock = CR.CaseBB; | |
2224 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { | |
2225 MachineBasicBlock *FallThrough; | |
2226 if (I != E-1) { | |
2227 FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); | |
2228 CurMF->insert(BBI, FallThrough); | |
2229 | |
2230 // Put SV in a virtual register to make it available from the new blocks. | |
2231 ExportFromCurrentBlock(SV); | |
2232 } else { | 2241 } else { |
2233 // If the last case doesn't match, go to the default block. | 2242 std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], |
2234 FallThrough = Default; | 2243 sizeof(Clusters[SrcIndex])); |
2235 } | 2244 } |
2236 | 2245 } |
2237 const Value *RHS, *LHS, *MHS; | 2246 Clusters.resize(DstIndex); |
2238 ISD::CondCode CC; | |
2239 if (I->High == I->Low) { | |
2240 // This is just small small case range :) containing exactly 1 case | |
2241 CC = ISD::SETEQ; | |
2242 LHS = SV; RHS = I->High; MHS = nullptr; | |
2243 } else { | |
2244 CC = ISD::SETLE; | |
2245 LHS = I->Low; MHS = SV; RHS = I->High; | |
2246 } | |
2247 | |
2248 // The false weight should be sum of all un-handled cases. | |
2249 UnhandledWeights -= I->ExtraWeight; | |
2250 CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, | |
2251 /* me */ CurBlock, | |
2252 /* trueweight */ I->ExtraWeight, | |
2253 /* falseweight */ UnhandledWeights); | |
2254 | |
2255 // If emitting the first comparison, just call visitSwitchCase to emit the | |
2256 // code into the current block. Otherwise, push the CaseBlock onto the | |
2257 // vector to be later processed by SDISel, and insert the node's MBB | |
2258 // before the next MBB. | |
2259 if (CurBlock == SwitchBB) | |
2260 visitSwitchCase(CB, SwitchBB); | |
2261 else | |
2262 SwitchCases.push_back(CB); | |
2263 | |
2264 CurBlock = FallThrough; | |
2265 } | |
2266 | |
2267 return true; | |
2268 } | |
2269 | |
2270 static inline bool areJTsAllowed(const TargetLowering &TLI) { | |
2271 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | |
2272 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | |
2273 } | |
2274 | |
2275 static APInt ComputeRange(const APInt &First, const APInt &Last) { | |
2276 uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; | |
2277 APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); | |
2278 return (LastExt - FirstExt + 1ULL); | |
2279 } | |
2280 | |
2281 /// handleJTSwitchCase - Emit jumptable for current switch case range | |
2282 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, | |
2283 CaseRecVector &WorkList, | |
2284 const Value *SV, | |
2285 MachineBasicBlock *Default, | |
2286 MachineBasicBlock *SwitchBB) { | |
2287 Case& FrontCase = *CR.Range.first; | |
2288 Case& BackCase = *(CR.Range.second-1); | |
2289 | |
2290 const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); | |
2291 const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); | |
2292 | |
2293 APInt TSize(First.getBitWidth(), 0); | |
2294 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) | |
2295 TSize += I->size(); | |
2296 | |
2297 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
2298 if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) | |
2299 return false; | |
2300 | |
2301 APInt Range = ComputeRange(First, Last); | |
2302 // The density is TSize / Range. Require at least 40%. | |
2303 // It should not be possible for IntTSize to saturate for sane code, but make | |
2304 // sure we handle Range saturation correctly. | |
2305 uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); | |
2306 uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); | |
2307 if (IntTSize * 10 < IntRange * 4) | |
2308 return false; | |
2309 | |
2310 DEBUG(dbgs() << "Lowering jump table\n" | |
2311 << "First entry: " << First << ". Last entry: " << Last << '\n' | |
2312 << "Range: " << Range << ". Size: " << TSize << ".\n\n"); | |
2313 | |
2314 // Get the MachineFunction which holds the current MBB. This is used when | |
2315 // inserting any additional MBBs necessary to represent the switch. | |
2316 MachineFunction *CurMF = FuncInfo.MF; | |
2317 | |
2318 // Figure out which block is immediately after the current one. | |
2319 MachineFunction::iterator BBI = CR.CaseBB; | |
2320 ++BBI; | |
2321 | |
2322 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); | |
2323 | |
2324 // Create a new basic block to hold the code for loading the address | |
2325 // of the jump table, and jumping to it. Update successor information; | |
2326 // we will either branch to the default case for the switch, or the jump | |
2327 // table. | |
2328 MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); | |
2329 CurMF->insert(BBI, JumpTableBB); | |
2330 | |
2331 addSuccessorWithWeight(CR.CaseBB, Default); | |
2332 addSuccessorWithWeight(CR.CaseBB, JumpTableBB); | |
2333 | |
2334 // Build a vector of destination BBs, corresponding to each target | |
2335 // of the jump table. If the value of the jump table slot corresponds to | |
2336 // a case statement, push the case's BB onto the vector, otherwise, push | |
2337 // the default BB. | |
2338 std::vector<MachineBasicBlock*> DestBBs; | |
2339 APInt TEI = First; | |
2340 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { | |
2341 const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); | |
2342 const APInt &High = cast<ConstantInt>(I->High)->getValue(); | |
2343 | |
2344 if (Low.sle(TEI) && TEI.sle(High)) { | |
2345 DestBBs.push_back(I->BB); | |
2346 if (TEI==High) | |
2347 ++I; | |
2348 } else { | |
2349 DestBBs.push_back(Default); | |
2350 } | |
2351 } | |
2352 | |
2353 // Calculate weight for each unique destination in CR. | |
2354 DenseMap<MachineBasicBlock*, uint32_t> DestWeights; | |
2355 if (FuncInfo.BPI) | |
2356 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { | |
2357 DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = | |
2358 DestWeights.find(I->BB); | |
2359 if (Itr != DestWeights.end()) | |
2360 Itr->second += I->ExtraWeight; | |
2361 else | |
2362 DestWeights[I->BB] = I->ExtraWeight; | |
2363 } | |
2364 | |
2365 // Update successor info. Add one edge to each unique successor. | |
2366 BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); | |
2367 for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), | |
2368 E = DestBBs.end(); I != E; ++I) { | |
2369 if (!SuccsHandled[(*I)->getNumber()]) { | |
2370 SuccsHandled[(*I)->getNumber()] = true; | |
2371 DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = | |
2372 DestWeights.find(*I); | |
2373 addSuccessorWithWeight(JumpTableBB, *I, | |
2374 Itr != DestWeights.end() ? Itr->second : 0); | |
2375 } | |
2376 } | |
2377 | |
2378 // Create a jump table index for this jump table. | |
2379 unsigned JTEncoding = TLI.getJumpTableEncoding(); | |
2380 unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) | |
2381 ->createJumpTableIndex(DestBBs); | |
2382 | |
2383 // Set the jump table information so that we can codegen it as a second | |
2384 // MachineBasicBlock | |
2385 JumpTable JT(-1U, JTI, JumpTableBB, Default); | |
2386 JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); | |
2387 if (CR.CaseBB == SwitchBB) | |
2388 visitJumpTableHeader(JT, JTH, SwitchBB); | |
2389 | |
2390 JTCases.push_back(JumpTableBlock(JTH, JT)); | |
2391 return true; | |
2392 } | |
2393 | |
2394 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into | |
2395 /// 2 subtrees. | |
2396 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, | |
2397 CaseRecVector& WorkList, | |
2398 const Value* SV, | |
2399 MachineBasicBlock* SwitchBB) { | |
2400 Case& FrontCase = *CR.Range.first; | |
2401 Case& BackCase = *(CR.Range.second-1); | |
2402 | |
2403 // Size is the number of Cases represented by this range. | |
2404 unsigned Size = CR.Range.second - CR.Range.first; | |
2405 | |
2406 const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); | |
2407 const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); | |
2408 double FMetric = 0; | |
2409 CaseItr Pivot = CR.Range.first + Size/2; | |
2410 | |
2411 // Select optimal pivot, maximizing sum density of LHS and RHS. This will | |
2412 // (heuristically) allow us to emit JumpTable's later. | |
2413 APInt TSize(First.getBitWidth(), 0); | |
2414 for (CaseItr I = CR.Range.first, E = CR.Range.second; | |
2415 I!=E; ++I) | |
2416 TSize += I->size(); | |
2417 | |
2418 APInt LSize = FrontCase.size(); | |
2419 APInt RSize = TSize-LSize; | |
2420 DEBUG(dbgs() << "Selecting best pivot: \n" | |
2421 << "First: " << First << ", Last: " << Last <<'\n' | |
2422 << "LSize: " << LSize << ", RSize: " << RSize << '\n'); | |
2423 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
2424 for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; | |
2425 J!=E; ++I, ++J) { | |
2426 const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); | |
2427 const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); | |
2428 APInt Range = ComputeRange(LEnd, RBegin); | |
2429 assert((Range - 2ULL).isNonNegative() && | |
2430 "Invalid case distance"); | |
2431 // Use volatile double here to avoid excess precision issues on some hosts, | |
2432 // e.g. that use 80-bit X87 registers. | |
2433 // Only consider the density of sub-ranges that actually have sufficient | |
2434 // entries to be lowered as a jump table. | |
2435 volatile double LDensity = | |
2436 LSize.ult(TLI.getMinimumJumpTableEntries()) | |
2437 ? 0.0 | |
2438 : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); | |
2439 volatile double RDensity = | |
2440 RSize.ult(TLI.getMinimumJumpTableEntries()) | |
2441 ? 0.0 | |
2442 : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); | |
2443 volatile double Metric = Range.logBase2() * (LDensity + RDensity); | |
2444 // Should always split in some non-trivial place | |
2445 DEBUG(dbgs() <<"=>Step\n" | |
2446 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' | |
2447 << "LDensity: " << LDensity | |
2448 << ", RDensity: " << RDensity << '\n' | |
2449 << "Metric: " << Metric << '\n'); | |
2450 if (FMetric < Metric) { | |
2451 Pivot = J; | |
2452 FMetric = Metric; | |
2453 DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); | |
2454 } | |
2455 | |
2456 LSize += J->size(); | |
2457 RSize -= J->size(); | |
2458 } | |
2459 | |
2460 if (FMetric == 0 || !areJTsAllowed(TLI)) | |
2461 Pivot = CR.Range.first + Size/2; | |
2462 splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB); | |
2463 return true; | |
2464 } | |
2465 | |
2466 void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, | |
2467 CaseRecVector &WorkList, | |
2468 const Value *SV, | |
2469 MachineBasicBlock *SwitchBB) { | |
2470 // Get the MachineFunction which holds the current MBB. This is used when | |
2471 // inserting any additional MBBs necessary to represent the switch. | |
2472 MachineFunction *CurMF = FuncInfo.MF; | |
2473 | |
2474 // Figure out which block is immediately after the current one. | |
2475 MachineFunction::iterator BBI = CR.CaseBB; | |
2476 ++BBI; | |
2477 | |
2478 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); | |
2479 | |
2480 CaseRange LHSR(CR.Range.first, Pivot); | |
2481 CaseRange RHSR(Pivot, CR.Range.second); | |
2482 const Constant *C = Pivot->Low; | |
2483 MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; | |
2484 | |
2485 // We know that we branch to the LHS if the Value being switched on is | |
2486 // less than the Pivot value, C. We use this to optimize our binary | |
2487 // tree a bit, by recognizing that if SV is greater than or equal to the | |
2488 // LHS's Case Value, and that Case Value is exactly one less than the | |
2489 // Pivot's Value, then we can branch directly to the LHS's Target, | |
2490 // rather than creating a leaf node for it. | |
2491 if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && | |
2492 cast<ConstantInt>(C)->getValue() == | |
2493 (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { | |
2494 TrueBB = LHSR.first->BB; | |
2495 } else { | |
2496 TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); | |
2497 CurMF->insert(BBI, TrueBB); | |
2498 WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); | |
2499 | |
2500 // Put SV in a virtual register to make it available from the new blocks. | |
2501 ExportFromCurrentBlock(SV); | |
2502 } | |
2503 | |
2504 // Similar to the optimization above, if the Value being switched on is | |
2505 // known to be less than the Constant CR.LT, and the current Case Value | |
2506 // is CR.LT - 1, then we can branch directly to the target block for | |
2507 // the current Case Value, rather than emitting a RHS leaf node for it. | |
2508 if ((RHSR.second - RHSR.first) == 1 && CR.LT && | |
2509 cast<ConstantInt>(RHSR.first->Low)->getValue() == | |
2510 (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { | |
2511 FalseBB = RHSR.first->BB; | |
2512 } else { | |
2513 FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); | |
2514 CurMF->insert(BBI, FalseBB); | |
2515 WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR)); | |
2516 | |
2517 // Put SV in a virtual register to make it available from the new blocks. | |
2518 ExportFromCurrentBlock(SV); | |
2519 } | |
2520 | |
2521 // Create a CaseBlock record representing a conditional branch to | |
2522 // the LHS node if the value being switched on SV is less than C. | |
2523 // Otherwise, branch to LHS. | |
2524 CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); | |
2525 | |
2526 if (CR.CaseBB == SwitchBB) | |
2527 visitSwitchCase(CB, SwitchBB); | |
2528 else | |
2529 SwitchCases.push_back(CB); | |
2530 } | |
2531 | |
2532 /// handleBitTestsSwitchCase - if current case range has few destination and | |
2533 /// range span less, than machine word bitwidth, encode case range into series | |
2534 /// of masks and emit bit tests with these masks. | |
2535 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, | |
2536 CaseRecVector& WorkList, | |
2537 const Value* SV, | |
2538 MachineBasicBlock* Default, | |
2539 MachineBasicBlock* SwitchBB) { | |
2540 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
2541 EVT PTy = TLI.getPointerTy(); | |
2542 unsigned IntPtrBits = PTy.getSizeInBits(); | |
2543 | |
2544 Case& FrontCase = *CR.Range.first; | |
2545 Case& BackCase = *(CR.Range.second-1); | |
2546 | |
2547 // Get the MachineFunction which holds the current MBB. This is used when | |
2548 // inserting any additional MBBs necessary to represent the switch. | |
2549 MachineFunction *CurMF = FuncInfo.MF; | |
2550 | |
2551 // If target does not have legal shift left, do not emit bit tests at all. | |
2552 if (!TLI.isOperationLegal(ISD::SHL, PTy)) | |
2553 return false; | |
2554 | |
2555 size_t numCmps = 0; | |
2556 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { | |
2557 // Single case counts one, case range - two. | |
2558 numCmps += (I->Low == I->High ? 1 : 2); | |
2559 } | |
2560 | |
2561 // Count unique destinations | |
2562 SmallSet<MachineBasicBlock*, 4> Dests; | |
2563 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { | |
2564 Dests.insert(I->BB); | |
2565 if (Dests.size() > 3) | |
2566 // Don't bother the code below, if there are too much unique destinations | |
2567 return false; | |
2568 } | |
2569 DEBUG(dbgs() << "Total number of unique destinations: " | |
2570 << Dests.size() << '\n' | |
2571 << "Total number of comparisons: " << numCmps << '\n'); | |
2572 | |
2573 // Compute span of values. | |
2574 const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); | |
2575 const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); | |
2576 APInt cmpRange = maxValue - minValue; | |
2577 | |
2578 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' | |
2579 << "Low bound: " << minValue << '\n' | |
2580 << "High bound: " << maxValue << '\n'); | |
2581 | |
2582 if (cmpRange.uge(IntPtrBits) || | |
2583 (!(Dests.size() == 1 && numCmps >= 3) && | |
2584 !(Dests.size() == 2 && numCmps >= 5) && | |
2585 !(Dests.size() >= 3 && numCmps >= 6))) | |
2586 return false; | |
2587 | |
2588 DEBUG(dbgs() << "Emitting bit tests\n"); | |
2589 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); | |
2590 | |
2591 // Optimize the case where all the case values fit in a | |
2592 // word without having to subtract minValue. In this case, | |
2593 // we can optimize away the subtraction. | |
2594 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { | |
2595 cmpRange = maxValue; | |
2596 } else { | |
2597 lowBound = minValue; | |
2598 } | |
2599 | |
2600 CaseBitsVector CasesBits; | |
2601 unsigned i, count = 0; | |
2602 | |
2603 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { | |
2604 MachineBasicBlock* Dest = I->BB; | |
2605 for (i = 0; i < count; ++i) | |
2606 if (Dest == CasesBits[i].BB) | |
2607 break; | |
2608 | |
2609 if (i == count) { | |
2610 assert((count < 3) && "Too much destinations to test!"); | |
2611 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); | |
2612 count++; | |
2613 } | |
2614 | |
2615 const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); | |
2616 const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); | |
2617 | |
2618 uint64_t lo = (lowValue - lowBound).getZExtValue(); | |
2619 uint64_t hi = (highValue - lowBound).getZExtValue(); | |
2620 CasesBits[i].ExtraWeight += I->ExtraWeight; | |
2621 | |
2622 for (uint64_t j = lo; j <= hi; j++) { | |
2623 CasesBits[i].Mask |= 1ULL << j; | |
2624 CasesBits[i].Bits++; | |
2625 } | |
2626 | |
2627 } | |
2628 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); | |
2629 | |
2630 BitTestInfo BTC; | |
2631 | |
2632 // Figure out which block is immediately after the current one. | |
2633 MachineFunction::iterator BBI = CR.CaseBB; | |
2634 ++BBI; | |
2635 | |
2636 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); | |
2637 | |
2638 DEBUG(dbgs() << "Cases:\n"); | |
2639 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { | |
2640 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask | |
2641 << ", Bits: " << CasesBits[i].Bits | |
2642 << ", BB: " << CasesBits[i].BB << '\n'); | |
2643 | |
2644 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); | |
2645 CurMF->insert(BBI, CaseBB); | |
2646 BTC.push_back(BitTestCase(CasesBits[i].Mask, | |
2647 CaseBB, | |
2648 CasesBits[i].BB, CasesBits[i].ExtraWeight)); | |
2649 | |
2650 // Put SV in a virtual register to make it available from the new blocks. | |
2651 ExportFromCurrentBlock(SV); | |
2652 } | |
2653 | |
2654 BitTestBlock BTB(lowBound, cmpRange, SV, | |
2655 -1U, MVT::Other, (CR.CaseBB == SwitchBB), | |
2656 CR.CaseBB, Default, std::move(BTC)); | |
2657 | |
2658 if (CR.CaseBB == SwitchBB) | |
2659 visitBitTestHeader(BTB, SwitchBB); | |
2660 | |
2661 BitTestCases.push_back(std::move(BTB)); | |
2662 | |
2663 return true; | |
2664 } | |
2665 | |
2666 /// Clusterify - Transform simple list of Cases into list of CaseRange's | |
2667 void SelectionDAGBuilder::Clusterify(CaseVector& Cases, | |
2668 const SwitchInst& SI) { | |
2669 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
2670 // Start with "simple" cases. | |
2671 for (SwitchInst::ConstCaseIt i : SI.cases()) { | |
2672 const BasicBlock *SuccBB = i.getCaseSuccessor(); | |
2673 MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; | |
2674 | |
2675 uint32_t ExtraWeight = | |
2676 BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; | |
2677 | |
2678 Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), | |
2679 SMBB, ExtraWeight)); | |
2680 } | |
2681 std::sort(Cases.begin(), Cases.end(), CaseCmp()); | |
2682 | |
2683 // Merge case into clusters | |
2684 if (Cases.size() >= 2) | |
2685 // Must recompute end() each iteration because it may be | |
2686 // invalidated by erase if we hold on to it | |
2687 for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); | |
2688 J != Cases.end(); ) { | |
2689 const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); | |
2690 const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); | |
2691 MachineBasicBlock* nextBB = J->BB; | |
2692 MachineBasicBlock* currentBB = I->BB; | |
2693 | |
2694 // If the two neighboring cases go to the same destination, merge them | |
2695 // into a single case. | |
2696 if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { | |
2697 I->High = J->High; | |
2698 I->ExtraWeight += J->ExtraWeight; | |
2699 J = Cases.erase(J); | |
2700 } else { | |
2701 I = J++; | |
2702 } | |
2703 } | |
2704 | |
2705 DEBUG({ | |
2706 size_t numCmps = 0; | |
2707 for (auto &I : Cases) | |
2708 // A range counts double, since it requires two compares. | |
2709 numCmps += I.Low != I.High ? 2 : 1; | |
2710 | |
2711 dbgs() << "Clusterify finished. Total clusters: " << Cases.size() | |
2712 << ". Total compares: " << numCmps << '\n'; | |
2713 }); | |
2714 } | 2247 } |
2715 | 2248 |
2716 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, | 2249 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, |
2717 MachineBasicBlock *Last) { | 2250 MachineBasicBlock *Last) { |
2718 // Update JTCases. | 2251 // Update JTCases. |
2722 | 2255 |
2723 // Update BitTestCases. | 2256 // Update BitTestCases. |
2724 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) | 2257 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) |
2725 if (BitTestCases[i].Parent == First) | 2258 if (BitTestCases[i].Parent == First) |
2726 BitTestCases[i].Parent = Last; | 2259 BitTestCases[i].Parent = Last; |
2727 } | |
2728 | |
2729 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { | |
2730 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; | |
2731 | |
2732 // Figure out which block is immediately after the current one. | |
2733 MachineBasicBlock *NextBlock = nullptr; | |
2734 if (SwitchMBB + 1 != FuncInfo.MF->end()) | |
2735 NextBlock = SwitchMBB + 1; | |
2736 | |
2737 | |
2738 // Create a vector of Cases, sorted so that we can efficiently create a binary | |
2739 // search tree from them. | |
2740 CaseVector Cases; | |
2741 Clusterify(Cases, SI); | |
2742 | |
2743 // Get the default destination MBB. | |
2744 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; | |
2745 | |
2746 if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && | |
2747 !Cases.empty()) { | |
2748 // Replace an unreachable default destination with the most popular case | |
2749 // destination. | |
2750 DenseMap<const BasicBlock *, unsigned> Popularity; | |
2751 unsigned MaxPop = 0; | |
2752 const BasicBlock *MaxBB = nullptr; | |
2753 for (auto I : SI.cases()) { | |
2754 const BasicBlock *BB = I.getCaseSuccessor(); | |
2755 if (++Popularity[BB] > MaxPop) { | |
2756 MaxPop = Popularity[BB]; | |
2757 MaxBB = BB; | |
2758 } | |
2759 } | |
2760 | |
2761 // Set new default. | |
2762 assert(MaxPop > 0); | |
2763 assert(MaxBB); | |
2764 Default = FuncInfo.MBBMap[MaxBB]; | |
2765 | |
2766 // Remove cases that were pointing to the destination that is now the default. | |
2767 Cases.erase(std::remove_if(Cases.begin(), Cases.end(), | |
2768 [&](const Case &C) { return C.BB == Default; }), | |
2769 Cases.end()); | |
2770 } | |
2771 | |
2772 // If there is only the default destination, go there directly. | |
2773 if (Cases.empty()) { | |
2774 // Update machine-CFG edges. | |
2775 SwitchMBB->addSuccessor(Default); | |
2776 | |
2777 // If this is not a fall-through branch, emit the branch. | |
2778 if (Default != NextBlock) { | |
2779 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, | |
2780 getControlRoot(), DAG.getBasicBlock(Default))); | |
2781 } | |
2782 return; | |
2783 } | |
2784 | |
2785 // Get the Value to be switched on. | |
2786 const Value *SV = SI.getCondition(); | |
2787 | |
2788 // Push the initial CaseRec onto the worklist | |
2789 CaseRecVector WorkList; | |
2790 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, | |
2791 CaseRange(Cases.begin(),Cases.end()))); | |
2792 | |
2793 while (!WorkList.empty()) { | |
2794 // Grab a record representing a case range to process off the worklist | |
2795 CaseRec CR = WorkList.back(); | |
2796 WorkList.pop_back(); | |
2797 | |
2798 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) | |
2799 continue; | |
2800 | |
2801 // If the range has few cases (two or less) emit a series of specific | |
2802 // tests. | |
2803 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) | |
2804 continue; | |
2805 | |
2806 // If the switch has more than N blocks, and is at least 40% dense, and the | |
2807 // target supports indirect branches, then emit a jump table rather than | |
2808 // lowering the switch to a binary tree of conditional branches. | |
2809 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). | |
2810 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) | |
2811 continue; | |
2812 | |
2813 // Emit binary tree. We need to pick a pivot, and push left and right ranges | |
2814 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. | |
2815 handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); | |
2816 } | |
2817 } | 2260 } |
2818 | 2261 |
2819 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { | 2262 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { |
2820 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; | 2263 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; |
2821 | 2264 |
2836 getValue(I.getAddress()))); | 2279 getValue(I.getAddress()))); |
2837 } | 2280 } |
2838 | 2281 |
2839 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { | 2282 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { |
2840 if (DAG.getTarget().Options.TrapUnreachable) | 2283 if (DAG.getTarget().Options.TrapUnreachable) |
2841 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); | 2284 DAG.setRoot( |
2285 DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); | |
2842 } | 2286 } |
2843 | 2287 |
2844 void SelectionDAGBuilder::visitFSub(const User &I) { | 2288 void SelectionDAGBuilder::visitFSub(const User &I) { |
2845 // -0.0 - X --> fneg | 2289 // -0.0 - X --> fneg |
2846 Type *Ty = I.getType(); | 2290 Type *Ty = I.getType(); |
2860 SDValue Op2 = getValue(I.getOperand(1)); | 2304 SDValue Op2 = getValue(I.getOperand(1)); |
2861 | 2305 |
2862 bool nuw = false; | 2306 bool nuw = false; |
2863 bool nsw = false; | 2307 bool nsw = false; |
2864 bool exact = false; | 2308 bool exact = false; |
2309 FastMathFlags FMF; | |
2310 | |
2865 if (const OverflowingBinaryOperator *OFBinOp = | 2311 if (const OverflowingBinaryOperator *OFBinOp = |
2866 dyn_cast<const OverflowingBinaryOperator>(&I)) { | 2312 dyn_cast<const OverflowingBinaryOperator>(&I)) { |
2867 nuw = OFBinOp->hasNoUnsignedWrap(); | 2313 nuw = OFBinOp->hasNoUnsignedWrap(); |
2868 nsw = OFBinOp->hasNoSignedWrap(); | 2314 nsw = OFBinOp->hasNoSignedWrap(); |
2869 } | 2315 } |
2870 if (const PossiblyExactOperator *ExactOp = | 2316 if (const PossiblyExactOperator *ExactOp = |
2871 dyn_cast<const PossiblyExactOperator>(&I)) | 2317 dyn_cast<const PossiblyExactOperator>(&I)) |
2872 exact = ExactOp->isExact(); | 2318 exact = ExactOp->isExact(); |
2873 | 2319 if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) |
2320 FMF = FPOp->getFastMathFlags(); | |
2321 | |
2322 SDNodeFlags Flags; | |
2323 Flags.setExact(exact); | |
2324 Flags.setNoSignedWrap(nsw); | |
2325 Flags.setNoUnsignedWrap(nuw); | |
2326 if (EnableFMFInDAG) { | |
2327 Flags.setAllowReciprocal(FMF.allowReciprocal()); | |
2328 Flags.setNoInfs(FMF.noInfs()); | |
2329 Flags.setNoNaNs(FMF.noNaNs()); | |
2330 Flags.setNoSignedZeros(FMF.noSignedZeros()); | |
2331 Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); | |
2332 } | |
2874 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), | 2333 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), |
2875 Op1, Op2, nuw, nsw, exact); | 2334 Op1, Op2, &Flags); |
2876 setValue(&I, BinNodeValue); | 2335 setValue(&I, BinNodeValue); |
2877 } | 2336 } |
2878 | 2337 |
2879 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { | 2338 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { |
2880 SDValue Op1 = getValue(I.getOperand(0)); | 2339 SDValue Op1 = getValue(I.getOperand(0)); |
2881 SDValue Op2 = getValue(I.getOperand(1)); | 2340 SDValue Op2 = getValue(I.getOperand(1)); |
2882 | 2341 |
2883 EVT ShiftTy = | 2342 EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( |
2884 DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); | 2343 Op2.getValueType(), DAG.getDataLayout()); |
2885 | 2344 |
2886 // Coerce the shift amount to the right type if we can. | 2345 // Coerce the shift amount to the right type if we can. |
2887 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { | 2346 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { |
2888 unsigned ShiftSize = ShiftTy.getSizeInBits(); | 2347 unsigned ShiftSize = ShiftTy.getSizeInBits(); |
2889 unsigned Op2Size = Op2.getValueType().getSizeInBits(); | 2348 unsigned Op2Size = Op2.getValueType().getSizeInBits(); |
2918 } | 2377 } |
2919 if (const PossiblyExactOperator *ExactOp = | 2378 if (const PossiblyExactOperator *ExactOp = |
2920 dyn_cast<const PossiblyExactOperator>(&I)) | 2379 dyn_cast<const PossiblyExactOperator>(&I)) |
2921 exact = ExactOp->isExact(); | 2380 exact = ExactOp->isExact(); |
2922 } | 2381 } |
2923 | 2382 SDNodeFlags Flags; |
2383 Flags.setExact(exact); | |
2384 Flags.setNoSignedWrap(nsw); | |
2385 Flags.setNoUnsignedWrap(nuw); | |
2924 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, | 2386 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, |
2925 nuw, nsw, exact); | 2387 &Flags); |
2926 setValue(&I, Res); | 2388 setValue(&I, Res); |
2927 } | 2389 } |
2928 | 2390 |
2929 void SelectionDAGBuilder::visitSDiv(const User &I) { | 2391 void SelectionDAGBuilder::visitSDiv(const User &I) { |
2930 SDValue Op1 = getValue(I.getOperand(0)); | 2392 SDValue Op1 = getValue(I.getOperand(0)); |
2931 SDValue Op2 = getValue(I.getOperand(1)); | 2393 SDValue Op2 = getValue(I.getOperand(1)); |
2932 | 2394 |
2933 // Turn exact SDivs into multiplications. | 2395 SDNodeFlags Flags; |
2934 // FIXME: This should be in DAGCombiner, but it doesn't have access to the | 2396 Flags.setExact(isa<PossiblyExactOperator>(&I) && |
2935 // exact bit. | 2397 cast<PossiblyExactOperator>(&I)->isExact()); |
2936 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && | 2398 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, |
2937 !isa<ConstantSDNode>(Op1) && | 2399 Op2, &Flags)); |
2938 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) | |
2939 setValue(&I, DAG.getTargetLoweringInfo() | |
2940 .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); | |
2941 else | |
2942 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), | |
2943 Op1, Op2)); | |
2944 } | 2400 } |
2945 | 2401 |
2946 void SelectionDAGBuilder::visitICmp(const User &I) { | 2402 void SelectionDAGBuilder::visitICmp(const User &I) { |
2947 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; | 2403 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; |
2948 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) | 2404 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) |
2951 predicate = ICmpInst::Predicate(IC->getPredicate()); | 2407 predicate = ICmpInst::Predicate(IC->getPredicate()); |
2952 SDValue Op1 = getValue(I.getOperand(0)); | 2408 SDValue Op1 = getValue(I.getOperand(0)); |
2953 SDValue Op2 = getValue(I.getOperand(1)); | 2409 SDValue Op2 = getValue(I.getOperand(1)); |
2954 ISD::CondCode Opcode = getICmpCondCode(predicate); | 2410 ISD::CondCode Opcode = getICmpCondCode(predicate); |
2955 | 2411 |
2956 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2412 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2413 I.getType()); | |
2957 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); | 2414 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); |
2958 } | 2415 } |
2959 | 2416 |
2960 void SelectionDAGBuilder::visitFCmp(const User &I) { | 2417 void SelectionDAGBuilder::visitFCmp(const User &I) { |
2961 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; | 2418 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; |
2964 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) | 2421 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) |
2965 predicate = FCmpInst::Predicate(FC->getPredicate()); | 2422 predicate = FCmpInst::Predicate(FC->getPredicate()); |
2966 SDValue Op1 = getValue(I.getOperand(0)); | 2423 SDValue Op1 = getValue(I.getOperand(0)); |
2967 SDValue Op2 = getValue(I.getOperand(1)); | 2424 SDValue Op2 = getValue(I.getOperand(1)); |
2968 ISD::CondCode Condition = getFCmpCondCode(predicate); | 2425 ISD::CondCode Condition = getFCmpCondCode(predicate); |
2426 | |
2427 // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. | |
2428 // FIXME: We should propagate the fast-math-flags to the DAG node itself for | |
2429 // further optimization, but currently FMF is only applicable to binary nodes. | |
2969 if (TM.Options.NoNaNsFPMath) | 2430 if (TM.Options.NoNaNsFPMath) |
2970 Condition = getFCmpCodeWithoutNaN(Condition); | 2431 Condition = getFCmpCodeWithoutNaN(Condition); |
2971 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2432 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2433 I.getType()); | |
2972 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); | 2434 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); |
2973 } | 2435 } |
2974 | 2436 |
2975 void SelectionDAGBuilder::visitSelect(const User &I) { | 2437 void SelectionDAGBuilder::visitSelect(const User &I) { |
2976 SmallVector<EVT, 4> ValueVTs; | 2438 SmallVector<EVT, 4> ValueVTs; |
2977 ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); | 2439 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), |
2440 ValueVTs); | |
2978 unsigned NumValues = ValueVTs.size(); | 2441 unsigned NumValues = ValueVTs.size(); |
2979 if (NumValues == 0) return; | 2442 if (NumValues == 0) return; |
2980 | 2443 |
2981 SmallVector<SDValue, 4> Values(NumValues); | 2444 SmallVector<SDValue, 4> Values(NumValues); |
2982 SDValue Cond = getValue(I.getOperand(0)); | 2445 SDValue Cond = getValue(I.getOperand(0)); |
2983 SDValue TrueVal = getValue(I.getOperand(1)); | 2446 SDValue LHSVal = getValue(I.getOperand(1)); |
2984 SDValue FalseVal = getValue(I.getOperand(2)); | 2447 SDValue RHSVal = getValue(I.getOperand(2)); |
2448 auto BaseOps = {Cond}; | |
2985 ISD::NodeType OpCode = Cond.getValueType().isVector() ? | 2449 ISD::NodeType OpCode = Cond.getValueType().isVector() ? |
2986 ISD::VSELECT : ISD::SELECT; | 2450 ISD::VSELECT : ISD::SELECT; |
2987 | 2451 |
2988 for (unsigned i = 0; i != NumValues; ++i) | 2452 // Min/max matching is only viable if all output VTs are the same. |
2453 if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { | |
2454 EVT VT = ValueVTs[0]; | |
2455 LLVMContext &Ctx = *DAG.getContext(); | |
2456 auto &TLI = DAG.getTargetLoweringInfo(); | |
2457 while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) | |
2458 VT = TLI.getTypeToTransformTo(Ctx, VT); | |
2459 | |
2460 Value *LHS, *RHS; | |
2461 auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); | |
2462 ISD::NodeType Opc = ISD::DELETED_NODE; | |
2463 switch (SPR.Flavor) { | |
2464 case SPF_UMAX: Opc = ISD::UMAX; break; | |
2465 case SPF_UMIN: Opc = ISD::UMIN; break; | |
2466 case SPF_SMAX: Opc = ISD::SMAX; break; | |
2467 case SPF_SMIN: Opc = ISD::SMIN; break; | |
2468 case SPF_FMINNUM: | |
2469 switch (SPR.NaNBehavior) { | |
2470 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); | |
2471 case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; | |
2472 case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; | |
2473 case SPNB_RETURNS_ANY: | |
2474 Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM | |
2475 : ISD::FMINNAN; | |
2476 break; | |
2477 } | |
2478 break; | |
2479 case SPF_FMAXNUM: | |
2480 switch (SPR.NaNBehavior) { | |
2481 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); | |
2482 case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; | |
2483 case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; | |
2484 case SPNB_RETURNS_ANY: | |
2485 Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM | |
2486 : ISD::FMAXNAN; | |
2487 break; | |
2488 } | |
2489 break; | |
2490 default: break; | |
2491 } | |
2492 | |
2493 if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && | |
2494 // If the underlying comparison instruction is used by any other instruction, | |
2495 // the consumed instructions won't be destroyed, so it is not profitable | |
2496 // to convert to a min/max. | |
2497 cast<SelectInst>(&I)->getCondition()->hasOneUse()) { | |
2498 OpCode = Opc; | |
2499 LHSVal = getValue(LHS); | |
2500 RHSVal = getValue(RHS); | |
2501 BaseOps = {}; | |
2502 } | |
2503 } | |
2504 | |
2505 for (unsigned i = 0; i != NumValues; ++i) { | |
2506 SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); | |
2507 Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); | |
2508 Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); | |
2989 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), | 2509 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), |
2990 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), | 2510 LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), |
2991 Cond, | 2511 Ops); |
2992 SDValue(TrueVal.getNode(), | 2512 } |
2993 TrueVal.getResNo() + i), | |
2994 SDValue(FalseVal.getNode(), | |
2995 FalseVal.getResNo() + i)); | |
2996 | 2513 |
2997 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), | 2514 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), |
2998 DAG.getVTList(ValueVTs), Values)); | 2515 DAG.getVTList(ValueVTs), Values)); |
2999 } | 2516 } |
3000 | 2517 |
3001 void SelectionDAGBuilder::visitTrunc(const User &I) { | 2518 void SelectionDAGBuilder::visitTrunc(const User &I) { |
3002 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). | 2519 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). |
3003 SDValue N = getValue(I.getOperand(0)); | 2520 SDValue N = getValue(I.getOperand(0)); |
3004 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2521 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2522 I.getType()); | |
3005 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); | 2523 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); |
3006 } | 2524 } |
3007 | 2525 |
3008 void SelectionDAGBuilder::visitZExt(const User &I) { | 2526 void SelectionDAGBuilder::visitZExt(const User &I) { |
3009 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). | 2527 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
3010 // ZExt also can't be a cast to bool for same reason. So, nothing much to do | 2528 // ZExt also can't be a cast to bool for same reason. So, nothing much to do |
3011 SDValue N = getValue(I.getOperand(0)); | 2529 SDValue N = getValue(I.getOperand(0)); |
3012 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2530 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2531 I.getType()); | |
3013 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); | 2532 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); |
3014 } | 2533 } |
3015 | 2534 |
3016 void SelectionDAGBuilder::visitSExt(const User &I) { | 2535 void SelectionDAGBuilder::visitSExt(const User &I) { |
3017 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). | 2536 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
3018 // SExt also can't be a cast to bool for same reason. So, nothing much to do | 2537 // SExt also can't be a cast to bool for same reason. So, nothing much to do |
3019 SDValue N = getValue(I.getOperand(0)); | 2538 SDValue N = getValue(I.getOperand(0)); |
3020 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2539 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2540 I.getType()); | |
3021 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); | 2541 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); |
3022 } | 2542 } |
3023 | 2543 |
3024 void SelectionDAGBuilder::visitFPTrunc(const User &I) { | 2544 void SelectionDAGBuilder::visitFPTrunc(const User &I) { |
3025 // FPTrunc is never a no-op cast, no need to check | 2545 // FPTrunc is never a no-op cast, no need to check |
3026 SDValue N = getValue(I.getOperand(0)); | 2546 SDValue N = getValue(I.getOperand(0)); |
2547 SDLoc dl = getCurSDLoc(); | |
3027 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2548 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3028 EVT DestVT = TLI.getValueType(I.getType()); | 2549 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3029 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, | 2550 setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, |
3030 DAG.getTargetConstant(0, TLI.getPointerTy()))); | 2551 DAG.getTargetConstant( |
2552 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); | |
3031 } | 2553 } |
3032 | 2554 |
3033 void SelectionDAGBuilder::visitFPExt(const User &I) { | 2555 void SelectionDAGBuilder::visitFPExt(const User &I) { |
3034 // FPExt is never a no-op cast, no need to check | 2556 // FPExt is never a no-op cast, no need to check |
3035 SDValue N = getValue(I.getOperand(0)); | 2557 SDValue N = getValue(I.getOperand(0)); |
3036 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2558 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2559 I.getType()); | |
3037 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); | 2560 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); |
3038 } | 2561 } |
3039 | 2562 |
3040 void SelectionDAGBuilder::visitFPToUI(const User &I) { | 2563 void SelectionDAGBuilder::visitFPToUI(const User &I) { |
3041 // FPToUI is never a no-op cast, no need to check | 2564 // FPToUI is never a no-op cast, no need to check |
3042 SDValue N = getValue(I.getOperand(0)); | 2565 SDValue N = getValue(I.getOperand(0)); |
3043 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2566 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2567 I.getType()); | |
3044 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); | 2568 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); |
3045 } | 2569 } |
3046 | 2570 |
3047 void SelectionDAGBuilder::visitFPToSI(const User &I) { | 2571 void SelectionDAGBuilder::visitFPToSI(const User &I) { |
3048 // FPToSI is never a no-op cast, no need to check | 2572 // FPToSI is never a no-op cast, no need to check |
3049 SDValue N = getValue(I.getOperand(0)); | 2573 SDValue N = getValue(I.getOperand(0)); |
3050 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2574 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2575 I.getType()); | |
3051 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); | 2576 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); |
3052 } | 2577 } |
3053 | 2578 |
3054 void SelectionDAGBuilder::visitUIToFP(const User &I) { | 2579 void SelectionDAGBuilder::visitUIToFP(const User &I) { |
3055 // UIToFP is never a no-op cast, no need to check | 2580 // UIToFP is never a no-op cast, no need to check |
3056 SDValue N = getValue(I.getOperand(0)); | 2581 SDValue N = getValue(I.getOperand(0)); |
3057 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2582 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2583 I.getType()); | |
3058 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); | 2584 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); |
3059 } | 2585 } |
3060 | 2586 |
3061 void SelectionDAGBuilder::visitSIToFP(const User &I) { | 2587 void SelectionDAGBuilder::visitSIToFP(const User &I) { |
3062 // SIToFP is never a no-op cast, no need to check | 2588 // SIToFP is never a no-op cast, no need to check |
3063 SDValue N = getValue(I.getOperand(0)); | 2589 SDValue N = getValue(I.getOperand(0)); |
3064 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2590 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2591 I.getType()); | |
3065 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); | 2592 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); |
3066 } | 2593 } |
3067 | 2594 |
3068 void SelectionDAGBuilder::visitPtrToInt(const User &I) { | 2595 void SelectionDAGBuilder::visitPtrToInt(const User &I) { |
3069 // What to do depends on the size of the integer and the size of the pointer. | 2596 // What to do depends on the size of the integer and the size of the pointer. |
3070 // We can either truncate, zero extend, or no-op, accordingly. | 2597 // We can either truncate, zero extend, or no-op, accordingly. |
3071 SDValue N = getValue(I.getOperand(0)); | 2598 SDValue N = getValue(I.getOperand(0)); |
3072 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2599 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2600 I.getType()); | |
3073 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); | 2601 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); |
3074 } | 2602 } |
3075 | 2603 |
3076 void SelectionDAGBuilder::visitIntToPtr(const User &I) { | 2604 void SelectionDAGBuilder::visitIntToPtr(const User &I) { |
3077 // What to do depends on the size of the integer and the size of the pointer. | 2605 // What to do depends on the size of the integer and the size of the pointer. |
3078 // We can either truncate, zero extend, or no-op, accordingly. | 2606 // We can either truncate, zero extend, or no-op, accordingly. |
3079 SDValue N = getValue(I.getOperand(0)); | 2607 SDValue N = getValue(I.getOperand(0)); |
3080 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2608 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
2609 I.getType()); | |
3081 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); | 2610 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); |
3082 } | 2611 } |
3083 | 2612 |
3084 void SelectionDAGBuilder::visitBitCast(const User &I) { | 2613 void SelectionDAGBuilder::visitBitCast(const User &I) { |
3085 SDValue N = getValue(I.getOperand(0)); | 2614 SDValue N = getValue(I.getOperand(0)); |
3086 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); | 2615 SDLoc dl = getCurSDLoc(); |
2616 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), | |
2617 I.getType()); | |
3087 | 2618 |
3088 // BitCast assures us that source and destination are the same size so this is | 2619 // BitCast assures us that source and destination are the same size so this is |
3089 // either a BITCAST or a no-op. | 2620 // either a BITCAST or a no-op. |
3090 if (DestVT != N.getValueType()) | 2621 if (DestVT != N.getValueType()) |
3091 setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), | 2622 setValue(&I, DAG.getNode(ISD::BITCAST, dl, |
3092 DestVT, N)); // convert types. | 2623 DestVT, N)); // convert types. |
3093 // Check if the original LLVM IR Operand was a ConstantInt, because getValue() | 2624 // Check if the original LLVM IR Operand was a ConstantInt, because getValue() |
3094 // might fold any kind of constant expression to an integer constant and that | 2625 // might fold any kind of constant expression to an integer constant and that |
3095 // is not what we are looking for. Only recognize a bitcast of a genuine | 2626 // is not what we are looking for. Only recognize a bitcast of a genuine |
3096 // constant integer as an opaque constant. | 2627 // constant integer as an opaque constant. |
3097 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) | 2628 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) |
3098 setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, | 2629 setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, |
3099 /*isOpaque*/true)); | 2630 /*isOpaque*/true)); |
3100 else | 2631 else |
3101 setValue(&I, N); // noop cast. | 2632 setValue(&I, N); // noop cast. |
3102 } | 2633 } |
3103 | 2634 |
3104 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { | 2635 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { |
3105 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2636 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3106 const Value *SV = I.getOperand(0); | 2637 const Value *SV = I.getOperand(0); |
3107 SDValue N = getValue(SV); | 2638 SDValue N = getValue(SV); |
3108 EVT DestVT = TLI.getValueType(I.getType()); | 2639 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3109 | 2640 |
3110 unsigned SrcAS = SV->getType()->getPointerAddressSpace(); | 2641 unsigned SrcAS = SV->getType()->getPointerAddressSpace(); |
3111 unsigned DestAS = I.getType()->getPointerAddressSpace(); | 2642 unsigned DestAS = I.getType()->getPointerAddressSpace(); |
3112 | 2643 |
3113 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) | 2644 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) |
3118 | 2649 |
3119 void SelectionDAGBuilder::visitInsertElement(const User &I) { | 2650 void SelectionDAGBuilder::visitInsertElement(const User &I) { |
3120 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2651 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3121 SDValue InVec = getValue(I.getOperand(0)); | 2652 SDValue InVec = getValue(I.getOperand(0)); |
3122 SDValue InVal = getValue(I.getOperand(1)); | 2653 SDValue InVal = getValue(I.getOperand(1)); |
3123 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), | 2654 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), |
3124 getCurSDLoc(), TLI.getVectorIdxTy()); | 2655 TLI.getVectorIdxTy(DAG.getDataLayout())); |
3125 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), | 2656 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), |
3126 TLI.getValueType(I.getType()), InVec, InVal, InIdx)); | 2657 TLI.getValueType(DAG.getDataLayout(), I.getType()), |
2658 InVec, InVal, InIdx)); | |
3127 } | 2659 } |
3128 | 2660 |
3129 void SelectionDAGBuilder::visitExtractElement(const User &I) { | 2661 void SelectionDAGBuilder::visitExtractElement(const User &I) { |
3130 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2662 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3131 SDValue InVec = getValue(I.getOperand(0)); | 2663 SDValue InVec = getValue(I.getOperand(0)); |
3132 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), | 2664 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), |
3133 getCurSDLoc(), TLI.getVectorIdxTy()); | 2665 TLI.getVectorIdxTy(DAG.getDataLayout())); |
3134 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), | 2666 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), |
3135 TLI.getValueType(I.getType()), InVec, InIdx)); | 2667 TLI.getValueType(DAG.getDataLayout(), I.getType()), |
2668 InVec, InIdx)); | |
3136 } | 2669 } |
3137 | 2670 |
3138 // Utility for visitShuffleVector - Return true if every element in Mask, | 2671 // Utility for visitShuffleVector - Return true if every element in Mask, |
3139 // beginning from position Pos and ending in Pos+Size, falls within the | 2672 // beginning from position Pos and ending in Pos+Size, falls within the |
3140 // specified sequential range [L, L+Pos), or is undef. | 2673 // specified sequential range [L, L+Pos), or is undef. |
3153 SmallVector<int, 8> Mask; | 2686 SmallVector<int, 8> Mask; |
3154 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); | 2687 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); |
3155 unsigned MaskNumElts = Mask.size(); | 2688 unsigned MaskNumElts = Mask.size(); |
3156 | 2689 |
3157 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2690 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3158 EVT VT = TLI.getValueType(I.getType()); | 2691 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3159 EVT SrcVT = Src1.getValueType(); | 2692 EVT SrcVT = Src1.getValueType(); |
3160 unsigned SrcNumElts = SrcVT.getVectorNumElements(); | 2693 unsigned SrcNumElts = SrcVT.getVectorNumElements(); |
3161 | 2694 |
3162 if (SrcNumElts == MaskNumElts) { | 2695 if (SrcNumElts == MaskNumElts) { |
3163 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, | 2696 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, |
3271 // Extract appropriate subvector and generate a vector shuffle | 2804 // Extract appropriate subvector and generate a vector shuffle |
3272 for (unsigned Input = 0; Input < 2; ++Input) { | 2805 for (unsigned Input = 0; Input < 2; ++Input) { |
3273 SDValue &Src = Input == 0 ? Src1 : Src2; | 2806 SDValue &Src = Input == 0 ? Src1 : Src2; |
3274 if (RangeUse[Input] == 0) | 2807 if (RangeUse[Input] == 0) |
3275 Src = DAG.getUNDEF(VT); | 2808 Src = DAG.getUNDEF(VT); |
3276 else | 2809 else { |
2810 SDLoc dl = getCurSDLoc(); | |
3277 Src = DAG.getNode( | 2811 Src = DAG.getNode( |
3278 ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, | 2812 ISD::EXTRACT_SUBVECTOR, dl, VT, Src, |
3279 DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); | 2813 DAG.getConstant(StartIdx[Input], dl, |
2814 TLI.getVectorIdxTy(DAG.getDataLayout()))); | |
2815 } | |
3280 } | 2816 } |
3281 | 2817 |
3282 // Calculate new mask. | 2818 // Calculate new mask. |
3283 SmallVector<int, 8> MappedOps; | 2819 SmallVector<int, 8> MappedOps; |
3284 for (unsigned i = 0; i != MaskNumElts; ++i) { | 2820 for (unsigned i = 0; i != MaskNumElts; ++i) { |
3300 | 2836 |
3301 // We can't use either concat vectors or extract subvectors so fall back to | 2837 // We can't use either concat vectors or extract subvectors so fall back to |
3302 // replacing the shuffle with extract and build vector. | 2838 // replacing the shuffle with extract and build vector. |
3303 // to insert and build vector. | 2839 // to insert and build vector. |
3304 EVT EltVT = VT.getVectorElementType(); | 2840 EVT EltVT = VT.getVectorElementType(); |
3305 EVT IdxVT = TLI.getVectorIdxTy(); | 2841 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); |
2842 SDLoc dl = getCurSDLoc(); | |
3306 SmallVector<SDValue,8> Ops; | 2843 SmallVector<SDValue,8> Ops; |
3307 for (unsigned i = 0; i != MaskNumElts; ++i) { | 2844 for (unsigned i = 0; i != MaskNumElts; ++i) { |
3308 int Idx = Mask[i]; | 2845 int Idx = Mask[i]; |
3309 SDValue Res; | 2846 SDValue Res; |
3310 | 2847 |
3312 Res = DAG.getUNDEF(EltVT); | 2849 Res = DAG.getUNDEF(EltVT); |
3313 } else { | 2850 } else { |
3314 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; | 2851 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; |
3315 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; | 2852 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; |
3316 | 2853 |
3317 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), | 2854 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, |
3318 EltVT, Src, DAG.getConstant(Idx, IdxVT)); | 2855 EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); |
3319 } | 2856 } |
3320 | 2857 |
3321 Ops.push_back(Res); | 2858 Ops.push_back(Res); |
3322 } | 2859 } |
3323 | 2860 |
3324 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); | 2861 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); |
3325 } | 2862 } |
3326 | 2863 |
3327 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { | 2864 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { |
3328 const Value *Op0 = I.getOperand(0); | 2865 const Value *Op0 = I.getOperand(0); |
3329 const Value *Op1 = I.getOperand(1); | 2866 const Value *Op1 = I.getOperand(1); |
3334 | 2871 |
3335 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); | 2872 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); |
3336 | 2873 |
3337 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2874 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3338 SmallVector<EVT, 4> AggValueVTs; | 2875 SmallVector<EVT, 4> AggValueVTs; |
3339 ComputeValueVTs(TLI, AggTy, AggValueVTs); | 2876 ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); |
3340 SmallVector<EVT, 4> ValValueVTs; | 2877 SmallVector<EVT, 4> ValValueVTs; |
3341 ComputeValueVTs(TLI, ValTy, ValValueVTs); | 2878 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); |
3342 | 2879 |
3343 unsigned NumAggValues = AggValueVTs.size(); | 2880 unsigned NumAggValues = AggValueVTs.size(); |
3344 unsigned NumValValues = ValValueVTs.size(); | 2881 unsigned NumValValues = ValValueVTs.size(); |
3345 SmallVector<SDValue, 4> Values(NumAggValues); | 2882 SmallVector<SDValue, 4> Values(NumAggValues); |
3346 | 2883 |
3380 | 2917 |
3381 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); | 2918 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); |
3382 | 2919 |
3383 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2920 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3384 SmallVector<EVT, 4> ValValueVTs; | 2921 SmallVector<EVT, 4> ValValueVTs; |
3385 ComputeValueVTs(TLI, ValTy, ValValueVTs); | 2922 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); |
3386 | 2923 |
3387 unsigned NumValValues = ValValueVTs.size(); | 2924 unsigned NumValValues = ValValueVTs.size(); |
3388 | 2925 |
3389 // Ignore an extractvalue that produces an empty object | 2926 // Ignore an extractvalue that produces an empty object |
3390 if (!NumValValues) { | 2927 if (!NumValValues) { |
3411 // Note that the pointer operand may be a vector of pointers. Take the scalar | 2948 // Note that the pointer operand may be a vector of pointers. Take the scalar |
3412 // element which holds a pointer. | 2949 // element which holds a pointer. |
3413 Type *Ty = Op0->getType()->getScalarType(); | 2950 Type *Ty = Op0->getType()->getScalarType(); |
3414 unsigned AS = Ty->getPointerAddressSpace(); | 2951 unsigned AS = Ty->getPointerAddressSpace(); |
3415 SDValue N = getValue(Op0); | 2952 SDValue N = getValue(Op0); |
3416 | 2953 SDLoc dl = getCurSDLoc(); |
2954 | |
2955 // Normalize Vector GEP - all scalar operands should be converted to the | |
2956 // splat vector. | |
2957 unsigned VectorWidth = I.getType()->isVectorTy() ? | |
2958 cast<VectorType>(I.getType())->getVectorNumElements() : 0; | |
2959 | |
2960 if (VectorWidth && !N.getValueType().isVector()) { | |
2961 MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); | |
2962 SmallVector<SDValue, 16> Ops(VectorWidth, N); | |
2963 N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); | |
2964 } | |
3417 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); | 2965 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); |
3418 OI != E; ++OI) { | 2966 OI != E; ++OI) { |
3419 const Value *Idx = *OI; | 2967 const Value *Idx = *OI; |
3420 if (StructType *StTy = dyn_cast<StructType>(Ty)) { | 2968 if (StructType *StTy = dyn_cast<StructType>(Ty)) { |
3421 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); | 2969 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); |
3422 if (Field) { | 2970 if (Field) { |
3423 // N = N + Offset | 2971 // N = N + Offset |
3424 uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); | 2972 uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); |
3425 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, | 2973 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, |
3426 DAG.getConstant(Offset, N.getValueType())); | 2974 DAG.getConstant(Offset, dl, N.getValueType())); |
3427 } | 2975 } |
3428 | 2976 |
3429 Ty = StTy->getElementType(Field); | 2977 Ty = StTy->getElementType(Field); |
3430 } else { | 2978 } else { |
3431 Ty = cast<SequentialType>(Ty)->getElementType(); | 2979 Ty = cast<SequentialType>(Ty)->getElementType(); |
3432 | 2980 MVT PtrTy = |
3433 // If this is a constant subscript, handle it quickly. | 2981 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); |
3434 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 2982 unsigned PtrSize = PtrTy.getSizeInBits(); |
3435 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { | 2983 APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); |
3436 if (CI->isZero()) continue; | 2984 |
3437 uint64_t Offs = | 2985 // If this is a scalar constant or a splat vector of constants, |
3438 DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); | 2986 // handle it quickly. |
3439 SDValue OffsVal; | 2987 const auto *CI = dyn_cast<ConstantInt>(Idx); |
3440 EVT PTy = TLI.getPointerTy(AS); | 2988 if (!CI && isa<ConstantDataVector>(Idx) && |
3441 unsigned PtrBits = PTy.getSizeInBits(); | 2989 cast<ConstantDataVector>(Idx)->getSplatValue()) |
3442 if (PtrBits < 64) | 2990 CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); |
3443 OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, | 2991 |
3444 DAG.getConstant(Offs, MVT::i64)); | 2992 if (CI) { |
3445 else | 2993 if (CI->isZero()) |
3446 OffsVal = DAG.getConstant(Offs, PTy); | 2994 continue; |
3447 | 2995 APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); |
3448 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, | 2996 SDValue OffsVal = VectorWidth ? |
3449 OffsVal); | 2997 DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : |
2998 DAG.getConstant(Offs, dl, PtrTy); | |
2999 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); | |
3450 continue; | 3000 continue; |
3451 } | 3001 } |
3452 | 3002 |
3453 // N = N + Idx * ElementSize; | 3003 // N = N + Idx * ElementSize; |
3454 APInt ElementSize = | |
3455 APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); | |
3456 SDValue IdxN = getValue(Idx); | 3004 SDValue IdxN = getValue(Idx); |
3457 | 3005 |
3006 if (!IdxN.getValueType().isVector() && VectorWidth) { | |
3007 MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); | |
3008 SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); | |
3009 IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); | |
3010 } | |
3458 // If the index is smaller or larger than intptr_t, truncate or extend | 3011 // If the index is smaller or larger than intptr_t, truncate or extend |
3459 // it. | 3012 // it. |
3460 IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); | 3013 IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); |
3461 | 3014 |
3462 // If this is a multiply by a power of two, turn it into a shl | 3015 // If this is a multiply by a power of two, turn it into a shl |
3463 // immediately. This is a very common case. | 3016 // immediately. This is a very common case. |
3464 if (ElementSize != 1) { | 3017 if (ElementSize != 1) { |
3465 if (ElementSize.isPowerOf2()) { | 3018 if (ElementSize.isPowerOf2()) { |
3466 unsigned Amt = ElementSize.logBase2(); | 3019 unsigned Amt = ElementSize.logBase2(); |
3467 IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), | 3020 IdxN = DAG.getNode(ISD::SHL, dl, |
3468 N.getValueType(), IdxN, | 3021 N.getValueType(), IdxN, |
3469 DAG.getConstant(Amt, IdxN.getValueType())); | 3022 DAG.getConstant(Amt, dl, IdxN.getValueType())); |
3470 } else { | 3023 } else { |
3471 SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); | 3024 SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); |
3472 IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), | 3025 IdxN = DAG.getNode(ISD::MUL, dl, |
3473 N.getValueType(), IdxN, Scale); | 3026 N.getValueType(), IdxN, Scale); |
3474 } | 3027 } |
3475 } | 3028 } |
3476 | 3029 |
3477 N = DAG.getNode(ISD::ADD, getCurSDLoc(), | 3030 N = DAG.getNode(ISD::ADD, dl, |
3478 N.getValueType(), N, IdxN); | 3031 N.getValueType(), N, IdxN); |
3479 } | 3032 } |
3480 } | 3033 } |
3481 | 3034 |
3482 setValue(&I, N); | 3035 setValue(&I, N); |
3486 // If this is a fixed sized alloca in the entry block of the function, | 3039 // If this is a fixed sized alloca in the entry block of the function, |
3487 // allocate it statically on the stack. | 3040 // allocate it statically on the stack. |
3488 if (FuncInfo.StaticAllocaMap.count(&I)) | 3041 if (FuncInfo.StaticAllocaMap.count(&I)) |
3489 return; // getValue will auto-populate this. | 3042 return; // getValue will auto-populate this. |
3490 | 3043 |
3044 SDLoc dl = getCurSDLoc(); | |
3491 Type *Ty = I.getAllocatedType(); | 3045 Type *Ty = I.getAllocatedType(); |
3492 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3046 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3493 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); | 3047 auto &DL = DAG.getDataLayout(); |
3048 uint64_t TySize = DL.getTypeAllocSize(Ty); | |
3494 unsigned Align = | 3049 unsigned Align = |
3495 std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), | 3050 std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); |
3496 I.getAlignment()); | |
3497 | 3051 |
3498 SDValue AllocSize = getValue(I.getArraySize()); | 3052 SDValue AllocSize = getValue(I.getArraySize()); |
3499 | 3053 |
3500 EVT IntPtr = TLI.getPointerTy(); | 3054 EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); |
3501 if (AllocSize.getValueType() != IntPtr) | 3055 if (AllocSize.getValueType() != IntPtr) |
3502 AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); | 3056 AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); |
3503 | 3057 |
3504 AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, | 3058 AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, |
3505 AllocSize, | 3059 AllocSize, |
3506 DAG.getConstant(TySize, IntPtr)); | 3060 DAG.getConstant(TySize, dl, IntPtr)); |
3507 | 3061 |
3508 // Handle alignment. If the requested alignment is less than or equal to | 3062 // Handle alignment. If the requested alignment is less than or equal to |
3509 // the stack alignment, ignore it. If the size is greater than or equal to | 3063 // the stack alignment, ignore it. If the size is greater than or equal to |
3510 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. | 3064 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. |
3511 unsigned StackAlign = | 3065 unsigned StackAlign = |
3513 if (Align <= StackAlign) | 3067 if (Align <= StackAlign) |
3514 Align = 0; | 3068 Align = 0; |
3515 | 3069 |
3516 // Round the size of the allocation up to the stack alignment size | 3070 // Round the size of the allocation up to the stack alignment size |
3517 // by adding SA-1 to the size. | 3071 // by adding SA-1 to the size. |
3518 AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), | 3072 AllocSize = DAG.getNode(ISD::ADD, dl, |
3519 AllocSize.getValueType(), AllocSize, | 3073 AllocSize.getValueType(), AllocSize, |
3520 DAG.getIntPtrConstant(StackAlign-1)); | 3074 DAG.getIntPtrConstant(StackAlign - 1, dl)); |
3521 | 3075 |
3522 // Mask out the low bits for alignment purposes. | 3076 // Mask out the low bits for alignment purposes. |
3523 AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), | 3077 AllocSize = DAG.getNode(ISD::AND, dl, |
3524 AllocSize.getValueType(), AllocSize, | 3078 AllocSize.getValueType(), AllocSize, |
3525 DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); | 3079 DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), |
3526 | 3080 dl)); |
3527 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; | 3081 |
3082 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; | |
3528 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); | 3083 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); |
3529 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); | 3084 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); |
3530 setValue(&I, DSA); | 3085 setValue(&I, DSA); |
3531 DAG.setRoot(DSA.getValue(1)); | 3086 DAG.setRoot(DSA.getValue(1)); |
3532 | 3087 |
3533 assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); | 3088 assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); |
3534 } | 3089 } |
3542 | 3097 |
3543 Type *Ty = I.getType(); | 3098 Type *Ty = I.getType(); |
3544 | 3099 |
3545 bool isVolatile = I.isVolatile(); | 3100 bool isVolatile = I.isVolatile(); |
3546 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; | 3101 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; |
3547 bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; | 3102 |
3103 // The IR notion of invariant_load only guarantees that all *non-faulting* | |
3104 // invariant loads result in the same value. The MI notion of invariant load | |
3105 // guarantees that the load can be legally moved to any location within its | |
3106 // containing function. The MI notion of invariant_load is stronger than the | |
3107 // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load | |
3108 // with a guarantee that the location being loaded from is dereferenceable | |
3109 // throughout the function's lifetime. | |
3110 | |
3111 bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && | |
3112 isDereferenceablePointer(SV, DAG.getDataLayout()); | |
3548 unsigned Alignment = I.getAlignment(); | 3113 unsigned Alignment = I.getAlignment(); |
3549 | 3114 |
3550 AAMDNodes AAInfo; | 3115 AAMDNodes AAInfo; |
3551 I.getAAMetadata(AAInfo); | 3116 I.getAAMetadata(AAInfo); |
3552 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); | 3117 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); |
3553 | 3118 |
3554 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3119 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3555 SmallVector<EVT, 4> ValueVTs; | 3120 SmallVector<EVT, 4> ValueVTs; |
3556 SmallVector<uint64_t, 4> Offsets; | 3121 SmallVector<uint64_t, 4> Offsets; |
3557 ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); | 3122 ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); |
3558 unsigned NumValues = ValueVTs.size(); | 3123 unsigned NumValues = ValueVTs.size(); |
3559 if (NumValues == 0) | 3124 if (NumValues == 0) |
3560 return; | 3125 return; |
3561 | 3126 |
3562 SDValue Root; | 3127 SDValue Root; |
3563 bool ConstantMemory = false; | 3128 bool ConstantMemory = false; |
3564 if (isVolatile || NumValues > MaxParallelChains) | 3129 if (isVolatile || NumValues > MaxParallelChains) |
3565 // Serialize volatile loads with other side effects. | 3130 // Serialize volatile loads with other side effects. |
3566 Root = getRoot(); | 3131 Root = getRoot(); |
3567 else if (AA->pointsToConstantMemory( | 3132 else if (AA->pointsToConstantMemory(MemoryLocation( |
3568 AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { | 3133 SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { |
3569 // Do not serialize (non-volatile) loads of constant memory with anything. | 3134 // Do not serialize (non-volatile) loads of constant memory with anything. |
3570 Root = DAG.getEntryNode(); | 3135 Root = DAG.getEntryNode(); |
3571 ConstantMemory = true; | 3136 ConstantMemory = true; |
3572 } else { | 3137 } else { |
3573 // Do not serialize non-volatile loads against each other. | 3138 // Do not serialize non-volatile loads against each other. |
3574 Root = DAG.getRoot(); | 3139 Root = DAG.getRoot(); |
3575 } | 3140 } |
3576 | 3141 |
3142 SDLoc dl = getCurSDLoc(); | |
3143 | |
3577 if (isVolatile) | 3144 if (isVolatile) |
3578 Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); | 3145 Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); |
3579 | 3146 |
3580 SmallVector<SDValue, 4> Values(NumValues); | 3147 SmallVector<SDValue, 4> Values(NumValues); |
3581 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), | 3148 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); |
3582 NumValues)); | |
3583 EVT PtrVT = Ptr.getValueType(); | 3149 EVT PtrVT = Ptr.getValueType(); |
3584 unsigned ChainI = 0; | 3150 unsigned ChainI = 0; |
3585 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { | 3151 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { |
3586 // Serializing loads here may result in excessive register pressure, and | 3152 // Serializing loads here may result in excessive register pressure, and |
3587 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling | 3153 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling |
3589 // they are side-effect free or do not alias. The optimizer should really | 3155 // they are side-effect free or do not alias. The optimizer should really |
3590 // avoid this case by converting large object/array copies to llvm.memcpy | 3156 // avoid this case by converting large object/array copies to llvm.memcpy |
3591 // (MaxParallelChains should always remain as failsafe). | 3157 // (MaxParallelChains should always remain as failsafe). |
3592 if (ChainI == MaxParallelChains) { | 3158 if (ChainI == MaxParallelChains) { |
3593 assert(PendingLoads.empty() && "PendingLoads must be serialized first"); | 3159 assert(PendingLoads.empty() && "PendingLoads must be serialized first"); |
3594 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, | 3160 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
3595 makeArrayRef(Chains.data(), ChainI)); | 3161 makeArrayRef(Chains.data(), ChainI)); |
3596 Root = Chain; | 3162 Root = Chain; |
3597 ChainI = 0; | 3163 ChainI = 0; |
3598 } | 3164 } |
3599 SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), | 3165 SDValue A = DAG.getNode(ISD::ADD, dl, |
3600 PtrVT, Ptr, | 3166 PtrVT, Ptr, |
3601 DAG.getConstant(Offsets[i], PtrVT)); | 3167 DAG.getConstant(Offsets[i], dl, PtrVT)); |
3602 SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, | 3168 SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, |
3603 A, MachinePointerInfo(SV, Offsets[i]), isVolatile, | 3169 A, MachinePointerInfo(SV, Offsets[i]), isVolatile, |
3604 isNonTemporal, isInvariant, Alignment, AAInfo, | 3170 isNonTemporal, isInvariant, Alignment, AAInfo, |
3605 Ranges); | 3171 Ranges); |
3606 | 3172 |
3607 Values[i] = L; | 3173 Values[i] = L; |
3608 Chains[ChainI] = L.getValue(1); | 3174 Chains[ChainI] = L.getValue(1); |
3609 } | 3175 } |
3610 | 3176 |
3611 if (!ConstantMemory) { | 3177 if (!ConstantMemory) { |
3612 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, | 3178 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
3613 makeArrayRef(Chains.data(), ChainI)); | 3179 makeArrayRef(Chains.data(), ChainI)); |
3614 if (isVolatile) | 3180 if (isVolatile) |
3615 DAG.setRoot(Chain); | 3181 DAG.setRoot(Chain); |
3616 else | 3182 else |
3617 PendingLoads.push_back(Chain); | 3183 PendingLoads.push_back(Chain); |
3618 } | 3184 } |
3619 | 3185 |
3620 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), | 3186 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, |
3621 DAG.getVTList(ValueVTs), Values)); | 3187 DAG.getVTList(ValueVTs), Values)); |
3622 } | 3188 } |
3623 | 3189 |
3624 void SelectionDAGBuilder::visitStore(const StoreInst &I) { | 3190 void SelectionDAGBuilder::visitStore(const StoreInst &I) { |
3625 if (I.isAtomic()) | 3191 if (I.isAtomic()) |
3628 const Value *SrcV = I.getOperand(0); | 3194 const Value *SrcV = I.getOperand(0); |
3629 const Value *PtrV = I.getOperand(1); | 3195 const Value *PtrV = I.getOperand(1); |
3630 | 3196 |
3631 SmallVector<EVT, 4> ValueVTs; | 3197 SmallVector<EVT, 4> ValueVTs; |
3632 SmallVector<uint64_t, 4> Offsets; | 3198 SmallVector<uint64_t, 4> Offsets; |
3633 ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), | 3199 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), |
3634 ValueVTs, &Offsets); | 3200 SrcV->getType(), ValueVTs, &Offsets); |
3635 unsigned NumValues = ValueVTs.size(); | 3201 unsigned NumValues = ValueVTs.size(); |
3636 if (NumValues == 0) | 3202 if (NumValues == 0) |
3637 return; | 3203 return; |
3638 | 3204 |
3639 // Get the lowered operands. Note that we do this after | 3205 // Get the lowered operands. Note that we do this after |
3641 // the operands won't have values in the map. | 3207 // the operands won't have values in the map. |
3642 SDValue Src = getValue(SrcV); | 3208 SDValue Src = getValue(SrcV); |
3643 SDValue Ptr = getValue(PtrV); | 3209 SDValue Ptr = getValue(PtrV); |
3644 | 3210 |
3645 SDValue Root = getRoot(); | 3211 SDValue Root = getRoot(); |
3646 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), | 3212 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); |
3647 NumValues)); | |
3648 EVT PtrVT = Ptr.getValueType(); | 3213 EVT PtrVT = Ptr.getValueType(); |
3649 bool isVolatile = I.isVolatile(); | 3214 bool isVolatile = I.isVolatile(); |
3650 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; | 3215 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; |
3651 unsigned Alignment = I.getAlignment(); | 3216 unsigned Alignment = I.getAlignment(); |
3217 SDLoc dl = getCurSDLoc(); | |
3652 | 3218 |
3653 AAMDNodes AAInfo; | 3219 AAMDNodes AAInfo; |
3654 I.getAAMetadata(AAInfo); | 3220 I.getAAMetadata(AAInfo); |
3655 | 3221 |
3656 unsigned ChainI = 0; | 3222 unsigned ChainI = 0; |
3657 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { | 3223 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { |
3658 // See visitLoad comments. | 3224 // See visitLoad comments. |
3659 if (ChainI == MaxParallelChains) { | 3225 if (ChainI == MaxParallelChains) { |
3660 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, | 3226 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
3661 makeArrayRef(Chains.data(), ChainI)); | 3227 makeArrayRef(Chains.data(), ChainI)); |
3662 Root = Chain; | 3228 Root = Chain; |
3663 ChainI = 0; | 3229 ChainI = 0; |
3664 } | 3230 } |
3665 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, | 3231 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, |
3666 DAG.getConstant(Offsets[i], PtrVT)); | 3232 DAG.getConstant(Offsets[i], dl, PtrVT)); |
3667 SDValue St = DAG.getStore(Root, getCurSDLoc(), | 3233 SDValue St = DAG.getStore(Root, dl, |
3668 SDValue(Src.getNode(), Src.getResNo() + i), | 3234 SDValue(Src.getNode(), Src.getResNo() + i), |
3669 Add, MachinePointerInfo(PtrV, Offsets[i]), | 3235 Add, MachinePointerInfo(PtrV, Offsets[i]), |
3670 isVolatile, isNonTemporal, Alignment, AAInfo); | 3236 isVolatile, isNonTemporal, Alignment, AAInfo); |
3671 Chains[ChainI] = St; | 3237 Chains[ChainI] = St; |
3672 } | 3238 } |
3673 | 3239 |
3674 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, | 3240 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
3675 makeArrayRef(Chains.data(), ChainI)); | 3241 makeArrayRef(Chains.data(), ChainI)); |
3676 DAG.setRoot(StoreNode); | 3242 DAG.setRoot(StoreNode); |
3677 } | 3243 } |
3678 | 3244 |
3679 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { | 3245 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { |
3680 SDLoc sdl = getCurSDLoc(); | 3246 SDLoc sdl = getCurSDLoc(); |
3681 | 3247 |
3682 // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) | 3248 // llvm.masked.store.*(Src0, Ptr, alignment, Mask) |
3683 Value *PtrOperand = I.getArgOperand(1); | 3249 Value *PtrOperand = I.getArgOperand(1); |
3684 SDValue Ptr = getValue(PtrOperand); | 3250 SDValue Ptr = getValue(PtrOperand); |
3685 SDValue Src0 = getValue(I.getArgOperand(0)); | 3251 SDValue Src0 = getValue(I.getArgOperand(0)); |
3686 SDValue Mask = getValue(I.getArgOperand(3)); | 3252 SDValue Mask = getValue(I.getArgOperand(3)); |
3687 EVT VT = Src0.getValueType(); | 3253 EVT VT = Src0.getValueType(); |
3701 MMO, false); | 3267 MMO, false); |
3702 DAG.setRoot(StoreNode); | 3268 DAG.setRoot(StoreNode); |
3703 setValue(&I, StoreNode); | 3269 setValue(&I, StoreNode); |
3704 } | 3270 } |
3705 | 3271 |
3272 // Get a uniform base for the Gather/Scatter intrinsic. | |
3273 // The first argument of the Gather/Scatter intrinsic is a vector of pointers. | |
3274 // We try to represent it as a base pointer + vector of indices. | |
3275 // Usually, the vector of pointers comes from a 'getelementptr' instruction. | |
3276 // The first operand of the GEP may be a single pointer or a vector of pointers. | |
3277 // Example: | |
3278 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind | |
3279 // or | |
3280 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind | |
3281 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. | |
3282 // | |
3283 // When the first GEP operand is a single pointer - it is the uniform base we | |
3284 // are looking for. If the first operand of the GEP is a splat vector - we | |
3285 // extract the splat value and use it as a uniform base. | |
3286 // In all other cases the function returns 'false'. | |
3287 // | |
3288 static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, | |
3289 SelectionDAGBuilder* SDB) { | |
3290 | |
3291 SelectionDAG& DAG = SDB->DAG; | |
3292 LLVMContext &Context = *DAG.getContext(); | |
3293 | |
3294 assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); | |
3295 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); | |
3296 if (!GEP || GEP->getNumOperands() > 2) | |
3297 return false; | |
3298 | |
3299 Value *GEPPtr = GEP->getPointerOperand(); | |
3300 if (!GEPPtr->getType()->isVectorTy()) | |
3301 Ptr = GEPPtr; | |
3302 else if (!(Ptr = getSplatValue(GEPPtr))) | |
3303 return false; | |
3304 | |
3305 Value *IndexVal = GEP->getOperand(1); | |
3306 | |
3307 // The operands of the GEP may be defined in another basic block. | |
3308 // In this case we'll not find nodes for the operands. | |
3309 if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) | |
3310 return false; | |
3311 | |
3312 Base = SDB->getValue(Ptr); | |
3313 Index = SDB->getValue(IndexVal); | |
3314 | |
3315 // Suppress sign extension. | |
3316 if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { | |
3317 if (SDB->findValue(Sext->getOperand(0))) { | |
3318 IndexVal = Sext->getOperand(0); | |
3319 Index = SDB->getValue(IndexVal); | |
3320 } | |
3321 } | |
3322 if (!Index.getValueType().isVector()) { | |
3323 unsigned GEPWidth = GEP->getType()->getVectorNumElements(); | |
3324 EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); | |
3325 SmallVector<SDValue, 16> Ops(GEPWidth, Index); | |
3326 Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); | |
3327 } | |
3328 return true; | |
3329 } | |
3330 | |
3331 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { | |
3332 SDLoc sdl = getCurSDLoc(); | |
3333 | |
3334 // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) | |
3335 Value *Ptr = I.getArgOperand(1); | |
3336 SDValue Src0 = getValue(I.getArgOperand(0)); | |
3337 SDValue Mask = getValue(I.getArgOperand(3)); | |
3338 EVT VT = Src0.getValueType(); | |
3339 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); | |
3340 if (!Alignment) | |
3341 Alignment = DAG.getEVTAlignment(VT); | |
3342 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
3343 | |
3344 AAMDNodes AAInfo; | |
3345 I.getAAMetadata(AAInfo); | |
3346 | |
3347 SDValue Base; | |
3348 SDValue Index; | |
3349 Value *BasePtr = Ptr; | |
3350 bool UniformBase = getUniformBase(BasePtr, Base, Index, this); | |
3351 | |
3352 Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; | |
3353 MachineMemOperand *MMO = DAG.getMachineFunction(). | |
3354 getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), | |
3355 MachineMemOperand::MOStore, VT.getStoreSize(), | |
3356 Alignment, AAInfo); | |
3357 if (!UniformBase) { | |
3358 Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); | |
3359 Index = getValue(Ptr); | |
3360 } | |
3361 SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; | |
3362 SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, | |
3363 Ops, MMO); | |
3364 DAG.setRoot(Scatter); | |
3365 setValue(&I, Scatter); | |
3366 } | |
3367 | |
3706 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { | 3368 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { |
3707 SDLoc sdl = getCurSDLoc(); | 3369 SDLoc sdl = getCurSDLoc(); |
3708 | 3370 |
3709 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) | 3371 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) |
3710 Value *PtrOperand = I.getArgOperand(0); | 3372 Value *PtrOperand = I.getArgOperand(0); |
3711 SDValue Ptr = getValue(PtrOperand); | 3373 SDValue Ptr = getValue(PtrOperand); |
3712 SDValue Src0 = getValue(I.getArgOperand(3)); | 3374 SDValue Src0 = getValue(I.getArgOperand(3)); |
3713 SDValue Mask = getValue(I.getArgOperand(2)); | 3375 SDValue Mask = getValue(I.getArgOperand(2)); |
3714 | 3376 |
3715 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3377 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3716 EVT VT = TLI.getValueType(I.getType()); | 3378 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3717 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); | 3379 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); |
3718 if (!Alignment) | 3380 if (!Alignment) |
3719 Alignment = DAG.getEVTAlignment(VT); | 3381 Alignment = DAG.getEVTAlignment(VT); |
3720 | 3382 |
3721 AAMDNodes AAInfo; | 3383 AAMDNodes AAInfo; |
3722 I.getAAMetadata(AAInfo); | 3384 I.getAAMetadata(AAInfo); |
3723 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); | 3385 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); |
3724 | 3386 |
3725 SDValue InChain = DAG.getRoot(); | 3387 SDValue InChain = DAG.getRoot(); |
3726 if (AA->pointsToConstantMemory( | 3388 if (AA->pointsToConstantMemory(MemoryLocation( |
3727 AliasAnalysis::Location(PtrOperand, | 3389 PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), |
3728 AA->getTypeStoreSize(I.getType()), | 3390 AAInfo))) { |
3729 AAInfo))) { | |
3730 // Do not serialize (non-volatile) loads of constant memory with anything. | 3391 // Do not serialize (non-volatile) loads of constant memory with anything. |
3731 InChain = DAG.getEntryNode(); | 3392 InChain = DAG.getEntryNode(); |
3732 } | 3393 } |
3733 | 3394 |
3734 MachineMemOperand *MMO = | 3395 MachineMemOperand *MMO = |
3740 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, | 3401 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, |
3741 ISD::NON_EXTLOAD); | 3402 ISD::NON_EXTLOAD); |
3742 SDValue OutChain = Load.getValue(1); | 3403 SDValue OutChain = Load.getValue(1); |
3743 DAG.setRoot(OutChain); | 3404 DAG.setRoot(OutChain); |
3744 setValue(&I, Load); | 3405 setValue(&I, Load); |
3406 } | |
3407 | |
3408 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { | |
3409 SDLoc sdl = getCurSDLoc(); | |
3410 | |
3411 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) | |
3412 Value *Ptr = I.getArgOperand(0); | |
3413 SDValue Src0 = getValue(I.getArgOperand(3)); | |
3414 SDValue Mask = getValue(I.getArgOperand(2)); | |
3415 | |
3416 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
3417 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); | |
3418 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); | |
3419 if (!Alignment) | |
3420 Alignment = DAG.getEVTAlignment(VT); | |
3421 | |
3422 AAMDNodes AAInfo; | |
3423 I.getAAMetadata(AAInfo); | |
3424 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); | |
3425 | |
3426 SDValue Root = DAG.getRoot(); | |
3427 SDValue Base; | |
3428 SDValue Index; | |
3429 Value *BasePtr = Ptr; | |
3430 bool UniformBase = getUniformBase(BasePtr, Base, Index, this); | |
3431 bool ConstantMemory = false; | |
3432 if (UniformBase && | |
3433 AA->pointsToConstantMemory(MemoryLocation( | |
3434 BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), | |
3435 AAInfo))) { | |
3436 // Do not serialize (non-volatile) loads of constant memory with anything. | |
3437 Root = DAG.getEntryNode(); | |
3438 ConstantMemory = true; | |
3439 } | |
3440 | |
3441 MachineMemOperand *MMO = | |
3442 DAG.getMachineFunction(). | |
3443 getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), | |
3444 MachineMemOperand::MOLoad, VT.getStoreSize(), | |
3445 Alignment, AAInfo, Ranges); | |
3446 | |
3447 if (!UniformBase) { | |
3448 Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); | |
3449 Index = getValue(Ptr); | |
3450 } | |
3451 SDValue Ops[] = { Root, Src0, Mask, Base, Index }; | |
3452 SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, | |
3453 Ops, MMO); | |
3454 | |
3455 SDValue OutChain = Gather.getValue(1); | |
3456 if (!ConstantMemory) | |
3457 PendingLoads.push_back(OutChain); | |
3458 setValue(&I, Gather); | |
3745 } | 3459 } |
3746 | 3460 |
3747 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { | 3461 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { |
3748 SDLoc dl = getCurSDLoc(); | 3462 SDLoc dl = getCurSDLoc(); |
3749 AtomicOrdering SuccessOrder = I.getSuccessOrdering(); | 3463 AtomicOrdering SuccessOrder = I.getSuccessOrdering(); |
3806 void SelectionDAGBuilder::visitFence(const FenceInst &I) { | 3520 void SelectionDAGBuilder::visitFence(const FenceInst &I) { |
3807 SDLoc dl = getCurSDLoc(); | 3521 SDLoc dl = getCurSDLoc(); |
3808 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3522 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3809 SDValue Ops[3]; | 3523 SDValue Ops[3]; |
3810 Ops[0] = getRoot(); | 3524 Ops[0] = getRoot(); |
3811 Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); | 3525 Ops[1] = DAG.getConstant(I.getOrdering(), dl, |
3812 Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); | 3526 TLI.getPointerTy(DAG.getDataLayout())); |
3527 Ops[2] = DAG.getConstant(I.getSynchScope(), dl, | |
3528 TLI.getPointerTy(DAG.getDataLayout())); | |
3813 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); | 3529 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); |
3814 } | 3530 } |
3815 | 3531 |
3816 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { | 3532 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { |
3817 SDLoc dl = getCurSDLoc(); | 3533 SDLoc dl = getCurSDLoc(); |
3819 SynchronizationScope Scope = I.getSynchScope(); | 3535 SynchronizationScope Scope = I.getSynchScope(); |
3820 | 3536 |
3821 SDValue InChain = getRoot(); | 3537 SDValue InChain = getRoot(); |
3822 | 3538 |
3823 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3539 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3824 EVT VT = TLI.getValueType(I.getType()); | 3540 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3825 | 3541 |
3826 if (I.getAlignment() < VT.getSizeInBits() / 8) | 3542 if (I.getAlignment() < VT.getSizeInBits() / 8) |
3827 report_fatal_error("Cannot generate unaligned atomic load"); | 3543 report_fatal_error("Cannot generate unaligned atomic load"); |
3828 | 3544 |
3829 MachineMemOperand *MMO = | 3545 MachineMemOperand *MMO = |
3854 SynchronizationScope Scope = I.getSynchScope(); | 3570 SynchronizationScope Scope = I.getSynchScope(); |
3855 | 3571 |
3856 SDValue InChain = getRoot(); | 3572 SDValue InChain = getRoot(); |
3857 | 3573 |
3858 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 3574 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3859 EVT VT = TLI.getValueType(I.getValueOperand()->getType()); | 3575 EVT VT = |
3576 TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); | |
3860 | 3577 |
3861 if (I.getAlignment() < VT.getSizeInBits() / 8) | 3578 if (I.getAlignment() < VT.getSizeInBits() / 8) |
3862 report_fatal_error("Cannot generate unaligned atomic store"); | 3579 report_fatal_error("Cannot generate unaligned atomic store"); |
3863 | 3580 |
3864 SDValue OutChain = | 3581 SDValue OutChain = |
3896 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); | 3613 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); |
3897 | 3614 |
3898 // Add the intrinsic ID as an integer operand if it's not a target intrinsic. | 3615 // Add the intrinsic ID as an integer operand if it's not a target intrinsic. |
3899 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || | 3616 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || |
3900 Info.opc == ISD::INTRINSIC_W_CHAIN) | 3617 Info.opc == ISD::INTRINSIC_W_CHAIN) |
3901 Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); | 3618 Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), |
3619 TLI.getPointerTy(DAG.getDataLayout()))); | |
3902 | 3620 |
3903 // Add all operands of the call to the operand list. | 3621 // Add all operands of the call to the operand list. |
3904 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { | 3622 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { |
3905 SDValue Op = getValue(I.getArgOperand(i)); | 3623 SDValue Op = getValue(I.getArgOperand(i)); |
3906 Ops.push_back(Op); | 3624 Ops.push_back(Op); |
3907 } | 3625 } |
3908 | 3626 |
3909 SmallVector<EVT, 4> ValueVTs; | 3627 SmallVector<EVT, 4> ValueVTs; |
3910 ComputeValueVTs(TLI, I.getType(), ValueVTs); | 3628 ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); |
3911 | 3629 |
3912 if (HasChain) | 3630 if (HasChain) |
3913 ValueVTs.push_back(MVT::Other); | 3631 ValueVTs.push_back(MVT::Other); |
3914 | 3632 |
3915 SDVTList VTs = DAG.getVTList(ValueVTs); | 3633 SDVTList VTs = DAG.getVTList(ValueVTs); |
3939 DAG.setRoot(Chain); | 3657 DAG.setRoot(Chain); |
3940 } | 3658 } |
3941 | 3659 |
3942 if (!I.getType()->isVoidTy()) { | 3660 if (!I.getType()->isVoidTy()) { |
3943 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { | 3661 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { |
3944 EVT VT = TLI.getValueType(PTy); | 3662 EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); |
3945 Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); | 3663 Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); |
3946 } | 3664 } |
3947 | 3665 |
3948 setValue(&I, Result); | 3666 setValue(&I, Result); |
3949 } | 3667 } |
3956 /// | 3674 /// |
3957 /// where Op is the hexadecimal representation of floating point value. | 3675 /// where Op is the hexadecimal representation of floating point value. |
3958 static SDValue | 3676 static SDValue |
3959 GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { | 3677 GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { |
3960 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, | 3678 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, |
3961 DAG.getConstant(0x007fffff, MVT::i32)); | 3679 DAG.getConstant(0x007fffff, dl, MVT::i32)); |
3962 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, | 3680 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, |
3963 DAG.getConstant(0x3f800000, MVT::i32)); | 3681 DAG.getConstant(0x3f800000, dl, MVT::i32)); |
3964 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); | 3682 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); |
3965 } | 3683 } |
3966 | 3684 |
3967 /// GetExponent - Get the exponent: | 3685 /// GetExponent - Get the exponent: |
3968 /// | 3686 /// |
3971 /// where Op is the hexadecimal representation of floating point value. | 3689 /// where Op is the hexadecimal representation of floating point value. |
3972 static SDValue | 3690 static SDValue |
3973 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, | 3691 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, |
3974 SDLoc dl) { | 3692 SDLoc dl) { |
3975 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, | 3693 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, |
3976 DAG.getConstant(0x7f800000, MVT::i32)); | 3694 DAG.getConstant(0x7f800000, dl, MVT::i32)); |
3977 SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, | 3695 SDValue t1 = DAG.getNode( |
3978 DAG.getConstant(23, TLI.getPointerTy())); | 3696 ISD::SRL, dl, MVT::i32, t0, |
3697 DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); | |
3979 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, | 3698 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, |
3980 DAG.getConstant(127, MVT::i32)); | 3699 DAG.getConstant(127, dl, MVT::i32)); |
3981 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); | 3700 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); |
3982 } | 3701 } |
3983 | 3702 |
3984 /// getF32Constant - Get 32-bit floating point constant. | 3703 /// getF32Constant - Get 32-bit floating point constant. |
3985 static SDValue | 3704 static SDValue |
3986 getF32Constant(SelectionDAG &DAG, unsigned Flt) { | 3705 getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { |
3987 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), | 3706 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, |
3988 MVT::f32); | 3707 MVT::f32); |
3708 } | |
3709 | |
3710 static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, | |
3711 SelectionDAG &DAG) { | |
3712 // TODO: What fast-math-flags should be set on the floating-point nodes? | |
3713 | |
3714 // IntegerPartOfX = ((int32_t)(t0); | |
3715 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); | |
3716 | |
3717 // FractionalPartOfX = t0 - (float)IntegerPartOfX; | |
3718 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); | |
3719 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); | |
3720 | |
3721 // IntegerPartOfX <<= 23; | |
3722 IntegerPartOfX = DAG.getNode( | |
3723 ISD::SHL, dl, MVT::i32, IntegerPartOfX, | |
3724 DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( | |
3725 DAG.getDataLayout()))); | |
3726 | |
3727 SDValue TwoToFractionalPartOfX; | |
3728 if (LimitFloatPrecision <= 6) { | |
3729 // For floating-point precision of 6: | |
3730 // | |
3731 // TwoToFractionalPartOfX = | |
3732 // 0.997535578f + | |
3733 // (0.735607626f + 0.252464424f * x) * x; | |
3734 // | |
3735 // error 0.0144103317, which is 6 bits | |
3736 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
3737 getF32Constant(DAG, 0x3e814304, dl)); | |
3738 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
3739 getF32Constant(DAG, 0x3f3c50c8, dl)); | |
3740 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
3741 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
3742 getF32Constant(DAG, 0x3f7f5e7e, dl)); | |
3743 } else if (LimitFloatPrecision <= 12) { | |
3744 // For floating-point precision of 12: | |
3745 // | |
3746 // TwoToFractionalPartOfX = | |
3747 // 0.999892986f + | |
3748 // (0.696457318f + | |
3749 // (0.224338339f + 0.792043434e-1f * x) * x) * x; | |
3750 // | |
3751 // error 0.000107046256, which is 13 to 14 bits | |
3752 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
3753 getF32Constant(DAG, 0x3da235e3, dl)); | |
3754 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
3755 getF32Constant(DAG, 0x3e65b8f3, dl)); | |
3756 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
3757 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
3758 getF32Constant(DAG, 0x3f324b07, dl)); | |
3759 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
3760 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
3761 getF32Constant(DAG, 0x3f7ff8fd, dl)); | |
3762 } else { // LimitFloatPrecision <= 18 | |
3763 // For floating-point precision of 18: | |
3764 // | |
3765 // TwoToFractionalPartOfX = | |
3766 // 0.999999982f + | |
3767 // (0.693148872f + | |
3768 // (0.240227044f + | |
3769 // (0.554906021e-1f + | |
3770 // (0.961591928e-2f + | |
3771 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; | |
3772 // error 2.47208000*10^(-7), which is better than 18 bits | |
3773 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
3774 getF32Constant(DAG, 0x3924b03e, dl)); | |
3775 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
3776 getF32Constant(DAG, 0x3ab24b87, dl)); | |
3777 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
3778 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
3779 getF32Constant(DAG, 0x3c1d8c17, dl)); | |
3780 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
3781 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
3782 getF32Constant(DAG, 0x3d634a1d, dl)); | |
3783 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | |
3784 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | |
3785 getF32Constant(DAG, 0x3e75fe14, dl)); | |
3786 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | |
3787 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, | |
3788 getF32Constant(DAG, 0x3f317234, dl)); | |
3789 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); | |
3790 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, | |
3791 getF32Constant(DAG, 0x3f800000, dl)); | |
3792 } | |
3793 | |
3794 // Add the exponent into the result in integer domain. | |
3795 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); | |
3796 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, | |
3797 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); | |
3989 } | 3798 } |
3990 | 3799 |
3991 /// expandExp - Lower an exp intrinsic. Handles the special sequences for | 3800 /// expandExp - Lower an exp intrinsic. Handles the special sequences for |
3992 /// limited-precision mode. | 3801 /// limited-precision mode. |
3993 static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, | 3802 static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, |
3997 | 3806 |
3998 // Put the exponent in the right bit position for later addition to the | 3807 // Put the exponent in the right bit position for later addition to the |
3999 // final result: | 3808 // final result: |
4000 // | 3809 // |
4001 // #define LOG2OFe 1.4426950f | 3810 // #define LOG2OFe 1.4426950f |
4002 // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); | 3811 // t0 = Op * LOG2OFe |
3812 | |
3813 // TODO: What fast-math-flags should be set here? | |
4003 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, | 3814 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, |
4004 getF32Constant(DAG, 0x3fb8aa3b)); | 3815 getF32Constant(DAG, 0x3fb8aa3b, dl)); |
4005 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); | 3816 return getLimitedPrecisionExp2(t0, dl, DAG); |
4006 | |
4007 // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; | |
4008 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); | |
4009 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); | |
4010 | |
4011 // IntegerPartOfX <<= 23; | |
4012 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, | |
4013 DAG.getConstant(23, TLI.getPointerTy())); | |
4014 | |
4015 SDValue TwoToFracPartOfX; | |
4016 if (LimitFloatPrecision <= 6) { | |
4017 // For floating-point precision of 6: | |
4018 // | |
4019 // TwoToFractionalPartOfX = | |
4020 // 0.997535578f + | |
4021 // (0.735607626f + 0.252464424f * x) * x; | |
4022 // | |
4023 // error 0.0144103317, which is 6 bits | |
4024 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4025 getF32Constant(DAG, 0x3e814304)); | |
4026 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4027 getF32Constant(DAG, 0x3f3c50c8)); | |
4028 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4029 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4030 getF32Constant(DAG, 0x3f7f5e7e)); | |
4031 } else if (LimitFloatPrecision <= 12) { | |
4032 // For floating-point precision of 12: | |
4033 // | |
4034 // TwoToFractionalPartOfX = | |
4035 // 0.999892986f + | |
4036 // (0.696457318f + | |
4037 // (0.224338339f + 0.792043434e-1f * x) * x) * x; | |
4038 // | |
4039 // 0.000107046256 error, which is 13 to 14 bits | |
4040 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4041 getF32Constant(DAG, 0x3da235e3)); | |
4042 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4043 getF32Constant(DAG, 0x3e65b8f3)); | |
4044 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4045 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4046 getF32Constant(DAG, 0x3f324b07)); | |
4047 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4048 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4049 getF32Constant(DAG, 0x3f7ff8fd)); | |
4050 } else { // LimitFloatPrecision <= 18 | |
4051 // For floating-point precision of 18: | |
4052 // | |
4053 // TwoToFractionalPartOfX = | |
4054 // 0.999999982f + | |
4055 // (0.693148872f + | |
4056 // (0.240227044f + | |
4057 // (0.554906021e-1f + | |
4058 // (0.961591928e-2f + | |
4059 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; | |
4060 // | |
4061 // error 2.47208000*10^(-7), which is better than 18 bits | |
4062 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4063 getF32Constant(DAG, 0x3924b03e)); | |
4064 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4065 getF32Constant(DAG, 0x3ab24b87)); | |
4066 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4067 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4068 getF32Constant(DAG, 0x3c1d8c17)); | |
4069 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4070 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4071 getF32Constant(DAG, 0x3d634a1d)); | |
4072 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | |
4073 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | |
4074 getF32Constant(DAG, 0x3e75fe14)); | |
4075 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | |
4076 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, | |
4077 getF32Constant(DAG, 0x3f317234)); | |
4078 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); | |
4079 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, | |
4080 getF32Constant(DAG, 0x3f800000)); | |
4081 } | |
4082 | |
4083 // Add the exponent into the result in integer domain. | |
4084 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); | |
4085 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, | |
4086 DAG.getNode(ISD::ADD, dl, MVT::i32, | |
4087 t13, IntegerPartOfX)); | |
4088 } | 3817 } |
4089 | 3818 |
4090 // No special expansion. | 3819 // No special expansion. |
4091 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); | 3820 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); |
4092 } | 3821 } |
4093 | 3822 |
4094 /// expandLog - Lower a log intrinsic. Handles the special sequences for | 3823 /// expandLog - Lower a log intrinsic. Handles the special sequences for |
4095 /// limited-precision mode. | 3824 /// limited-precision mode. |
4096 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, | 3825 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, |
4097 const TargetLowering &TLI) { | 3826 const TargetLowering &TLI) { |
3827 | |
3828 // TODO: What fast-math-flags should be set on the floating-point nodes? | |
3829 | |
4098 if (Op.getValueType() == MVT::f32 && | 3830 if (Op.getValueType() == MVT::f32 && |
4099 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { | 3831 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { |
4100 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); | 3832 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
4101 | 3833 |
4102 // Scale the exponent by log(2) [0.69314718f]. | 3834 // Scale the exponent by log(2) [0.69314718f]. |
4103 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); | 3835 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); |
4104 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, | 3836 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, |
4105 getF32Constant(DAG, 0x3f317218)); | 3837 getF32Constant(DAG, 0x3f317218, dl)); |
4106 | 3838 |
4107 // Get the significand and build it into a floating-point number with | 3839 // Get the significand and build it into a floating-point number with |
4108 // exponent of 1. | 3840 // exponent of 1. |
4109 SDValue X = GetSignificand(DAG, Op1, dl); | 3841 SDValue X = GetSignificand(DAG, Op1, dl); |
4110 | 3842 |
4116 // -1.1609546f + | 3848 // -1.1609546f + |
4117 // (1.4034025f - 0.23903021f * x) * x; | 3849 // (1.4034025f - 0.23903021f * x) * x; |
4118 // | 3850 // |
4119 // error 0.0034276066, which is better than 8 bits | 3851 // error 0.0034276066, which is better than 8 bits |
4120 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3852 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4121 getF32Constant(DAG, 0xbe74c456)); | 3853 getF32Constant(DAG, 0xbe74c456, dl)); |
4122 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3854 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4123 getF32Constant(DAG, 0x3fb3a2b1)); | 3855 getF32Constant(DAG, 0x3fb3a2b1, dl)); |
4124 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3856 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4125 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3857 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4126 getF32Constant(DAG, 0x3f949a29)); | 3858 getF32Constant(DAG, 0x3f949a29, dl)); |
4127 } else if (LimitFloatPrecision <= 12) { | 3859 } else if (LimitFloatPrecision <= 12) { |
4128 // For floating-point precision of 12: | 3860 // For floating-point precision of 12: |
4129 // | 3861 // |
4130 // LogOfMantissa = | 3862 // LogOfMantissa = |
4131 // -1.7417939f + | 3863 // -1.7417939f + |
4133 // (-1.4699568f + | 3865 // (-1.4699568f + |
4134 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; | 3866 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; |
4135 // | 3867 // |
4136 // error 0.000061011436, which is 14 bits | 3868 // error 0.000061011436, which is 14 bits |
4137 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3869 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4138 getF32Constant(DAG, 0xbd67b6d6)); | 3870 getF32Constant(DAG, 0xbd67b6d6, dl)); |
4139 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3871 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4140 getF32Constant(DAG, 0x3ee4f4b8)); | 3872 getF32Constant(DAG, 0x3ee4f4b8, dl)); |
4141 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3873 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4142 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3874 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4143 getF32Constant(DAG, 0x3fbc278b)); | 3875 getF32Constant(DAG, 0x3fbc278b, dl)); |
4144 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 3876 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4145 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | 3877 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4146 getF32Constant(DAG, 0x40348e95)); | 3878 getF32Constant(DAG, 0x40348e95, dl)); |
4147 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | 3879 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4148 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, | 3880 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
4149 getF32Constant(DAG, 0x3fdef31a)); | 3881 getF32Constant(DAG, 0x3fdef31a, dl)); |
4150 } else { // LimitFloatPrecision <= 18 | 3882 } else { // LimitFloatPrecision <= 18 |
4151 // For floating-point precision of 18: | 3883 // For floating-point precision of 18: |
4152 // | 3884 // |
4153 // LogOfMantissa = | 3885 // LogOfMantissa = |
4154 // -2.1072184f + | 3886 // -2.1072184f + |
4158 // (-0.87823314f + | 3890 // (-0.87823314f + |
4159 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; | 3891 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; |
4160 // | 3892 // |
4161 // error 0.0000023660568, which is better than 18 bits | 3893 // error 0.0000023660568, which is better than 18 bits |
4162 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3894 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4163 getF32Constant(DAG, 0xbc91e5ac)); | 3895 getF32Constant(DAG, 0xbc91e5ac, dl)); |
4164 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3896 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4165 getF32Constant(DAG, 0x3e4350aa)); | 3897 getF32Constant(DAG, 0x3e4350aa, dl)); |
4166 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3898 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4167 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3899 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4168 getF32Constant(DAG, 0x3f60d3e3)); | 3900 getF32Constant(DAG, 0x3f60d3e3, dl)); |
4169 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 3901 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4170 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | 3902 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4171 getF32Constant(DAG, 0x4011cdf0)); | 3903 getF32Constant(DAG, 0x4011cdf0, dl)); |
4172 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | 3904 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4173 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, | 3905 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
4174 getF32Constant(DAG, 0x406cfd1c)); | 3906 getF32Constant(DAG, 0x406cfd1c, dl)); |
4175 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | 3907 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
4176 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | 3908 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, |
4177 getF32Constant(DAG, 0x408797cb)); | 3909 getF32Constant(DAG, 0x408797cb, dl)); |
4178 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | 3910 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); |
4179 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, | 3911 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, |
4180 getF32Constant(DAG, 0x4006dcab)); | 3912 getF32Constant(DAG, 0x4006dcab, dl)); |
4181 } | 3913 } |
4182 | 3914 |
4183 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); | 3915 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); |
4184 } | 3916 } |
4185 | 3917 |
4189 | 3921 |
4190 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for | 3922 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for |
4191 /// limited-precision mode. | 3923 /// limited-precision mode. |
4192 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, | 3924 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, |
4193 const TargetLowering &TLI) { | 3925 const TargetLowering &TLI) { |
3926 | |
3927 // TODO: What fast-math-flags should be set on the floating-point nodes? | |
3928 | |
4194 if (Op.getValueType() == MVT::f32 && | 3929 if (Op.getValueType() == MVT::f32 && |
4195 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { | 3930 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { |
4196 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); | 3931 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
4197 | 3932 |
4198 // Get the exponent. | 3933 // Get the exponent. |
4210 // | 3945 // |
4211 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; | 3946 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; |
4212 // | 3947 // |
4213 // error 0.0049451742, which is more than 7 bits | 3948 // error 0.0049451742, which is more than 7 bits |
4214 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3949 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4215 getF32Constant(DAG, 0xbeb08fe0)); | 3950 getF32Constant(DAG, 0xbeb08fe0, dl)); |
4216 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3951 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4217 getF32Constant(DAG, 0x40019463)); | 3952 getF32Constant(DAG, 0x40019463, dl)); |
4218 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3953 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4219 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3954 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4220 getF32Constant(DAG, 0x3fd6633d)); | 3955 getF32Constant(DAG, 0x3fd6633d, dl)); |
4221 } else if (LimitFloatPrecision <= 12) { | 3956 } else if (LimitFloatPrecision <= 12) { |
4222 // For floating-point precision of 12: | 3957 // For floating-point precision of 12: |
4223 // | 3958 // |
4224 // Log2ofMantissa = | 3959 // Log2ofMantissa = |
4225 // -2.51285454f + | 3960 // -2.51285454f + |
4227 // (-2.12067489f + | 3962 // (-2.12067489f + |
4228 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; | 3963 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; |
4229 // | 3964 // |
4230 // error 0.0000876136000, which is better than 13 bits | 3965 // error 0.0000876136000, which is better than 13 bits |
4231 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3966 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4232 getF32Constant(DAG, 0xbda7262e)); | 3967 getF32Constant(DAG, 0xbda7262e, dl)); |
4233 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3968 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4234 getF32Constant(DAG, 0x3f25280b)); | 3969 getF32Constant(DAG, 0x3f25280b, dl)); |
4235 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3970 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4236 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3971 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4237 getF32Constant(DAG, 0x4007b923)); | 3972 getF32Constant(DAG, 0x4007b923, dl)); |
4238 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 3973 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4239 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | 3974 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4240 getF32Constant(DAG, 0x40823e2f)); | 3975 getF32Constant(DAG, 0x40823e2f, dl)); |
4241 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | 3976 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4242 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, | 3977 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
4243 getF32Constant(DAG, 0x4020d29c)); | 3978 getF32Constant(DAG, 0x4020d29c, dl)); |
4244 } else { // LimitFloatPrecision <= 18 | 3979 } else { // LimitFloatPrecision <= 18 |
4245 // For floating-point precision of 18: | 3980 // For floating-point precision of 18: |
4246 // | 3981 // |
4247 // Log2ofMantissa = | 3982 // Log2ofMantissa = |
4248 // -3.0400495f + | 3983 // -3.0400495f + |
4253 // (0.27515199f - | 3988 // (0.27515199f - |
4254 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; | 3989 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; |
4255 // | 3990 // |
4256 // error 0.0000018516, which is better than 18 bits | 3991 // error 0.0000018516, which is better than 18 bits |
4257 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 3992 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4258 getF32Constant(DAG, 0xbcd2769e)); | 3993 getF32Constant(DAG, 0xbcd2769e, dl)); |
4259 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 3994 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4260 getF32Constant(DAG, 0x3e8ce0b9)); | 3995 getF32Constant(DAG, 0x3e8ce0b9, dl)); |
4261 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 3996 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4262 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 3997 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4263 getF32Constant(DAG, 0x3fa22ae7)); | 3998 getF32Constant(DAG, 0x3fa22ae7, dl)); |
4264 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 3999 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4265 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | 4000 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4266 getF32Constant(DAG, 0x40525723)); | 4001 getF32Constant(DAG, 0x40525723, dl)); |
4267 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | 4002 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4268 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, | 4003 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
4269 getF32Constant(DAG, 0x40aaf200)); | 4004 getF32Constant(DAG, 0x40aaf200, dl)); |
4270 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | 4005 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
4271 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | 4006 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, |
4272 getF32Constant(DAG, 0x40c39dad)); | 4007 getF32Constant(DAG, 0x40c39dad, dl)); |
4273 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | 4008 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); |
4274 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, | 4009 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, |
4275 getF32Constant(DAG, 0x4042902c)); | 4010 getF32Constant(DAG, 0x4042902c, dl)); |
4276 } | 4011 } |
4277 | 4012 |
4278 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); | 4013 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); |
4279 } | 4014 } |
4280 | 4015 |
4284 | 4019 |
4285 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for | 4020 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for |
4286 /// limited-precision mode. | 4021 /// limited-precision mode. |
4287 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, | 4022 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, |
4288 const TargetLowering &TLI) { | 4023 const TargetLowering &TLI) { |
4024 | |
4025 // TODO: What fast-math-flags should be set on the floating-point nodes? | |
4026 | |
4289 if (Op.getValueType() == MVT::f32 && | 4027 if (Op.getValueType() == MVT::f32 && |
4290 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { | 4028 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { |
4291 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); | 4029 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
4292 | 4030 |
4293 // Scale the exponent by log10(2) [0.30102999f]. | 4031 // Scale the exponent by log10(2) [0.30102999f]. |
4294 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); | 4032 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); |
4295 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, | 4033 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, |
4296 getF32Constant(DAG, 0x3e9a209a)); | 4034 getF32Constant(DAG, 0x3e9a209a, dl)); |
4297 | 4035 |
4298 // Get the significand and build it into a floating-point number with | 4036 // Get the significand and build it into a floating-point number with |
4299 // exponent of 1. | 4037 // exponent of 1. |
4300 SDValue X = GetSignificand(DAG, Op1, dl); | 4038 SDValue X = GetSignificand(DAG, Op1, dl); |
4301 | 4039 |
4307 // -0.50419619f + | 4045 // -0.50419619f + |
4308 // (0.60948995f - 0.10380950f * x) * x; | 4046 // (0.60948995f - 0.10380950f * x) * x; |
4309 // | 4047 // |
4310 // error 0.0014886165, which is 6 bits | 4048 // error 0.0014886165, which is 6 bits |
4311 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 4049 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4312 getF32Constant(DAG, 0xbdd49a13)); | 4050 getF32Constant(DAG, 0xbdd49a13, dl)); |
4313 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, | 4051 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4314 getF32Constant(DAG, 0x3f1c0789)); | 4052 getF32Constant(DAG, 0x3f1c0789, dl)); |
4315 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 4053 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4316 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, | 4054 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4317 getF32Constant(DAG, 0x3f011300)); | 4055 getF32Constant(DAG, 0x3f011300, dl)); |
4318 } else if (LimitFloatPrecision <= 12) { | 4056 } else if (LimitFloatPrecision <= 12) { |
4319 // For floating-point precision of 12: | 4057 // For floating-point precision of 12: |
4320 // | 4058 // |
4321 // Log10ofMantissa = | 4059 // Log10ofMantissa = |
4322 // -0.64831180f + | 4060 // -0.64831180f + |
4323 // (0.91751397f + | 4061 // (0.91751397f + |
4324 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; | 4062 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; |
4325 // | 4063 // |
4326 // error 0.00019228036, which is better than 12 bits | 4064 // error 0.00019228036, which is better than 12 bits |
4327 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 4065 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4328 getF32Constant(DAG, 0x3d431f31)); | 4066 getF32Constant(DAG, 0x3d431f31, dl)); |
4329 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, | 4067 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, |
4330 getF32Constant(DAG, 0x3ea21fb2)); | 4068 getF32Constant(DAG, 0x3ea21fb2, dl)); |
4331 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 4069 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4332 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | 4070 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
4333 getF32Constant(DAG, 0x3f6ae232)); | 4071 getF32Constant(DAG, 0x3f6ae232, dl)); |
4334 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 4072 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4335 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, | 4073 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, |
4336 getF32Constant(DAG, 0x3f25f7c3)); | 4074 getF32Constant(DAG, 0x3f25f7c3, dl)); |
4337 } else { // LimitFloatPrecision <= 18 | 4075 } else { // LimitFloatPrecision <= 18 |
4338 // For floating-point precision of 18: | 4076 // For floating-point precision of 18: |
4339 // | 4077 // |
4340 // Log10ofMantissa = | 4078 // Log10ofMantissa = |
4341 // -0.84299375f + | 4079 // -0.84299375f + |
4344 // (0.49102474f + | 4082 // (0.49102474f + |
4345 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; | 4083 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; |
4346 // | 4084 // |
4347 // error 0.0000037995730, which is better than 18 bits | 4085 // error 0.0000037995730, which is better than 18 bits |
4348 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | 4086 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4349 getF32Constant(DAG, 0x3c5d51ce)); | 4087 getF32Constant(DAG, 0x3c5d51ce, dl)); |
4350 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, | 4088 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, |
4351 getF32Constant(DAG, 0x3e00685a)); | 4089 getF32Constant(DAG, 0x3e00685a, dl)); |
4352 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); | 4090 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4353 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | 4091 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
4354 getF32Constant(DAG, 0x3efb6798)); | 4092 getF32Constant(DAG, 0x3efb6798, dl)); |
4355 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | 4093 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4356 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, | 4094 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, |
4357 getF32Constant(DAG, 0x3f88d192)); | 4095 getF32Constant(DAG, 0x3f88d192, dl)); |
4358 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | 4096 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4359 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | 4097 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, |
4360 getF32Constant(DAG, 0x3fc4316c)); | 4098 getF32Constant(DAG, 0x3fc4316c, dl)); |
4361 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | 4099 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
4362 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, | 4100 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, |
4363 getF32Constant(DAG, 0x3f57ce70)); | 4101 getF32Constant(DAG, 0x3f57ce70, dl)); |
4364 } | 4102 } |
4365 | 4103 |
4366 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); | 4104 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); |
4367 } | 4105 } |
4368 | 4106 |
4373 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for | 4111 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for |
4374 /// limited-precision mode. | 4112 /// limited-precision mode. |
4375 static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, | 4113 static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, |
4376 const TargetLowering &TLI) { | 4114 const TargetLowering &TLI) { |
4377 if (Op.getValueType() == MVT::f32 && | 4115 if (Op.getValueType() == MVT::f32 && |
4378 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { | 4116 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) |
4379 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); | 4117 return getLimitedPrecisionExp2(Op, dl, DAG); |
4380 | |
4381 // FractionalPartOfX = x - (float)IntegerPartOfX; | |
4382 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); | |
4383 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); | |
4384 | |
4385 // IntegerPartOfX <<= 23; | |
4386 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, | |
4387 DAG.getConstant(23, TLI.getPointerTy())); | |
4388 | |
4389 SDValue TwoToFractionalPartOfX; | |
4390 if (LimitFloatPrecision <= 6) { | |
4391 // For floating-point precision of 6: | |
4392 // | |
4393 // TwoToFractionalPartOfX = | |
4394 // 0.997535578f + | |
4395 // (0.735607626f + 0.252464424f * x) * x; | |
4396 // | |
4397 // error 0.0144103317, which is 6 bits | |
4398 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4399 getF32Constant(DAG, 0x3e814304)); | |
4400 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4401 getF32Constant(DAG, 0x3f3c50c8)); | |
4402 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4403 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4404 getF32Constant(DAG, 0x3f7f5e7e)); | |
4405 } else if (LimitFloatPrecision <= 12) { | |
4406 // For floating-point precision of 12: | |
4407 // | |
4408 // TwoToFractionalPartOfX = | |
4409 // 0.999892986f + | |
4410 // (0.696457318f + | |
4411 // (0.224338339f + 0.792043434e-1f * x) * x) * x; | |
4412 // | |
4413 // error 0.000107046256, which is 13 to 14 bits | |
4414 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4415 getF32Constant(DAG, 0x3da235e3)); | |
4416 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4417 getF32Constant(DAG, 0x3e65b8f3)); | |
4418 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4419 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4420 getF32Constant(DAG, 0x3f324b07)); | |
4421 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4422 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4423 getF32Constant(DAG, 0x3f7ff8fd)); | |
4424 } else { // LimitFloatPrecision <= 18 | |
4425 // For floating-point precision of 18: | |
4426 // | |
4427 // TwoToFractionalPartOfX = | |
4428 // 0.999999982f + | |
4429 // (0.693148872f + | |
4430 // (0.240227044f + | |
4431 // (0.554906021e-1f + | |
4432 // (0.961591928e-2f + | |
4433 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; | |
4434 // error 2.47208000*10^(-7), which is better than 18 bits | |
4435 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4436 getF32Constant(DAG, 0x3924b03e)); | |
4437 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4438 getF32Constant(DAG, 0x3ab24b87)); | |
4439 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4440 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4441 getF32Constant(DAG, 0x3c1d8c17)); | |
4442 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4443 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4444 getF32Constant(DAG, 0x3d634a1d)); | |
4445 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | |
4446 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | |
4447 getF32Constant(DAG, 0x3e75fe14)); | |
4448 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | |
4449 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, | |
4450 getF32Constant(DAG, 0x3f317234)); | |
4451 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); | |
4452 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, | |
4453 getF32Constant(DAG, 0x3f800000)); | |
4454 } | |
4455 | |
4456 // Add the exponent into the result in integer domain. | |
4457 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, | |
4458 TwoToFractionalPartOfX); | |
4459 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, | |
4460 DAG.getNode(ISD::ADD, dl, MVT::i32, | |
4461 t13, IntegerPartOfX)); | |
4462 } | |
4463 | 4118 |
4464 // No special expansion. | 4119 // No special expansion. |
4465 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); | 4120 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); |
4466 } | 4121 } |
4467 | 4122 |
4476 APFloat Ten(10.0f); | 4131 APFloat Ten(10.0f); |
4477 IsExp10 = LHSC->isExactlyValue(Ten); | 4132 IsExp10 = LHSC->isExactlyValue(Ten); |
4478 } | 4133 } |
4479 } | 4134 } |
4480 | 4135 |
4136 // TODO: What fast-math-flags should be set on the FMUL node? | |
4481 if (IsExp10) { | 4137 if (IsExp10) { |
4482 // Put the exponent in the right bit position for later addition to the | 4138 // Put the exponent in the right bit position for later addition to the |
4483 // final result: | 4139 // final result: |
4484 // | 4140 // |
4485 // #define LOG2OF10 3.3219281f | 4141 // #define LOG2OF10 3.3219281f |
4486 // IntegerPartOfX = (int32_t)(x * LOG2OF10); | 4142 // t0 = Op * LOG2OF10; |
4487 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, | 4143 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, |
4488 getF32Constant(DAG, 0x40549a78)); | 4144 getF32Constant(DAG, 0x40549a78, dl)); |
4489 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); | 4145 return getLimitedPrecisionExp2(t0, dl, DAG); |
4490 | |
4491 // FractionalPartOfX = x - (float)IntegerPartOfX; | |
4492 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); | |
4493 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); | |
4494 | |
4495 // IntegerPartOfX <<= 23; | |
4496 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, | |
4497 DAG.getConstant(23, TLI.getPointerTy())); | |
4498 | |
4499 SDValue TwoToFractionalPartOfX; | |
4500 if (LimitFloatPrecision <= 6) { | |
4501 // For floating-point precision of 6: | |
4502 // | |
4503 // twoToFractionalPartOfX = | |
4504 // 0.997535578f + | |
4505 // (0.735607626f + 0.252464424f * x) * x; | |
4506 // | |
4507 // error 0.0144103317, which is 6 bits | |
4508 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4509 getF32Constant(DAG, 0x3e814304)); | |
4510 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4511 getF32Constant(DAG, 0x3f3c50c8)); | |
4512 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4513 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4514 getF32Constant(DAG, 0x3f7f5e7e)); | |
4515 } else if (LimitFloatPrecision <= 12) { | |
4516 // For floating-point precision of 12: | |
4517 // | |
4518 // TwoToFractionalPartOfX = | |
4519 // 0.999892986f + | |
4520 // (0.696457318f + | |
4521 // (0.224338339f + 0.792043434e-1f * x) * x) * x; | |
4522 // | |
4523 // error 0.000107046256, which is 13 to 14 bits | |
4524 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4525 getF32Constant(DAG, 0x3da235e3)); | |
4526 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4527 getF32Constant(DAG, 0x3e65b8f3)); | |
4528 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4529 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4530 getF32Constant(DAG, 0x3f324b07)); | |
4531 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4532 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4533 getF32Constant(DAG, 0x3f7ff8fd)); | |
4534 } else { // LimitFloatPrecision <= 18 | |
4535 // For floating-point precision of 18: | |
4536 // | |
4537 // TwoToFractionalPartOfX = | |
4538 // 0.999999982f + | |
4539 // (0.693148872f + | |
4540 // (0.240227044f + | |
4541 // (0.554906021e-1f + | |
4542 // (0.961591928e-2f + | |
4543 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; | |
4544 // error 2.47208000*10^(-7), which is better than 18 bits | |
4545 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, | |
4546 getF32Constant(DAG, 0x3924b03e)); | |
4547 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, | |
4548 getF32Constant(DAG, 0x3ab24b87)); | |
4549 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); | |
4550 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, | |
4551 getF32Constant(DAG, 0x3c1d8c17)); | |
4552 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); | |
4553 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, | |
4554 getF32Constant(DAG, 0x3d634a1d)); | |
4555 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); | |
4556 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, | |
4557 getF32Constant(DAG, 0x3e75fe14)); | |
4558 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); | |
4559 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, | |
4560 getF32Constant(DAG, 0x3f317234)); | |
4561 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); | |
4562 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, | |
4563 getF32Constant(DAG, 0x3f800000)); | |
4564 } | |
4565 | |
4566 SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); | |
4567 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, | |
4568 DAG.getNode(ISD::ADD, dl, MVT::i32, | |
4569 t13, IntegerPartOfX)); | |
4570 } | 4146 } |
4571 | 4147 |
4572 // No special expansion. | 4148 // No special expansion. |
4573 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); | 4149 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); |
4574 } | 4150 } |
4586 unsigned Val = RHSC->getSExtValue(); | 4162 unsigned Val = RHSC->getSExtValue(); |
4587 if ((int)Val < 0) Val = -Val; | 4163 if ((int)Val < 0) Val = -Val; |
4588 | 4164 |
4589 // powi(x, 0) -> 1.0 | 4165 // powi(x, 0) -> 1.0 |
4590 if (Val == 0) | 4166 if (Val == 0) |
4591 return DAG.getConstantFP(1.0, LHS.getValueType()); | 4167 return DAG.getConstantFP(1.0, DL, LHS.getValueType()); |
4592 | 4168 |
4593 const Function *F = DAG.getMachineFunction().getFunction(); | 4169 const Function *F = DAG.getMachineFunction().getFunction(); |
4594 if (!F->hasFnAttribute(Attribute::OptimizeForSize) || | 4170 if (!F->optForSize() || |
4595 // If optimizing for size, don't insert too many multiplies. This | 4171 // If optimizing for size, don't insert too many multiplies. |
4596 // inserts up to 5 multiplies. | 4172 // This inserts up to 5 multiplies. |
4597 countPopulation(Val) + Log2_32(Val) < 7) { | 4173 countPopulation(Val) + Log2_32(Val) < 7) { |
4598 // We use the simple binary decomposition method to generate the multiply | 4174 // We use the simple binary decomposition method to generate the multiply |
4599 // sequence. There are more optimal ways to do this (for example, | 4175 // sequence. There are more optimal ways to do this (for example, |
4600 // powi(x,15) generates one more multiply than it should), but this has | 4176 // powi(x,15) generates one more multiply than it should), but this has |
4601 // the benefit of being both really simple and much better than a libcall. | 4177 // the benefit of being both really simple and much better than a libcall. |
4602 SDValue Res; // Logically starts equal to 1.0 | 4178 SDValue Res; // Logically starts equal to 1.0 |
4603 SDValue CurSquare = LHS; | 4179 SDValue CurSquare = LHS; |
4180 // TODO: Intrinsics should have fast-math-flags that propagate to these | |
4181 // nodes. | |
4604 while (Val) { | 4182 while (Val) { |
4605 if (Val & 1) { | 4183 if (Val & 1) { |
4606 if (Res.getNode()) | 4184 if (Res.getNode()) |
4607 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); | 4185 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); |
4608 else | 4186 else |
4615 } | 4193 } |
4616 | 4194 |
4617 // If the original was negative, invert the result, producing 1/(x*x*x). | 4195 // If the original was negative, invert the result, producing 1/(x*x*x). |
4618 if (RHSC->getSExtValue() < 0) | 4196 if (RHSC->getSExtValue() < 0) |
4619 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), | 4197 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), |
4620 DAG.getConstantFP(1.0, LHS.getValueType()), Res); | 4198 DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); |
4621 return Res; | 4199 return Res; |
4622 } | 4200 } |
4623 } | 4201 } |
4624 | 4202 |
4625 // Otherwise, expand to a libcall. | 4203 // Otherwise, expand to a libcall. |
4626 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); | 4204 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); |
4627 } | 4205 } |
4628 | 4206 |
4629 // getTruncatedArgReg - Find underlying register used for an truncated | 4207 // getUnderlyingArgReg - Find underlying register used for a truncated or |
4630 // argument. | 4208 // bitcasted argument. |
4631 static unsigned getTruncatedArgReg(const SDValue &N) { | 4209 static unsigned getUnderlyingArgReg(const SDValue &N) { |
4632 if (N.getOpcode() != ISD::TRUNCATE) | 4210 switch (N.getOpcode()) { |
4211 case ISD::CopyFromReg: | |
4212 return cast<RegisterSDNode>(N.getOperand(1))->getReg(); | |
4213 case ISD::BITCAST: | |
4214 case ISD::AssertZext: | |
4215 case ISD::AssertSext: | |
4216 case ISD::TRUNCATE: | |
4217 return getUnderlyingArgReg(N.getOperand(0)); | |
4218 default: | |
4633 return 0; | 4219 return 0; |
4634 | 4220 } |
4635 const SDValue &Ext = N.getOperand(0); | |
4636 if (Ext.getOpcode() == ISD::AssertZext || | |
4637 Ext.getOpcode() == ISD::AssertSext) { | |
4638 const SDValue &CFR = Ext.getOperand(0); | |
4639 if (CFR.getOpcode() == ISD::CopyFromReg) | |
4640 return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); | |
4641 if (CFR.getOpcode() == ISD::TRUNCATE) | |
4642 return getTruncatedArgReg(CFR); | |
4643 } | |
4644 return 0; | |
4645 } | 4221 } |
4646 | 4222 |
4647 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function | 4223 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function |
4648 /// argument, create the corresponding DBG_VALUE machine instruction for it now. | 4224 /// argument, create the corresponding DBG_VALUE machine instruction for it now. |
4649 /// At the end of instruction selection, they will be inserted to the entry BB. | 4225 /// At the end of instruction selection, they will be inserted to the entry BB. |
4650 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, | 4226 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( |
4651 MDNode *Variable, | 4227 const Value *V, DILocalVariable *Variable, DIExpression *Expr, |
4652 MDNode *Expr, int64_t Offset, | 4228 DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { |
4653 bool IsIndirect, | |
4654 const SDValue &N) { | |
4655 const Argument *Arg = dyn_cast<Argument>(V); | 4229 const Argument *Arg = dyn_cast<Argument>(V); |
4656 if (!Arg) | 4230 if (!Arg) |
4657 return false; | 4231 return false; |
4658 | 4232 |
4659 MachineFunction &MF = DAG.getMachineFunction(); | 4233 MachineFunction &MF = DAG.getMachineFunction(); |
4660 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); | 4234 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); |
4661 | 4235 |
4662 // Ignore inlined function arguments here. | 4236 // Ignore inlined function arguments here. |
4663 DIVariable DV(Variable); | 4237 // |
4664 if (DV.isInlinedFnArgument(MF.getFunction())) | 4238 // FIXME: Should we be checking DL->inlinedAt() to determine this? |
4239 if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) | |
4665 return false; | 4240 return false; |
4666 | 4241 |
4667 Optional<MachineOperand> Op; | 4242 Optional<MachineOperand> Op; |
4668 // Some arguments' frame index is recorded during argument lowering. | 4243 // Some arguments' frame index is recorded during argument lowering. |
4669 if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) | 4244 if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) |
4670 Op = MachineOperand::CreateFI(FI); | 4245 Op = MachineOperand::CreateFI(FI); |
4671 | 4246 |
4672 if (!Op && N.getNode()) { | 4247 if (!Op && N.getNode()) { |
4673 unsigned Reg; | 4248 unsigned Reg = getUnderlyingArgReg(N); |
4674 if (N.getOpcode() == ISD::CopyFromReg) | |
4675 Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); | |
4676 else | |
4677 Reg = getTruncatedArgReg(N); | |
4678 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { | 4249 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { |
4679 MachineRegisterInfo &RegInfo = MF.getRegInfo(); | 4250 MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
4680 unsigned PR = RegInfo.getLiveInPhysReg(Reg); | 4251 unsigned PR = RegInfo.getLiveInPhysReg(Reg); |
4681 if (PR) | 4252 if (PR) |
4682 Reg = PR; | 4253 Reg = PR; |
4700 Op = MachineOperand::CreateFI(FINode->getIndex()); | 4271 Op = MachineOperand::CreateFI(FINode->getIndex()); |
4701 | 4272 |
4702 if (!Op) | 4273 if (!Op) |
4703 return false; | 4274 return false; |
4704 | 4275 |
4276 assert(Variable->isValidLocationForIntrinsic(DL) && | |
4277 "Expected inlined-at fields to agree"); | |
4705 if (Op->isReg()) | 4278 if (Op->isReg()) |
4706 FuncInfo.ArgDbgValues.push_back( | 4279 FuncInfo.ArgDbgValues.push_back( |
4707 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), | 4280 BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, |
4708 IsIndirect, Op->getReg(), Offset, Variable, Expr)); | 4281 Op->getReg(), Offset, Variable, Expr)); |
4709 else | 4282 else |
4710 FuncInfo.ArgDbgValues.push_back( | 4283 FuncInfo.ArgDbgValues.push_back( |
4711 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) | 4284 BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) |
4712 .addOperand(*Op) | 4285 .addOperand(*Op) |
4713 .addImm(Offset) | 4286 .addImm(Offset) |
4714 .addMetadata(Variable) | 4287 .addMetadata(Variable) |
4715 .addMetadata(Expr)); | 4288 .addMetadata(Expr)); |
4716 | 4289 |
4742 return nullptr; | 4315 return nullptr; |
4743 case Intrinsic::vastart: visitVAStart(I); return nullptr; | 4316 case Intrinsic::vastart: visitVAStart(I); return nullptr; |
4744 case Intrinsic::vaend: visitVAEnd(I); return nullptr; | 4317 case Intrinsic::vaend: visitVAEnd(I); return nullptr; |
4745 case Intrinsic::vacopy: visitVACopy(I); return nullptr; | 4318 case Intrinsic::vacopy: visitVACopy(I); return nullptr; |
4746 case Intrinsic::returnaddress: | 4319 case Intrinsic::returnaddress: |
4747 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), | 4320 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, |
4321 TLI.getPointerTy(DAG.getDataLayout()), | |
4748 getValue(I.getArgOperand(0)))); | 4322 getValue(I.getArgOperand(0)))); |
4749 return nullptr; | 4323 return nullptr; |
4750 case Intrinsic::frameaddress: | 4324 case Intrinsic::frameaddress: |
4751 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), | 4325 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, |
4326 TLI.getPointerTy(DAG.getDataLayout()), | |
4752 getValue(I.getArgOperand(0)))); | 4327 getValue(I.getArgOperand(0)))); |
4753 return nullptr; | 4328 return nullptr; |
4754 case Intrinsic::read_register: { | 4329 case Intrinsic::read_register: { |
4755 Value *Reg = I.getArgOperand(0); | 4330 Value *Reg = I.getArgOperand(0); |
4331 SDValue Chain = getRoot(); | |
4756 SDValue RegName = | 4332 SDValue RegName = |
4757 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); | 4333 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); |
4758 EVT VT = TLI.getValueType(I.getType()); | 4334 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
4759 setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); | 4335 Res = DAG.getNode(ISD::READ_REGISTER, sdl, |
4336 DAG.getVTList(VT, MVT::Other), Chain, RegName); | |
4337 setValue(&I, Res); | |
4338 DAG.setRoot(Res.getValue(1)); | |
4760 return nullptr; | 4339 return nullptr; |
4761 } | 4340 } |
4762 case Intrinsic::write_register: { | 4341 case Intrinsic::write_register: { |
4763 Value *Reg = I.getArgOperand(0); | 4342 Value *Reg = I.getArgOperand(0); |
4764 Value *RegValue = I.getArgOperand(1); | 4343 Value *RegValue = I.getArgOperand(1); |
4765 SDValue Chain = getValue(RegValue).getOperand(0); | 4344 SDValue Chain = getRoot(); |
4766 SDValue RegName = | 4345 SDValue RegName = |
4767 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); | 4346 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); |
4768 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, | 4347 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, |
4769 RegName, getValue(RegValue))); | 4348 RegName, getValue(RegValue))); |
4770 return nullptr; | 4349 return nullptr; |
4787 SDValue Op3 = getValue(I.getArgOperand(2)); | 4366 SDValue Op3 = getValue(I.getArgOperand(2)); |
4788 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); | 4367 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); |
4789 if (!Align) | 4368 if (!Align) |
4790 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. | 4369 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. |
4791 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); | 4370 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); |
4792 DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, | 4371 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); |
4793 MachinePointerInfo(I.getArgOperand(0)), | 4372 SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
4794 MachinePointerInfo(I.getArgOperand(1)))); | 4373 false, isTC, |
4374 MachinePointerInfo(I.getArgOperand(0)), | |
4375 MachinePointerInfo(I.getArgOperand(1))); | |
4376 updateDAGForMaybeTailCall(MC); | |
4795 return nullptr; | 4377 return nullptr; |
4796 } | 4378 } |
4797 case Intrinsic::memset: { | 4379 case Intrinsic::memset: { |
4798 // FIXME: this definition of "user defined address space" is x86-specific | 4380 // FIXME: this definition of "user defined address space" is x86-specific |
4799 // Assert for address < 256 since we support only user defined address | 4381 // Assert for address < 256 since we support only user defined address |
4806 SDValue Op3 = getValue(I.getArgOperand(2)); | 4388 SDValue Op3 = getValue(I.getArgOperand(2)); |
4807 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); | 4389 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); |
4808 if (!Align) | 4390 if (!Align) |
4809 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. | 4391 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. |
4810 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); | 4392 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); |
4811 DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, | 4393 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); |
4812 MachinePointerInfo(I.getArgOperand(0)))); | 4394 SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
4395 isTC, MachinePointerInfo(I.getArgOperand(0))); | |
4396 updateDAGForMaybeTailCall(MS); | |
4813 return nullptr; | 4397 return nullptr; |
4814 } | 4398 } |
4815 case Intrinsic::memmove: { | 4399 case Intrinsic::memmove: { |
4816 // FIXME: this definition of "user defined address space" is x86-specific | 4400 // FIXME: this definition of "user defined address space" is x86-specific |
4817 // Assert for address < 256 since we support only user defined address | 4401 // Assert for address < 256 since we support only user defined address |
4826 SDValue Op3 = getValue(I.getArgOperand(2)); | 4410 SDValue Op3 = getValue(I.getArgOperand(2)); |
4827 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); | 4411 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); |
4828 if (!Align) | 4412 if (!Align) |
4829 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. | 4413 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. |
4830 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); | 4414 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); |
4831 DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, | 4415 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); |
4832 MachinePointerInfo(I.getArgOperand(0)), | 4416 SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
4833 MachinePointerInfo(I.getArgOperand(1)))); | 4417 isTC, MachinePointerInfo(I.getArgOperand(0)), |
4418 MachinePointerInfo(I.getArgOperand(1))); | |
4419 updateDAGForMaybeTailCall(MM); | |
4834 return nullptr; | 4420 return nullptr; |
4835 } | 4421 } |
4836 case Intrinsic::dbg_declare: { | 4422 case Intrinsic::dbg_declare: { |
4837 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); | 4423 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); |
4838 MDNode *Variable = DI.getVariable(); | 4424 DILocalVariable *Variable = DI.getVariable(); |
4839 MDNode *Expression = DI.getExpression(); | 4425 DIExpression *Expression = DI.getExpression(); |
4840 const Value *Address = DI.getAddress(); | 4426 const Value *Address = DI.getAddress(); |
4841 DIVariable DIVar(Variable); | 4427 assert(Variable && "Missing variable"); |
4842 assert((!DIVar || DIVar.isVariable()) && | 4428 if (!Address) { |
4843 "Variable in DbgDeclareInst should be either null or a DIVariable."); | |
4844 if (!Address || !DIVar) { | |
4845 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); | 4429 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); |
4846 return nullptr; | 4430 return nullptr; |
4847 } | 4431 } |
4848 | 4432 |
4849 // Check if address has undef value. | 4433 // Check if address has undef value. |
4860 SDDbgValue *SDV; | 4444 SDDbgValue *SDV; |
4861 if (N.getNode()) { | 4445 if (N.getNode()) { |
4862 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) | 4446 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) |
4863 Address = BCI->getOperand(0); | 4447 Address = BCI->getOperand(0); |
4864 // Parameters are handled specially. | 4448 // Parameters are handled specially. |
4865 bool isParameter = | 4449 bool isParameter = Variable->isParameter() || isa<Argument>(Address); |
4866 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || | |
4867 isa<Argument>(Address)); | |
4868 | 4450 |
4869 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); | 4451 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); |
4870 | 4452 |
4871 if (isParameter && !AI) { | 4453 if (isParameter && !AI) { |
4872 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); | 4454 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); |
4875 SDV = DAG.getFrameIndexDbgValue( | 4457 SDV = DAG.getFrameIndexDbgValue( |
4876 Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); | 4458 Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); |
4877 else { | 4459 else { |
4878 // Address is an argument, so try to emit its dbg value using | 4460 // Address is an argument, so try to emit its dbg value using |
4879 // virtual register info from the FuncInfo.ValueMap. | 4461 // virtual register info from the FuncInfo.ValueMap. |
4880 EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); | 4462 EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, |
4463 N); | |
4881 return nullptr; | 4464 return nullptr; |
4882 } | 4465 } |
4883 } else if (AI) | 4466 } else { |
4884 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), | 4467 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), |
4885 true, 0, dl, SDNodeOrder); | 4468 true, 0, dl, SDNodeOrder); |
4886 else { | |
4887 // Can't do anything with other non-AI cases yet. | |
4888 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); | |
4889 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); | |
4890 DEBUG(Address->dump()); | |
4891 return nullptr; | |
4892 } | 4469 } |
4893 DAG.AddDbgValue(SDV, N.getNode(), isParameter); | 4470 DAG.AddDbgValue(SDV, N.getNode(), isParameter); |
4894 } else { | 4471 } else { |
4895 // If Address is an argument then try to emit its dbg value using | 4472 // If Address is an argument then try to emit its dbg value using |
4896 // virtual register info from the FuncInfo.ValueMap. | 4473 // virtual register info from the FuncInfo.ValueMap. |
4897 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, | 4474 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, |
4898 N)) { | 4475 N)) { |
4899 // If variable is pinned by a alloca in dominating bb then | 4476 // If variable is pinned by a alloca in dominating bb then |
4900 // use StaticAllocaMap. | 4477 // use StaticAllocaMap. |
4901 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { | 4478 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { |
4902 if (AI->getParent() != DI.getParent()) { | 4479 if (AI->getParent() != DI.getParent()) { |
4915 } | 4492 } |
4916 return nullptr; | 4493 return nullptr; |
4917 } | 4494 } |
4918 case Intrinsic::dbg_value: { | 4495 case Intrinsic::dbg_value: { |
4919 const DbgValueInst &DI = cast<DbgValueInst>(I); | 4496 const DbgValueInst &DI = cast<DbgValueInst>(I); |
4920 DIVariable DIVar(DI.getVariable()); | 4497 assert(DI.getVariable() && "Missing variable"); |
4921 assert((!DIVar || DIVar.isVariable()) && | 4498 |
4922 "Variable in DbgValueInst should be either null or a DIVariable."); | 4499 DILocalVariable *Variable = DI.getVariable(); |
4923 if (!DIVar) | 4500 DIExpression *Expression = DI.getExpression(); |
4924 return nullptr; | |
4925 | |
4926 MDNode *Variable = DI.getVariable(); | |
4927 MDNode *Expression = DI.getExpression(); | |
4928 uint64_t Offset = DI.getOffset(); | 4501 uint64_t Offset = DI.getOffset(); |
4929 const Value *V = DI.getValue(); | 4502 const Value *V = DI.getValue(); |
4930 if (!V) | 4503 if (!V) |
4931 return nullptr; | 4504 return nullptr; |
4932 | 4505 |
4943 // Check unused arguments map. | 4516 // Check unused arguments map. |
4944 N = UnusedArgNodeMap[V]; | 4517 N = UnusedArgNodeMap[V]; |
4945 if (N.getNode()) { | 4518 if (N.getNode()) { |
4946 // A dbg.value for an alloca is always indirect. | 4519 // A dbg.value for an alloca is always indirect. |
4947 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; | 4520 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; |
4948 if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, | 4521 if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, |
4949 IsIndirect, N)) { | 4522 IsIndirect, N)) { |
4950 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), | 4523 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), |
4951 IsIndirect, Offset, dl, SDNodeOrder); | 4524 IsIndirect, Offset, dl, SDNodeOrder); |
4952 DAG.AddDbgValue(SDV, N.getNode(), false); | 4525 DAG.AddDbgValue(SDV, N.getNode(), false); |
4953 } | 4526 } |
4982 | 4555 |
4983 case Intrinsic::eh_typeid_for: { | 4556 case Intrinsic::eh_typeid_for: { |
4984 // Find the type id for the given typeinfo. | 4557 // Find the type id for the given typeinfo. |
4985 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); | 4558 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); |
4986 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); | 4559 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); |
4987 Res = DAG.getConstant(TypeID, MVT::i32); | 4560 Res = DAG.getConstant(TypeID, sdl, MVT::i32); |
4988 setValue(&I, Res); | 4561 setValue(&I, Res); |
4989 return nullptr; | 4562 return nullptr; |
4990 } | 4563 } |
4991 | 4564 |
4992 case Intrinsic::eh_return_i32: | 4565 case Intrinsic::eh_return_i32: |
5001 case Intrinsic::eh_unwind_init: | 4574 case Intrinsic::eh_unwind_init: |
5002 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); | 4575 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); |
5003 return nullptr; | 4576 return nullptr; |
5004 case Intrinsic::eh_dwarf_cfa: { | 4577 case Intrinsic::eh_dwarf_cfa: { |
5005 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, | 4578 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, |
5006 TLI.getPointerTy()); | 4579 TLI.getPointerTy(DAG.getDataLayout())); |
5007 SDValue Offset = DAG.getNode(ISD::ADD, sdl, | 4580 SDValue Offset = DAG.getNode(ISD::ADD, sdl, |
5008 CfaArg.getValueType(), | 4581 CfaArg.getValueType(), |
5009 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, | 4582 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, |
5010 CfaArg.getValueType()), | 4583 CfaArg.getValueType()), |
5011 CfaArg); | 4584 CfaArg); |
5012 SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), | 4585 SDValue FA = DAG.getNode( |
5013 DAG.getConstant(0, TLI.getPointerTy())); | 4586 ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), |
4587 DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); | |
5014 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), | 4588 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), |
5015 FA, Offset)); | 4589 FA, Offset)); |
5016 return nullptr; | 4590 return nullptr; |
5017 } | 4591 } |
5018 case Intrinsic::eh_sjlj_callsite: { | 4592 case Intrinsic::eh_sjlj_callsite: { |
5046 case Intrinsic::eh_sjlj_longjmp: { | 4620 case Intrinsic::eh_sjlj_longjmp: { |
5047 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, | 4621 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, |
5048 getRoot(), getValue(I.getArgOperand(0)))); | 4622 getRoot(), getValue(I.getArgOperand(0)))); |
5049 return nullptr; | 4623 return nullptr; |
5050 } | 4624 } |
5051 | 4625 case Intrinsic::eh_sjlj_setup_dispatch: { |
4626 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, | |
4627 getRoot())); | |
4628 return nullptr; | |
4629 } | |
4630 | |
4631 case Intrinsic::masked_gather: | |
4632 visitMaskedGather(I); | |
4633 return nullptr; | |
5052 case Intrinsic::masked_load: | 4634 case Intrinsic::masked_load: |
5053 visitMaskedLoad(I); | 4635 visitMaskedLoad(I); |
4636 return nullptr; | |
4637 case Intrinsic::masked_scatter: | |
4638 visitMaskedScatter(I); | |
5054 return nullptr; | 4639 return nullptr; |
5055 case Intrinsic::masked_store: | 4640 case Intrinsic::masked_store: |
5056 visitMaskedStore(I); | 4641 visitMaskedStore(I); |
5057 return nullptr; | 4642 return nullptr; |
5058 case Intrinsic::x86_mmx_pslli_w: | 4643 case Intrinsic::x86_mmx_pslli_w: |
5102 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits | 4687 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits |
5103 // to be zero. | 4688 // to be zero. |
5104 // We must do this early because v2i32 is not a legal type. | 4689 // We must do this early because v2i32 is not a legal type. |
5105 SDValue ShOps[2]; | 4690 SDValue ShOps[2]; |
5106 ShOps[0] = ShAmt; | 4691 ShOps[0] = ShAmt; |
5107 ShOps[1] = DAG.getConstant(0, MVT::i32); | 4692 ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); |
5108 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); | 4693 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); |
5109 EVT DestVT = TLI.getValueType(I.getType()); | 4694 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
5110 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); | 4695 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); |
5111 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, | 4696 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, |
5112 DAG.getConstant(NewIntrinsic, MVT::i32), | 4697 DAG.getConstant(NewIntrinsic, sdl, MVT::i32), |
5113 getValue(I.getArgOperand(0)), ShAmt); | 4698 getValue(I.getArgOperand(0)), ShAmt); |
5114 setValue(&I, Res); | |
5115 return nullptr; | |
5116 } | |
5117 case Intrinsic::x86_avx_vinsertf128_pd_256: | |
5118 case Intrinsic::x86_avx_vinsertf128_ps_256: | |
5119 case Intrinsic::x86_avx_vinsertf128_si_256: | |
5120 case Intrinsic::x86_avx2_vinserti128: { | |
5121 EVT DestVT = TLI.getValueType(I.getType()); | |
5122 EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); | |
5123 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * | |
5124 ElVT.getVectorNumElements(); | |
5125 Res = | |
5126 DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, | |
5127 getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), | |
5128 DAG.getConstant(Idx, TLI.getVectorIdxTy())); | |
5129 setValue(&I, Res); | |
5130 return nullptr; | |
5131 } | |
5132 case Intrinsic::x86_avx_vextractf128_pd_256: | |
5133 case Intrinsic::x86_avx_vextractf128_ps_256: | |
5134 case Intrinsic::x86_avx_vextractf128_si_256: | |
5135 case Intrinsic::x86_avx2_vextracti128: { | |
5136 EVT DestVT = TLI.getValueType(I.getType()); | |
5137 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * | |
5138 DestVT.getVectorNumElements(); | |
5139 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, | |
5140 getValue(I.getArgOperand(0)), | |
5141 DAG.getConstant(Idx, TLI.getVectorIdxTy())); | |
5142 setValue(&I, Res); | 4699 setValue(&I, Res); |
5143 return nullptr; | 4700 return nullptr; |
5144 } | 4701 } |
5145 case Intrinsic::convertff: | 4702 case Intrinsic::convertff: |
5146 case Intrinsic::convertfsi: | 4703 case Intrinsic::convertfsi: |
5162 case Intrinsic::convertss: Code = ISD::CVT_SS; break; | 4719 case Intrinsic::convertss: Code = ISD::CVT_SS; break; |
5163 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; | 4720 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; |
5164 case Intrinsic::convertus: Code = ISD::CVT_US; break; | 4721 case Intrinsic::convertus: Code = ISD::CVT_US; break; |
5165 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; | 4722 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; |
5166 } | 4723 } |
5167 EVT DestVT = TLI.getValueType(I.getType()); | 4724 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
5168 const Value *Op1 = I.getArgOperand(0); | 4725 const Value *Op1 = I.getArgOperand(0); |
5169 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), | 4726 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), |
5170 DAG.getValueType(DestVT), | 4727 DAG.getValueType(DestVT), |
5171 DAG.getValueType(getValue(Op1).getValueType()), | 4728 DAG.getValueType(getValue(Op1).getValueType()), |
5172 getValue(I.getArgOperand(1)), | 4729 getValue(I.getArgOperand(1)), |
5252 getValue(I.getArgOperand(0)), | 4809 getValue(I.getArgOperand(0)), |
5253 getValue(I.getArgOperand(1)), | 4810 getValue(I.getArgOperand(1)), |
5254 getValue(I.getArgOperand(2)))); | 4811 getValue(I.getArgOperand(2)))); |
5255 return nullptr; | 4812 return nullptr; |
5256 case Intrinsic::fmuladd: { | 4813 case Intrinsic::fmuladd: { |
5257 EVT VT = TLI.getValueType(I.getType()); | 4814 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
5258 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && | 4815 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && |
5259 TLI.isFMAFasterThanFMulAndFAdd(VT)) { | 4816 TLI.isFMAFasterThanFMulAndFAdd(VT)) { |
5260 setValue(&I, DAG.getNode(ISD::FMA, sdl, | 4817 setValue(&I, DAG.getNode(ISD::FMA, sdl, |
5261 getValue(I.getArgOperand(0)).getValueType(), | 4818 getValue(I.getArgOperand(0)).getValueType(), |
5262 getValue(I.getArgOperand(0)), | 4819 getValue(I.getArgOperand(0)), |
5263 getValue(I.getArgOperand(1)), | 4820 getValue(I.getArgOperand(1)), |
5264 getValue(I.getArgOperand(2)))); | 4821 getValue(I.getArgOperand(2)))); |
5265 } else { | 4822 } else { |
4823 // TODO: Intrinsic calls should have fast-math-flags. | |
5266 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, | 4824 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, |
5267 getValue(I.getArgOperand(0)).getValueType(), | 4825 getValue(I.getArgOperand(0)).getValueType(), |
5268 getValue(I.getArgOperand(0)), | 4826 getValue(I.getArgOperand(0)), |
5269 getValue(I.getArgOperand(1))); | 4827 getValue(I.getArgOperand(1))); |
5270 SDValue Add = DAG.getNode(ISD::FADD, sdl, | 4828 SDValue Add = DAG.getNode(ISD::FADD, sdl, |
5277 } | 4835 } |
5278 case Intrinsic::convert_to_fp16: | 4836 case Intrinsic::convert_to_fp16: |
5279 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, | 4837 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, |
5280 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, | 4838 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, |
5281 getValue(I.getArgOperand(0)), | 4839 getValue(I.getArgOperand(0)), |
5282 DAG.getTargetConstant(0, MVT::i32)))); | 4840 DAG.getTargetConstant(0, sdl, |
4841 MVT::i32)))); | |
5283 return nullptr; | 4842 return nullptr; |
5284 case Intrinsic::convert_from_fp16: | 4843 case Intrinsic::convert_from_fp16: |
5285 setValue(&I, | 4844 setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, |
5286 DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), | 4845 TLI.getValueType(DAG.getDataLayout(), I.getType()), |
5287 DAG.getNode(ISD::BITCAST, sdl, MVT::f16, | 4846 DAG.getNode(ISD::BITCAST, sdl, MVT::f16, |
5288 getValue(I.getArgOperand(0))))); | 4847 getValue(I.getArgOperand(0))))); |
5289 return nullptr; | 4848 return nullptr; |
5290 case Intrinsic::pcmarker: { | 4849 case Intrinsic::pcmarker: { |
5291 SDValue Tmp = getValue(I.getArgOperand(0)); | 4850 SDValue Tmp = getValue(I.getArgOperand(0)); |
5292 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); | 4851 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); |
5293 return nullptr; | 4852 return nullptr; |
5302 } | 4861 } |
5303 case Intrinsic::bswap: | 4862 case Intrinsic::bswap: |
5304 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, | 4863 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, |
5305 getValue(I.getArgOperand(0)).getValueType(), | 4864 getValue(I.getArgOperand(0)).getValueType(), |
5306 getValue(I.getArgOperand(0)))); | 4865 getValue(I.getArgOperand(0)))); |
4866 return nullptr; | |
4867 case Intrinsic::uabsdiff: | |
4868 setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl, | |
4869 getValue(I.getArgOperand(0)).getValueType(), | |
4870 getValue(I.getArgOperand(0)), | |
4871 getValue(I.getArgOperand(1)))); | |
4872 return nullptr; | |
4873 case Intrinsic::sabsdiff: | |
4874 setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl, | |
4875 getValue(I.getArgOperand(0)).getValueType(), | |
4876 getValue(I.getArgOperand(0)), | |
4877 getValue(I.getArgOperand(1)))); | |
5307 return nullptr; | 4878 return nullptr; |
5308 case Intrinsic::cttz: { | 4879 case Intrinsic::cttz: { |
5309 SDValue Arg = getValue(I.getArgOperand(0)); | 4880 SDValue Arg = getValue(I.getArgOperand(0)); |
5310 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); | 4881 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); |
5311 EVT Ty = Arg.getValueType(); | 4882 EVT Ty = Arg.getValueType(); |
5327 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); | 4898 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); |
5328 return nullptr; | 4899 return nullptr; |
5329 } | 4900 } |
5330 case Intrinsic::stacksave: { | 4901 case Intrinsic::stacksave: { |
5331 SDValue Op = getRoot(); | 4902 SDValue Op = getRoot(); |
5332 Res = DAG.getNode(ISD::STACKSAVE, sdl, | 4903 Res = DAG.getNode( |
5333 DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); | 4904 ISD::STACKSAVE, sdl, |
4905 DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); | |
5334 setValue(&I, Res); | 4906 setValue(&I, Res); |
5335 DAG.setRoot(Res.getValue(1)); | 4907 DAG.setRoot(Res.getValue(1)); |
5336 return nullptr; | 4908 return nullptr; |
5337 } | 4909 } |
5338 case Intrinsic::stackrestore: { | 4910 case Intrinsic::stackrestore: { |
5342 } | 4914 } |
5343 case Intrinsic::stackprotector: { | 4915 case Intrinsic::stackprotector: { |
5344 // Emit code into the DAG to store the stack guard onto the stack. | 4916 // Emit code into the DAG to store the stack guard onto the stack. |
5345 MachineFunction &MF = DAG.getMachineFunction(); | 4917 MachineFunction &MF = DAG.getMachineFunction(); |
5346 MachineFrameInfo *MFI = MF.getFrameInfo(); | 4918 MachineFrameInfo *MFI = MF.getFrameInfo(); |
5347 EVT PtrTy = TLI.getPointerTy(); | 4919 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); |
5348 SDValue Src, Chain = getRoot(); | 4920 SDValue Src, Chain = getRoot(); |
5349 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); | 4921 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); |
5350 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); | 4922 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); |
5351 | 4923 |
5352 // See if Ptr is a bitcast. If it is, look through it and see if we can get | 4924 // See if Ptr is a bitcast. If it is, look through it and see if we can get |
5388 MFI->setStackProtectorIndex(FI); | 4960 MFI->setStackProtectorIndex(FI); |
5389 | 4961 |
5390 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); | 4962 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); |
5391 | 4963 |
5392 // Store the stack protector onto the stack. | 4964 // Store the stack protector onto the stack. |
5393 Res = DAG.getStore(Chain, sdl, Src, FIN, | 4965 Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( |
5394 MachinePointerInfo::getFixedStack(FI), | 4966 DAG.getMachineFunction(), FI), |
5395 true, false, 0); | 4967 true, false, 0); |
5396 setValue(&I, Res); | 4968 setValue(&I, Res); |
5397 DAG.setRoot(Res); | 4969 DAG.setRoot(Res); |
5398 return nullptr; | 4970 return nullptr; |
5399 } | 4971 } |
5405 | 4977 |
5406 SDValue Arg = getValue(I.getCalledValue()); | 4978 SDValue Arg = getValue(I.getCalledValue()); |
5407 EVT Ty = Arg.getValueType(); | 4979 EVT Ty = Arg.getValueType(); |
5408 | 4980 |
5409 if (CI->isZero()) | 4981 if (CI->isZero()) |
5410 Res = DAG.getConstant(-1ULL, Ty); | 4982 Res = DAG.getConstant(-1ULL, sdl, Ty); |
5411 else | 4983 else |
5412 Res = DAG.getConstant(0, Ty); | 4984 Res = DAG.getConstant(0, sdl, Ty); |
5413 | 4985 |
5414 setValue(&I, Res); | 4986 setValue(&I, Res); |
5415 return nullptr; | 4987 return nullptr; |
5416 } | 4988 } |
5417 case Intrinsic::annotation: | 4989 case Intrinsic::annotation: |
5440 DAG.setRoot(Res); | 5012 DAG.setRoot(Res); |
5441 return nullptr; | 5013 return nullptr; |
5442 } | 5014 } |
5443 case Intrinsic::adjust_trampoline: { | 5015 case Intrinsic::adjust_trampoline: { |
5444 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, | 5016 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, |
5445 TLI.getPointerTy(), | 5017 TLI.getPointerTy(DAG.getDataLayout()), |
5446 getValue(I.getArgOperand(0)))); | 5018 getValue(I.getArgOperand(0)))); |
5447 return nullptr; | 5019 return nullptr; |
5448 } | 5020 } |
5449 case Intrinsic::gcroot: | 5021 case Intrinsic::gcroot: |
5450 if (GFI) { | 5022 if (GFI) { |
5468 return nullptr; | 5040 return nullptr; |
5469 } | 5041 } |
5470 | 5042 |
5471 case Intrinsic::debugtrap: | 5043 case Intrinsic::debugtrap: |
5472 case Intrinsic::trap: { | 5044 case Intrinsic::trap: { |
5473 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); | 5045 StringRef TrapFuncName = |
5046 I.getAttributes() | |
5047 .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") | |
5048 .getValueAsString(); | |
5474 if (TrapFuncName.empty()) { | 5049 if (TrapFuncName.empty()) { |
5475 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? | 5050 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? |
5476 ISD::TRAP : ISD::DEBUGTRAP; | 5051 ISD::TRAP : ISD::DEBUGTRAP; |
5477 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); | 5052 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); |
5478 return nullptr; | 5053 return nullptr; |
5479 } | 5054 } |
5480 TargetLowering::ArgListTy Args; | 5055 TargetLowering::ArgListTy Args; |
5481 | 5056 |
5482 TargetLowering::CallLoweringInfo CLI(DAG); | 5057 TargetLowering::CallLoweringInfo CLI(DAG); |
5483 CLI.setDebugLoc(sdl).setChain(getRoot()) | 5058 CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( |
5484 .setCallee(CallingConv::C, I.getType(), | 5059 CallingConv::C, I.getType(), |
5485 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), | 5060 DAG.getExternalSymbol(TrapFuncName.data(), |
5486 std::move(Args), 0); | 5061 TLI.getPointerTy(DAG.getDataLayout())), |
5062 std::move(Args), 0); | |
5487 | 5063 |
5488 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); | 5064 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
5489 DAG.setRoot(Result.second); | 5065 DAG.setRoot(Result.second); |
5490 return nullptr; | 5066 return nullptr; |
5491 } | 5067 } |
5537 // Stack coloring is not enabled in O0, discard region information. | 5113 // Stack coloring is not enabled in O0, discard region information. |
5538 if (TM.getOptLevel() == CodeGenOpt::None) | 5114 if (TM.getOptLevel() == CodeGenOpt::None) |
5539 return nullptr; | 5115 return nullptr; |
5540 | 5116 |
5541 SmallVector<Value *, 4> Allocas; | 5117 SmallVector<Value *, 4> Allocas; |
5542 GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); | 5118 GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); |
5543 | 5119 |
5544 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), | 5120 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), |
5545 E = Allocas.end(); Object != E; ++Object) { | 5121 E = Allocas.end(); Object != E; ++Object) { |
5546 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); | 5122 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); |
5547 | 5123 |
5557 | 5133 |
5558 int FI = SI->second; | 5134 int FI = SI->second; |
5559 | 5135 |
5560 SDValue Ops[2]; | 5136 SDValue Ops[2]; |
5561 Ops[0] = getRoot(); | 5137 Ops[0] = getRoot(); |
5562 Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); | 5138 Ops[1] = |
5139 DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); | |
5563 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); | 5140 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); |
5564 | 5141 |
5565 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); | 5142 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); |
5566 DAG.setRoot(Res); | 5143 DAG.setRoot(Res); |
5567 } | 5144 } |
5568 return nullptr; | 5145 return nullptr; |
5569 } | 5146 } |
5570 case Intrinsic::invariant_start: | 5147 case Intrinsic::invariant_start: |
5571 // Discard region information. | 5148 // Discard region information. |
5572 setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); | 5149 setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); |
5573 return nullptr; | 5150 return nullptr; |
5574 case Intrinsic::invariant_end: | 5151 case Intrinsic::invariant_end: |
5575 // Discard region information. | 5152 // Discard region information. |
5576 return nullptr; | 5153 return nullptr; |
5577 case Intrinsic::stackprotectorcheck: { | 5154 case Intrinsic::stackprotectorcheck: { |
5616 return nullptr; | 5193 return nullptr; |
5617 } | 5194 } |
5618 case Intrinsic::instrprof_increment: | 5195 case Intrinsic::instrprof_increment: |
5619 llvm_unreachable("instrprof failed to lower an increment"); | 5196 llvm_unreachable("instrprof failed to lower an increment"); |
5620 | 5197 |
5621 case Intrinsic::frameallocate: { | 5198 case Intrinsic::localescape: { |
5622 MachineFunction &MF = DAG.getMachineFunction(); | 5199 MachineFunction &MF = DAG.getMachineFunction(); |
5623 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); | 5200 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); |
5624 | 5201 |
5625 // Do the allocation and map it as a normal value. | 5202 // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission |
5626 // FIXME: Maybe we should add this to the alloca map so that we don't have | 5203 // is the same on all targets. |
5627 // to register allocate it? | 5204 for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { |
5628 uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); | 5205 Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); |
5629 int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); | 5206 if (isa<ConstantPointerNull>(Arg)) |
5630 MVT PtrVT = TLI.getPointerTy(0); | 5207 continue; // Skip null pointers. They represent a hole in index space. |
5631 SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); | 5208 AllocaInst *Slot = cast<AllocaInst>(Arg); |
5632 setValue(&I, FIVal); | 5209 assert(FuncInfo.StaticAllocaMap.count(Slot) && |
5633 | 5210 "can only escape static allocas"); |
5634 // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is | 5211 int FI = FuncInfo.StaticAllocaMap[Slot]; |
5635 // the same on all targets. | 5212 MCSymbol *FrameAllocSym = |
5213 MF.getMMI().getContext().getOrCreateFrameAllocSymbol( | |
5214 GlobalValue::getRealLinkageName(MF.getName()), Idx); | |
5215 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, | |
5216 TII->get(TargetOpcode::LOCAL_ESCAPE)) | |
5217 .addSym(FrameAllocSym) | |
5218 .addFrameIndex(FI); | |
5219 } | |
5220 | |
5221 return nullptr; | |
5222 } | |
5223 | |
5224 case Intrinsic::localrecover: { | |
5225 // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) | |
5226 MachineFunction &MF = DAG.getMachineFunction(); | |
5227 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); | |
5228 | |
5229 // Get the symbol that defines the frame offset. | |
5230 auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); | |
5231 auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); | |
5232 unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); | |
5636 MCSymbol *FrameAllocSym = | 5233 MCSymbol *FrameAllocSym = |
5637 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); | 5234 MF.getMMI().getContext().getOrCreateFrameAllocSymbol( |
5638 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, | 5235 GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); |
5639 TII->get(TargetOpcode::FRAME_ALLOC)) | 5236 |
5640 .addSym(FrameAllocSym) | 5237 // Create a MCSymbol for the label to avoid any target lowering |
5641 .addFrameIndex(Alloc); | |
5642 | |
5643 return nullptr; | |
5644 } | |
5645 | |
5646 case Intrinsic::framerecover: { | |
5647 // i8* @llvm.framerecover(i8* %fn, i8* %fp) | |
5648 MachineFunction &MF = DAG.getMachineFunction(); | |
5649 MVT PtrVT = TLI.getPointerTy(0); | |
5650 | |
5651 // Get the symbol that defines the frame offset. | |
5652 Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); | |
5653 MCSymbol *FrameAllocSym = | |
5654 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); | |
5655 | |
5656 // Create a TargetExternalSymbol for the label to avoid any target lowering | |
5657 // that would make this PC relative. | 5238 // that would make this PC relative. |
5658 StringRef Name = FrameAllocSym->getName(); | 5239 SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); |
5659 assert(Name.size() == strlen(Name.data()) && "not null terminated"); | |
5660 SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); | |
5661 SDValue OffsetVal = | 5240 SDValue OffsetVal = |
5662 DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); | 5241 DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); |
5663 | 5242 |
5664 // Add the offset to the FP. | 5243 // Add the offset to the FP. |
5665 Value *FP = I.getArgOperand(1); | 5244 Value *FP = I.getArgOperand(1); |
5666 SDValue FPVal = getValue(FP); | 5245 SDValue FPVal = getValue(FP); |
5667 SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); | 5246 SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); |
5668 setValue(&I, Add); | 5247 setValue(&I, Add); |
5669 | 5248 |
5670 return nullptr; | 5249 return nullptr; |
5671 } | 5250 } |
5672 case Intrinsic::eh_begincatch: | 5251 |
5673 case Intrinsic::eh_endcatch: | 5252 case Intrinsic::eh_exceptionpointer: |
5674 llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); | 5253 case Intrinsic::eh_exceptioncode: { |
5254 // Get the exception pointer vreg, copy from it, and resize it to fit. | |
5255 const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); | |
5256 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); | |
5257 const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); | |
5258 unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); | |
5259 SDValue N = | |
5260 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); | |
5261 N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); | |
5262 setValue(&I, N); | |
5263 return nullptr; | |
5264 } | |
5675 } | 5265 } |
5676 } | 5266 } |
5677 | 5267 |
5678 std::pair<SDValue, SDValue> | 5268 std::pair<SDValue, SDValue> |
5679 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, | 5269 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, |
5680 MachineBasicBlock *LandingPad) { | 5270 const BasicBlock *EHPadBB) { |
5681 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); | 5271 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); |
5682 MCSymbol *BeginLabel = nullptr; | 5272 MCSymbol *BeginLabel = nullptr; |
5683 | 5273 |
5684 if (LandingPad) { | 5274 if (EHPadBB) { |
5685 // Insert a label before the invoke call to mark the try range. This can be | 5275 // Insert a label before the invoke call to mark the try range. This can be |
5686 // used to detect deletion of the invoke via the MachineModuleInfo. | 5276 // used to detect deletion of the invoke via the MachineModuleInfo. |
5687 BeginLabel = MMI.getContext().CreateTempSymbol(); | 5277 BeginLabel = MMI.getContext().createTempSymbol(); |
5688 | 5278 |
5689 // For SjLj, keep track of which landing pads go with which invokes | 5279 // For SjLj, keep track of which landing pads go with which invokes |
5690 // so as to maintain the ordering of pads in the LSDA. | 5280 // so as to maintain the ordering of pads in the LSDA. |
5691 unsigned CallSiteIndex = MMI.getCurrentCallSite(); | 5281 unsigned CallSiteIndex = MMI.getCurrentCallSite(); |
5692 if (CallSiteIndex) { | 5282 if (CallSiteIndex) { |
5693 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); | 5283 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); |
5694 LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); | 5284 LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); |
5695 | 5285 |
5696 // Now that the call site is handled, stop tracking it. | 5286 // Now that the call site is handled, stop tracking it. |
5697 MMI.setCurrentCallSite(0); | 5287 MMI.setCurrentCallSite(0); |
5698 } | 5288 } |
5699 | 5289 |
5722 PendingExports.clear(); | 5312 PendingExports.clear(); |
5723 } else { | 5313 } else { |
5724 DAG.setRoot(Result.second); | 5314 DAG.setRoot(Result.second); |
5725 } | 5315 } |
5726 | 5316 |
5727 if (LandingPad) { | 5317 if (EHPadBB) { |
5728 // Insert a label at the end of the invoke call to mark the try range. This | 5318 // Insert a label at the end of the invoke call to mark the try range. This |
5729 // can be used to detect deletion of the invoke via the MachineModuleInfo. | 5319 // can be used to detect deletion of the invoke via the MachineModuleInfo. |
5730 MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); | 5320 MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); |
5731 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); | 5321 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); |
5732 | 5322 |
5733 // Inform MachineModuleInfo of range. | 5323 // Inform MachineModuleInfo of range. |
5734 MMI.addInvoke(LandingPad, BeginLabel, EndLabel); | 5324 if (MMI.hasEHFunclets()) { |
5325 WinEHFuncInfo &EHInfo = | |
5326 MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction()); | |
5327 EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel); | |
5328 } else { | |
5329 MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); | |
5330 } | |
5735 } | 5331 } |
5736 | 5332 |
5737 return Result; | 5333 return Result; |
5738 } | 5334 } |
5739 | 5335 |
5740 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, | 5336 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, |
5741 bool isTailCall, | 5337 bool isTailCall, |
5742 MachineBasicBlock *LandingPad) { | 5338 const BasicBlock *EHPadBB) { |
5743 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); | 5339 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); |
5744 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); | 5340 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); |
5745 Type *RetTy = FTy->getReturnType(); | 5341 Type *RetTy = FTy->getReturnType(); |
5746 | 5342 |
5747 TargetLowering::ArgListTy Args; | 5343 TargetLowering::ArgListTy Args; |
5760 Entry.Node = ArgNode; Entry.Ty = V->getType(); | 5356 Entry.Node = ArgNode; Entry.Ty = V->getType(); |
5761 | 5357 |
5762 // Skip the first return-type Attribute to get to params. | 5358 // Skip the first return-type Attribute to get to params. |
5763 Entry.setAttributes(&CS, i - CS.arg_begin() + 1); | 5359 Entry.setAttributes(&CS, i - CS.arg_begin() + 1); |
5764 Args.push_back(Entry); | 5360 Args.push_back(Entry); |
5361 | |
5362 // If we have an explicit sret argument that is an Instruction, (i.e., it | |
5363 // might point to function-local memory), we can't meaningfully tail-call. | |
5364 if (Entry.isSRet && isa<Instruction>(V)) | |
5365 isTailCall = false; | |
5765 } | 5366 } |
5766 | 5367 |
5767 // Check if target-independent constraints permit a tail call here. | 5368 // Check if target-independent constraints permit a tail call here. |
5768 // Target-dependent constraints are checked within TLI->LowerCallTo. | 5369 // Target-dependent constraints are checked within TLI->LowerCallTo. |
5769 if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) | 5370 if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) |
5798 else if (CS.getCalledValue()->getType()->isPointerTy()) // if it is a pointer access; ex) goto codesegmentPointer; | 5399 else if (CS.getCalledValue()->getType()->isPointerTy()) // if it is a pointer access; ex) goto codesegmentPointer; |
5799 DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() + | 5400 DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() + |
5800 " : Tail call elimination was failed on codesegment which is accessed by pointer!"); // we can't get name from Type... | 5401 " : Tail call elimination was failed on codesegment which is accessed by pointer!"); // we can't get name from Type... |
5801 } | 5402 } |
5802 #endif | 5403 #endif |
5803 std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); | 5404 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); |
5804 | 5405 |
5805 if (Result.first.getNode()) | 5406 if (Result.first.getNode()) |
5806 setValue(CS.getInstruction(), Result.first); | 5407 setValue(CS.getInstruction(), Result.first); |
5807 } | 5408 } |
5808 | 5409 |
5830 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { | 5431 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { |
5831 // Cast pointer to the type we really want to load. | 5432 // Cast pointer to the type we really want to load. |
5832 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), | 5433 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), |
5833 PointerType::getUnqual(LoadTy)); | 5434 PointerType::getUnqual(LoadTy)); |
5834 | 5435 |
5835 if (const Constant *LoadCst = | 5436 if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( |
5836 ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), | 5437 const_cast<Constant *>(LoadInput), *Builder.DL)) |
5837 Builder.DL)) | |
5838 return Builder.getValue(LoadCst); | 5438 return Builder.getValue(LoadCst); |
5839 } | 5439 } |
5840 | 5440 |
5841 // Otherwise, we have to emit the load. If the pointer is to unfoldable but | 5441 // Otherwise, we have to emit the load. If the pointer is to unfoldable but |
5842 // still constant memory, the input chain can be the entry node. | 5442 // still constant memory, the input chain can be the entry node. |
5867 /// processIntegerCallValue - Record the value for an instruction that | 5467 /// processIntegerCallValue - Record the value for an instruction that |
5868 /// produces an integer result, converting the type where necessary. | 5468 /// produces an integer result, converting the type where necessary. |
5869 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, | 5469 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, |
5870 SDValue Value, | 5470 SDValue Value, |
5871 bool IsSigned) { | 5471 bool IsSigned) { |
5872 EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); | 5472 EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
5473 I.getType(), true); | |
5873 if (IsSigned) | 5474 if (IsSigned) |
5874 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); | 5475 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); |
5875 else | 5476 else |
5876 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); | 5477 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); |
5877 setValue(&I, Value); | 5478 setValue(&I, Value); |
5892 return false; | 5493 return false; |
5893 | 5494 |
5894 const Value *Size = I.getArgOperand(2); | 5495 const Value *Size = I.getArgOperand(2); |
5895 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); | 5496 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); |
5896 if (CSize && CSize->getZExtValue() == 0) { | 5497 if (CSize && CSize->getZExtValue() == 0) { |
5897 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); | 5498 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
5898 setValue(&I, DAG.getConstant(0, CallVT)); | 5499 I.getType(), true); |
5500 setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); | |
5899 return true; | 5501 return true; |
5900 } | 5502 } |
5901 | 5503 |
5902 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); | 5504 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); |
5903 std::pair<SDValue, SDValue> Res = | 5505 std::pair<SDValue, SDValue> Res = |
6176 RenameFn = visitIntrinsicCall(I, IID); | 5778 RenameFn = visitIntrinsicCall(I, IID); |
6177 if (!RenameFn) | 5779 if (!RenameFn) |
6178 return; | 5780 return; |
6179 } | 5781 } |
6180 } | 5782 } |
6181 if (unsigned IID = F->getIntrinsicID()) { | 5783 if (Intrinsic::ID IID = F->getIntrinsicID()) { |
6182 RenameFn = visitIntrinsicCall(I, IID); | 5784 RenameFn = visitIntrinsicCall(I, IID); |
6183 if (!RenameFn) | 5785 if (!RenameFn) |
6184 return; | 5786 return; |
6185 } | 5787 } |
6186 } | 5788 } |
6329 | 5931 |
6330 SDValue Callee; | 5932 SDValue Callee; |
6331 if (!RenameFn) | 5933 if (!RenameFn) |
6332 Callee = getValue(I.getCalledValue()); | 5934 Callee = getValue(I.getCalledValue()); |
6333 else | 5935 else |
6334 Callee = DAG.getExternalSymbol(RenameFn, | 5936 Callee = DAG.getExternalSymbol( |
6335 DAG.getTargetLoweringInfo().getPointerTy()); | 5937 RenameFn, |
5938 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); | |
6336 | 5939 |
6337 // Check if we can potentially perform a tail call. More detailed checking is | 5940 // Check if we can potentially perform a tail call. More detailed checking is |
6338 // be done within LowerCallTo, after more information about the call is known. | 5941 // be done within LowerCallTo, after more information about the call is known. |
6339 LowerCallTo(&I, Callee, I.isTailCall()); | 5942 LowerCallTo(&I, Callee, I.isTailCall()); |
6340 } | 5943 } |
6359 } | 5962 } |
6360 | 5963 |
6361 /// getCallOperandValEVT - Return the EVT of the Value* that this operand | 5964 /// getCallOperandValEVT - Return the EVT of the Value* that this operand |
6362 /// corresponds to. If there is no Value* for this operand, it returns | 5965 /// corresponds to. If there is no Value* for this operand, it returns |
6363 /// MVT::Other. | 5966 /// MVT::Other. |
6364 EVT getCallOperandValEVT(LLVMContext &Context, | 5967 EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, |
6365 const TargetLowering &TLI, | 5968 const DataLayout &DL) const { |
6366 const DataLayout *DL) const { | |
6367 if (!CallOperandVal) return MVT::Other; | 5969 if (!CallOperandVal) return MVT::Other; |
6368 | 5970 |
6369 if (isa<BasicBlock>(CallOperandVal)) | 5971 if (isa<BasicBlock>(CallOperandVal)) |
6370 return TLI.getPointerTy(); | 5972 return TLI.getPointerTy(DL); |
6371 | 5973 |
6372 llvm::Type *OpTy = CallOperandVal->getType(); | 5974 llvm::Type *OpTy = CallOperandVal->getType(); |
6373 | 5975 |
6374 // FIXME: code duplicated from TargetLowering::ParseConstraints(). | 5976 // FIXME: code duplicated from TargetLowering::ParseConstraints(). |
6375 // If this is an indirect operand, the operand is a pointer to the | 5977 // If this is an indirect operand, the operand is a pointer to the |
6387 OpTy = STy->getElementType(0); | 5989 OpTy = STy->getElementType(0); |
6388 | 5990 |
6389 // If OpTy is not a single value, it may be a struct/union that we | 5991 // If OpTy is not a single value, it may be a struct/union that we |
6390 // can tile with integers. | 5992 // can tile with integers. |
6391 if (!OpTy->isSingleValueType() && OpTy->isSized()) { | 5993 if (!OpTy->isSingleValueType() && OpTy->isSized()) { |
6392 unsigned BitSize = DL->getTypeSizeInBits(OpTy); | 5994 unsigned BitSize = DL.getTypeSizeInBits(OpTy); |
6393 switch (BitSize) { | 5995 switch (BitSize) { |
6394 default: break; | 5996 default: break; |
6395 case 1: | 5997 case 1: |
6396 case 8: | 5998 case 8: |
6397 case 16: | 5999 case 16: |
6401 OpTy = IntegerType::get(Context, BitSize); | 6003 OpTy = IntegerType::get(Context, BitSize); |
6402 break; | 6004 break; |
6403 } | 6005 } |
6404 } | 6006 } |
6405 | 6007 |
6406 return TLI.getValueType(OpTy, true); | 6008 return TLI.getValueType(DL, OpTy, true); |
6407 } | 6009 } |
6408 }; | 6010 }; |
6409 | 6011 |
6410 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; | 6012 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; |
6411 | 6013 |
6428 MachineFunction &MF = DAG.getMachineFunction(); | 6030 MachineFunction &MF = DAG.getMachineFunction(); |
6429 SmallVector<unsigned, 4> Regs; | 6031 SmallVector<unsigned, 4> Regs; |
6430 | 6032 |
6431 // If this is a constraint for a single physreg, or a constraint for a | 6033 // If this is a constraint for a single physreg, or a constraint for a |
6432 // register class, find it. | 6034 // register class, find it. |
6433 std::pair<unsigned, const TargetRegisterClass*> PhysReg = | 6035 std::pair<unsigned, const TargetRegisterClass *> PhysReg = |
6434 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, | 6036 TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), |
6435 OpInfo.ConstraintVT); | 6037 OpInfo.ConstraintCode, |
6038 OpInfo.ConstraintVT); | |
6436 | 6039 |
6437 unsigned NumRegs = 1; | 6040 unsigned NumRegs = 1; |
6438 if (OpInfo.ConstraintVT != MVT::Other) { | 6041 if (OpInfo.ConstraintVT != MVT::Other) { |
6439 // If this is a FP input in an integer register (or visa versa) insert a bit | 6042 // If this is a FP input in an integer register (or visa versa) insert a bit |
6440 // cast of the input value. More generally, handle any case where the input | 6043 // cast of the input value. More generally, handle any case where the input |
6526 | 6129 |
6527 /// ConstraintOperands - Information about all of the constraints. | 6130 /// ConstraintOperands - Information about all of the constraints. |
6528 SDISelAsmOperandInfoVector ConstraintOperands; | 6131 SDISelAsmOperandInfoVector ConstraintOperands; |
6529 | 6132 |
6530 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 6133 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
6531 TargetLowering::AsmOperandInfoVector | 6134 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( |
6532 TargetConstraints = TLI.ParseConstraints(CS); | 6135 DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); |
6533 | 6136 |
6534 bool hasMemory = false; | 6137 bool hasMemory = false; |
6535 | 6138 |
6536 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. | 6139 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. |
6537 unsigned ResNo = 0; // ResNo - The result number of the next output. | 6140 unsigned ResNo = 0; // ResNo - The result number of the next output. |
6552 | 6155 |
6553 // The return value of the call is this value. As such, there is no | 6156 // The return value of the call is this value. As such, there is no |
6554 // corresponding argument. | 6157 // corresponding argument. |
6555 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); | 6158 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); |
6556 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { | 6159 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { |
6557 OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); | 6160 OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), |
6161 STy->getElementType(ResNo)); | |
6558 } else { | 6162 } else { |
6559 assert(ResNo == 0 && "Asm only has one result!"); | 6163 assert(ResNo == 0 && "Asm only has one result!"); |
6560 OpVT = TLI.getSimpleValueType(CS.getType()); | 6164 OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); |
6561 } | 6165 } |
6562 ++ResNo; | 6166 ++ResNo; |
6563 break; | 6167 break; |
6564 case InlineAsm::isInput: | 6168 case InlineAsm::isInput: |
6565 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); | 6169 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); |
6576 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); | 6180 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); |
6577 } else { | 6181 } else { |
6578 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); | 6182 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); |
6579 } | 6183 } |
6580 | 6184 |
6581 OpVT = | 6185 OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, |
6582 OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); | 6186 DAG.getDataLayout()).getSimpleVT(); |
6583 } | 6187 } |
6584 | 6188 |
6585 OpInfo.ConstraintVT = OpVT; | 6189 OpInfo.ConstraintVT = OpVT; |
6586 | 6190 |
6587 // Indirect operand accesses access memory. | 6191 // Indirect operand accesses access memory. |
6619 // error. | 6223 // error. |
6620 if (OpInfo.hasMatchingInput()) { | 6224 if (OpInfo.hasMatchingInput()) { |
6621 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; | 6225 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; |
6622 | 6226 |
6623 if (OpInfo.ConstraintVT != Input.ConstraintVT) { | 6227 if (OpInfo.ConstraintVT != Input.ConstraintVT) { |
6624 std::pair<unsigned, const TargetRegisterClass*> MatchRC = | 6228 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); |
6625 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, | 6229 std::pair<unsigned, const TargetRegisterClass *> MatchRC = |
6626 OpInfo.ConstraintVT); | 6230 TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, |
6627 std::pair<unsigned, const TargetRegisterClass*> InputRC = | 6231 OpInfo.ConstraintVT); |
6628 TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, | 6232 std::pair<unsigned, const TargetRegisterClass *> InputRC = |
6629 Input.ConstraintVT); | 6233 TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, |
6234 Input.ConstraintVT); | |
6630 if ((OpInfo.ConstraintVT.isInteger() != | 6235 if ((OpInfo.ConstraintVT.isInteger() != |
6631 Input.ConstraintVT.isInteger()) || | 6236 Input.ConstraintVT.isInteger()) || |
6632 (MatchRC.second != InputRC.second)) { | 6237 (MatchRC.second != InputRC.second)) { |
6633 report_fatal_error("Unsupported asm: input constraint" | 6238 report_fatal_error("Unsupported asm: input constraint" |
6634 " with a matching output constraint of" | 6239 " with a matching output constraint of" |
6664 // If the operand is a float, integer, or vector constant, spill to a | 6269 // If the operand is a float, integer, or vector constant, spill to a |
6665 // constant pool entry to get its address. | 6270 // constant pool entry to get its address. |
6666 const Value *OpVal = OpInfo.CallOperandVal; | 6271 const Value *OpVal = OpInfo.CallOperandVal; |
6667 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || | 6272 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || |
6668 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { | 6273 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { |
6669 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), | 6274 OpInfo.CallOperand = DAG.getConstantPool( |
6670 TLI.getPointerTy()); | 6275 cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); |
6671 } else { | 6276 } else { |
6672 // Otherwise, create a stack slot and emit a store to it before the | 6277 // Otherwise, create a stack slot and emit a store to it before the |
6673 // asm. | 6278 // asm. |
6674 Type *Ty = OpVal->getType(); | 6279 Type *Ty = OpVal->getType(); |
6675 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); | 6280 auto &DL = DAG.getDataLayout(); |
6676 unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); | 6281 uint64_t TySize = DL.getTypeAllocSize(Ty); |
6282 unsigned Align = DL.getPrefTypeAlignment(Ty); | |
6677 MachineFunction &MF = DAG.getMachineFunction(); | 6283 MachineFunction &MF = DAG.getMachineFunction(); |
6678 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); | 6284 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); |
6679 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); | 6285 SDValue StackSlot = |
6680 Chain = DAG.getStore(Chain, getCurSDLoc(), | 6286 DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); |
6681 OpInfo.CallOperand, StackSlot, | 6287 Chain = DAG.getStore( |
6682 MachinePointerInfo::getFixedStack(SSFI), | 6288 Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, |
6683 false, false, 0); | 6289 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), |
6290 false, false, 0); | |
6684 OpInfo.CallOperand = StackSlot; | 6291 OpInfo.CallOperand = StackSlot; |
6685 } | 6292 } |
6686 | 6293 |
6687 // There is no longer a Value* corresponding to this operand. | 6294 // There is no longer a Value* corresponding to this operand. |
6688 OpInfo.CallOperandVal = nullptr; | 6295 OpInfo.CallOperandVal = nullptr; |
6709 } | 6316 } |
6710 | 6317 |
6711 // AsmNodeOperands - The operands for the ISD::INLINEASM node. | 6318 // AsmNodeOperands - The operands for the ISD::INLINEASM node. |
6712 std::vector<SDValue> AsmNodeOperands; | 6319 std::vector<SDValue> AsmNodeOperands; |
6713 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain | 6320 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain |
6714 AsmNodeOperands.push_back( | 6321 AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( |
6715 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), | 6322 IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); |
6716 TLI.getPointerTy())); | |
6717 | 6323 |
6718 // If we have a !srcloc metadata node associated with it, we want to attach | 6324 // If we have a !srcloc metadata node associated with it, we want to attach |
6719 // this to the ultimately generated inline asm machineinstr. To do this, we | 6325 // this to the ultimately generated inline asm machineinstr. To do this, we |
6720 // pass in the third operand as this (potentially null) inline asm MDNode. | 6326 // pass in the third operand as this (potentially null) inline asm MDNode. |
6721 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); | 6327 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); |
6751 else if (OpInfo.Type == InlineAsm::isClobber) | 6357 else if (OpInfo.Type == InlineAsm::isClobber) |
6752 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); | 6358 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); |
6753 } | 6359 } |
6754 } | 6360 } |
6755 | 6361 |
6756 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, | 6362 AsmNodeOperands.push_back(DAG.getTargetConstant( |
6757 TLI.getPointerTy())); | 6363 ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
6758 | 6364 |
6759 // Loop over all of the inputs, copying the operand values into the | 6365 // Loop over all of the inputs, copying the operand values into the |
6760 // appropriate registers and processing the output regs. | 6366 // appropriate registers and processing the output regs. |
6761 RegsForValue RetValRegs; | 6367 RegsForValue RetValRegs; |
6762 | 6368 |
6771 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && | 6377 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && |
6772 OpInfo.ConstraintType != TargetLowering::C_Register) { | 6378 OpInfo.ConstraintType != TargetLowering::C_Register) { |
6773 // Memory output, or 'other' output (e.g. 'X' constraint). | 6379 // Memory output, or 'other' output (e.g. 'X' constraint). |
6774 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); | 6380 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); |
6775 | 6381 |
6382 unsigned ConstraintID = | |
6383 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); | |
6384 assert(ConstraintID != InlineAsm::Constraint_Unknown && | |
6385 "Failed to convert memory constraint code to constraint id."); | |
6386 | |
6776 // Add information to the INLINEASM node to know about this output. | 6387 // Add information to the INLINEASM node to know about this output. |
6777 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); | 6388 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); |
6778 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, | 6389 OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); |
6779 TLI.getPointerTy())); | 6390 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), |
6391 MVT::i32)); | |
6780 AsmNodeOperands.push_back(OpInfo.CallOperand); | 6392 AsmNodeOperands.push_back(OpInfo.CallOperand); |
6781 break; | 6393 break; |
6782 } | 6394 } |
6783 | 6395 |
6784 // Otherwise, this is a register or register class output. | 6396 // Otherwise, this is a register or register class output. |
6809 // set. | 6421 // set. |
6810 OpInfo.AssignedRegs | 6422 OpInfo.AssignedRegs |
6811 .AddInlineAsmOperands(OpInfo.isEarlyClobber | 6423 .AddInlineAsmOperands(OpInfo.isEarlyClobber |
6812 ? InlineAsm::Kind_RegDefEarlyClobber | 6424 ? InlineAsm::Kind_RegDefEarlyClobber |
6813 : InlineAsm::Kind_RegDef, | 6425 : InlineAsm::Kind_RegDef, |
6814 false, 0, DAG, AsmNodeOperands); | 6426 false, 0, getCurSDLoc(), DAG, AsmNodeOperands); |
6815 break; | 6427 break; |
6816 } | 6428 } |
6817 case InlineAsm::isInput: { | 6429 case InlineAsm::isInput: { |
6818 SDValue InOperandVal = OpInfo.CallOperand; | 6430 SDValue InOperandVal = OpInfo.CallOperand; |
6819 | 6431 |
6864 "inline asm error: This value" | 6476 "inline asm error: This value" |
6865 " type register class is not natively supported!"); | 6477 " type register class is not natively supported!"); |
6866 return; | 6478 return; |
6867 } | 6479 } |
6868 } | 6480 } |
6481 SDLoc dl = getCurSDLoc(); | |
6869 // Use the produced MatchedRegs object to | 6482 // Use the produced MatchedRegs object to |
6870 MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), | 6483 MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, |
6871 Chain, &Flag, CS.getInstruction()); | 6484 Chain, &Flag, CS.getInstruction()); |
6872 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, | 6485 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, |
6873 true, OpInfo.getMatchedOperand(), | 6486 true, OpInfo.getMatchedOperand(), dl, |
6874 DAG, AsmNodeOperands); | 6487 DAG, AsmNodeOperands); |
6875 break; | 6488 break; |
6876 } | 6489 } |
6877 | 6490 |
6878 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); | 6491 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); |
6879 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && | 6492 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && |
6880 "Unexpected number of operands"); | 6493 "Unexpected number of operands"); |
6881 // Add information to the INLINEASM node to know about this input. | 6494 // Add information to the INLINEASM node to know about this input. |
6882 // See InlineAsm.h isUseOperandTiedToDef. | 6495 // See InlineAsm.h isUseOperandTiedToDef. |
6496 OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); | |
6883 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, | 6497 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, |
6884 OpInfo.getMatchedOperand()); | 6498 OpInfo.getMatchedOperand()); |
6885 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, | 6499 AsmNodeOperands.push_back(DAG.getTargetConstant( |
6886 TLI.getPointerTy())); | 6500 OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
6887 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); | 6501 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); |
6888 break; | 6502 break; |
6889 } | 6503 } |
6890 | 6504 |
6891 // Treat indirect 'X' constraint as memory. | 6505 // Treat indirect 'X' constraint as memory. |
6906 } | 6520 } |
6907 | 6521 |
6908 // Add information to the INLINEASM node to know about this input. | 6522 // Add information to the INLINEASM node to know about this input. |
6909 unsigned ResOpType = | 6523 unsigned ResOpType = |
6910 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); | 6524 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); |
6911 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, | 6525 AsmNodeOperands.push_back(DAG.getTargetConstant( |
6912 TLI.getPointerTy())); | 6526 ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
6913 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); | 6527 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); |
6914 break; | 6528 break; |
6915 } | 6529 } |
6916 | 6530 |
6917 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { | 6531 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { |
6918 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); | 6532 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); |
6919 assert(InOperandVal.getValueType() == TLI.getPointerTy() && | 6533 assert(InOperandVal.getValueType() == |
6534 TLI.getPointerTy(DAG.getDataLayout()) && | |
6920 "Memory operands expect pointer values"); | 6535 "Memory operands expect pointer values"); |
6536 | |
6537 unsigned ConstraintID = | |
6538 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); | |
6539 assert(ConstraintID != InlineAsm::Constraint_Unknown && | |
6540 "Failed to convert memory constraint code to constraint id."); | |
6921 | 6541 |
6922 // Add information to the INLINEASM node to know about this input. | 6542 // Add information to the INLINEASM node to know about this input. |
6923 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); | 6543 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); |
6544 ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); | |
6924 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, | 6545 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, |
6925 TLI.getPointerTy())); | 6546 getCurSDLoc(), |
6547 MVT::i32)); | |
6926 AsmNodeOperands.push_back(InOperandVal); | 6548 AsmNodeOperands.push_back(InOperandVal); |
6927 break; | 6549 break; |
6928 } | 6550 } |
6929 | 6551 |
6930 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || | 6552 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || |
6948 "couldn't allocate input reg for constraint '" + | 6570 "couldn't allocate input reg for constraint '" + |
6949 Twine(OpInfo.ConstraintCode) + "'"); | 6571 Twine(OpInfo.ConstraintCode) + "'"); |
6950 return; | 6572 return; |
6951 } | 6573 } |
6952 | 6574 |
6953 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), | 6575 SDLoc dl = getCurSDLoc(); |
6576 | |
6577 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, | |
6954 Chain, &Flag, CS.getInstruction()); | 6578 Chain, &Flag, CS.getInstruction()); |
6955 | 6579 |
6956 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, | 6580 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, |
6957 DAG, AsmNodeOperands); | 6581 dl, DAG, AsmNodeOperands); |
6958 break; | 6582 break; |
6959 } | 6583 } |
6960 case InlineAsm::isClobber: { | 6584 case InlineAsm::isClobber: { |
6961 // Add the clobbered value to the operand list, so that the register | 6585 // Add the clobbered value to the operand list, so that the register |
6962 // allocator is aware that the physreg got clobbered. | 6586 // allocator is aware that the physreg got clobbered. |
6963 if (!OpInfo.AssignedRegs.Regs.empty()) | 6587 if (!OpInfo.AssignedRegs.Regs.empty()) |
6964 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, | 6588 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, |
6965 false, 0, DAG, | 6589 false, 0, getCurSDLoc(), DAG, |
6966 AsmNodeOperands); | 6590 AsmNodeOperands); |
6967 break; | 6591 break; |
6968 } | 6592 } |
6969 } | 6593 } |
6970 } | 6594 } |
6983 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), | 6607 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), |
6984 Chain, &Flag, CS.getInstruction()); | 6608 Chain, &Flag, CS.getInstruction()); |
6985 | 6609 |
6986 // FIXME: Why don't we do this for inline asms with MRVs? | 6610 // FIXME: Why don't we do this for inline asms with MRVs? |
6987 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { | 6611 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { |
6988 EVT ResultType = TLI.getValueType(CS.getType()); | 6612 EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); |
6989 | 6613 |
6990 // If any of the results of the inline asm is a vector, it may have the | 6614 // If any of the results of the inline asm is a vector, it may have the |
6991 // wrong width/num elts. This can happen for register classes that can | 6615 // wrong width/num elts. This can happen for register classes that can |
6992 // contain multiple different value types. The preg or vreg allocated may | 6616 // contain multiple different value types. The preg or vreg allocated may |
6993 // not have the same VT as was expected. Convert it to the right type | 6617 // not have the same VT as was expected. Convert it to the right type |
7049 DAG.getSrcValue(I.getArgOperand(0)))); | 6673 DAG.getSrcValue(I.getArgOperand(0)))); |
7050 } | 6674 } |
7051 | 6675 |
7052 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { | 6676 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { |
7053 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 6677 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7054 const DataLayout &DL = *TLI.getDataLayout(); | 6678 const DataLayout &DL = DAG.getDataLayout(); |
7055 SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), | 6679 SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), |
7056 getRoot(), getValue(I.getOperand(0)), | 6680 getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), |
7057 DAG.getSrcValue(I.getOperand(0)), | 6681 DAG.getSrcValue(I.getOperand(0)), |
7058 DL.getABITypeAlignment(I.getType())); | 6682 DL.getABITypeAlignment(I.getType())); |
7059 setValue(&I, V); | 6683 setValue(&I, V); |
7060 DAG.setRoot(V.getValue(1)); | 6684 DAG.setRoot(V.getValue(1)); |
7061 } | 6685 } |
7081 /// \return A tuple of <return-value, token-chain> | 6705 /// \return A tuple of <return-value, token-chain> |
7082 /// | 6706 /// |
7083 /// This is a helper for lowering intrinsics that follow a target calling | 6707 /// This is a helper for lowering intrinsics that follow a target calling |
7084 /// convention or require stack pointer adjustment. Only a subset of the | 6708 /// convention or require stack pointer adjustment. Only a subset of the |
7085 /// intrinsic's operands need to participate in the calling convention. | 6709 /// intrinsic's operands need to participate in the calling convention. |
7086 std::pair<SDValue, SDValue> | 6710 std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( |
7087 SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, | 6711 ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, |
7088 unsigned NumArgs, SDValue Callee, | 6712 Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { |
7089 bool UseVoidTy, | |
7090 MachineBasicBlock *LandingPad, | |
7091 bool IsPatchPoint) { | |
7092 TargetLowering::ArgListTy Args; | 6713 TargetLowering::ArgListTy Args; |
7093 Args.reserve(NumArgs); | 6714 Args.reserve(NumArgs); |
7094 | 6715 |
7095 // Populate the argument list. | 6716 // Populate the argument list. |
7096 // Attributes for args start at offset 1, after the return attribute. | 6717 // Attributes for args start at offset 1, after the return attribute. |
7105 Entry.Ty = V->getType(); | 6726 Entry.Ty = V->getType(); |
7106 Entry.setAttributes(&CS, AttrI); | 6727 Entry.setAttributes(&CS, AttrI); |
7107 Args.push_back(Entry); | 6728 Args.push_back(Entry); |
7108 } | 6729 } |
7109 | 6730 |
7110 Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); | |
7111 TargetLowering::CallLoweringInfo CLI(DAG); | 6731 TargetLowering::CallLoweringInfo CLI(DAG); |
7112 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) | 6732 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) |
7113 .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) | 6733 .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) |
7114 .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); | 6734 .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); |
7115 | 6735 |
7116 return lowerInvokable(CLI, LandingPad); | 6736 return lowerInvokable(CLI, EHPadBB); |
7117 } | 6737 } |
7118 | 6738 |
7119 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap | 6739 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap |
7120 /// or patchpoint target node's operand list. | 6740 /// or patchpoint target node's operand list. |
7121 /// | 6741 /// |
7132 /// location is valid at any point during execution (this is similar to the | 6752 /// location is valid at any point during execution (this is similar to the |
7133 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were | 6753 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were |
7134 /// only available in a register, then the runtime would need to trap when | 6754 /// only available in a register, then the runtime would need to trap when |
7135 /// execution reaches the StackMap in order to read the alloca's location. | 6755 /// execution reaches the StackMap in order to read the alloca's location. |
7136 static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, | 6756 static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, |
7137 SmallVectorImpl<SDValue> &Ops, | 6757 SDLoc DL, SmallVectorImpl<SDValue> &Ops, |
7138 SelectionDAGBuilder &Builder) { | 6758 SelectionDAGBuilder &Builder) { |
7139 for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { | 6759 for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { |
7140 SDValue OpVal = Builder.getValue(CS.getArgument(i)); | 6760 SDValue OpVal = Builder.getValue(CS.getArgument(i)); |
7141 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { | 6761 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { |
7142 Ops.push_back( | 6762 Ops.push_back( |
7143 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); | 6763 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); |
7144 Ops.push_back( | 6764 Ops.push_back( |
7145 Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); | 6765 Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); |
7146 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { | 6766 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { |
7147 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); | 6767 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); |
7148 Ops.push_back( | 6768 Ops.push_back(Builder.DAG.getTargetFrameIndex( |
7149 Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); | 6769 FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); |
7150 } else | 6770 } else |
7151 Ops.push_back(OpVal); | 6771 Ops.push_back(OpVal); |
7152 } | 6772 } |
7153 } | 6773 } |
7154 | 6774 |
7162 SDValue Chain, InFlag, Callee, NullPtr; | 6782 SDValue Chain, InFlag, Callee, NullPtr; |
7163 SmallVector<SDValue, 32> Ops; | 6783 SmallVector<SDValue, 32> Ops; |
7164 | 6784 |
7165 SDLoc DL = getCurSDLoc(); | 6785 SDLoc DL = getCurSDLoc(); |
7166 Callee = getValue(CI.getCalledValue()); | 6786 Callee = getValue(CI.getCalledValue()); |
7167 NullPtr = DAG.getIntPtrConstant(0, true); | 6787 NullPtr = DAG.getIntPtrConstant(0, DL, true); |
7168 | 6788 |
7169 // The stackmap intrinsic only records the live variables (the arguemnts | 6789 // The stackmap intrinsic only records the live variables (the arguemnts |
7170 // passed to it) and emits NOPS (if requested). Unlike the patchpoint | 6790 // passed to it) and emits NOPS (if requested). Unlike the patchpoint |
7171 // intrinsic, this won't be lowered to a function call. This means we don't | 6791 // intrinsic, this won't be lowered to a function call. This means we don't |
7172 // have to worry about calling conventions and target specific lowering code. | 6792 // have to worry about calling conventions and target specific lowering code. |
7180 InFlag = Chain.getValue(1); | 6800 InFlag = Chain.getValue(1); |
7181 | 6801 |
7182 // Add the <id> and <numBytes> constants. | 6802 // Add the <id> and <numBytes> constants. |
7183 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); | 6803 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); |
7184 Ops.push_back(DAG.getTargetConstant( | 6804 Ops.push_back(DAG.getTargetConstant( |
7185 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); | 6805 cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); |
7186 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); | 6806 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); |
7187 Ops.push_back(DAG.getTargetConstant( | 6807 Ops.push_back(DAG.getTargetConstant( |
7188 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); | 6808 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, |
6809 MVT::i32)); | |
7189 | 6810 |
7190 // Push live variables for the stack map. | 6811 // Push live variables for the stack map. |
7191 addStackMapLiveVars(&CI, 2, Ops, *this); | 6812 addStackMapLiveVars(&CI, 2, DL, Ops, *this); |
7192 | 6813 |
7193 // We are not pushing any register mask info here on the operands list, | 6814 // We are not pushing any register mask info here on the operands list, |
7194 // because the stackmap doesn't clobber anything. | 6815 // because the stackmap doesn't clobber anything. |
7195 | 6816 |
7196 // Push the chain and the glue flag. | 6817 // Push the chain and the glue flag. |
7214 FuncInfo.MF->getFrameInfo()->setHasStackMap(); | 6835 FuncInfo.MF->getFrameInfo()->setHasStackMap(); |
7215 } | 6836 } |
7216 | 6837 |
7217 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. | 6838 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. |
7218 void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, | 6839 void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, |
7219 MachineBasicBlock *LandingPad) { | 6840 const BasicBlock *EHPadBB) { |
7220 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, | 6841 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, |
7221 // i32 <numBytes>, | 6842 // i32 <numBytes>, |
7222 // i8* <target>, | 6843 // i8* <target>, |
7223 // i32 <numArgs>, | 6844 // i32 <numArgs>, |
7224 // [Args...], | 6845 // [Args...], |
7225 // [live variables...]) | 6846 // [live variables...]) |
7226 | 6847 |
7227 CallingConv::ID CC = CS.getCallingConv(); | 6848 CallingConv::ID CC = CS.getCallingConv(); |
7228 bool IsAnyRegCC = CC == CallingConv::AnyReg; | 6849 bool IsAnyRegCC = CC == CallingConv::AnyReg; |
7229 bool HasDef = !CS->getType()->isVoidTy(); | 6850 bool HasDef = !CS->getType()->isVoidTy(); |
7230 SDValue Callee = getValue(CS->getOperand(2)); // <target> | 6851 SDLoc dl = getCurSDLoc(); |
6852 SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); | |
6853 | |
6854 // Handle immediate and symbolic callees. | |
6855 if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) | |
6856 Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, | |
6857 /*isTarget=*/true); | |
6858 else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) | |
6859 Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), | |
6860 SDLoc(SymbolicCallee), | |
6861 SymbolicCallee->getValueType(0)); | |
7231 | 6862 |
7232 // Get the real number of arguments participating in the call <numArgs> | 6863 // Get the real number of arguments participating in the call <numArgs> |
7233 SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); | 6864 SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); |
7234 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); | 6865 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); |
7235 | 6866 |
7239 assert(CS.arg_size() >= NumMetaOpers + NumArgs && | 6870 assert(CS.arg_size() >= NumMetaOpers + NumArgs && |
7240 "Not enough arguments provided to the patchpoint intrinsic"); | 6871 "Not enough arguments provided to the patchpoint intrinsic"); |
7241 | 6872 |
7242 // For AnyRegCC the arguments are lowered later on manually. | 6873 // For AnyRegCC the arguments are lowered later on manually. |
7243 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; | 6874 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; |
7244 std::pair<SDValue, SDValue> Result = | 6875 Type *ReturnTy = |
7245 lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, | 6876 IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); |
7246 LandingPad, true); | 6877 std::pair<SDValue, SDValue> Result = lowerCallOperands( |
6878 CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); | |
7247 | 6879 |
7248 SDNode *CallEnd = Result.second.getNode(); | 6880 SDNode *CallEnd = Result.second.getNode(); |
7249 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) | 6881 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) |
7250 CallEnd = CallEnd->getOperand(0).getNode(); | 6882 CallEnd = CallEnd->getOperand(0).getNode(); |
7251 | 6883 |
7260 SmallVector<SDValue, 8> Ops; | 6892 SmallVector<SDValue, 8> Ops; |
7261 | 6893 |
7262 // Add the <id> and <numBytes> constants. | 6894 // Add the <id> and <numBytes> constants. |
7263 SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); | 6895 SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); |
7264 Ops.push_back(DAG.getTargetConstant( | 6896 Ops.push_back(DAG.getTargetConstant( |
7265 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); | 6897 cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); |
7266 SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); | 6898 SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); |
7267 Ops.push_back(DAG.getTargetConstant( | 6899 Ops.push_back(DAG.getTargetConstant( |
7268 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); | 6900 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, |
7269 | 6901 MVT::i32)); |
7270 // Assume that the Callee is a constant address. | 6902 |
7271 // FIXME: handle function symbols in the future. | 6903 // Add the callee. |
7272 Ops.push_back( | 6904 Ops.push_back(Callee); |
7273 DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), | |
7274 /*isTarget=*/true)); | |
7275 | 6905 |
7276 // Adjust <numArgs> to account for any arguments that have been passed on the | 6906 // Adjust <numArgs> to account for any arguments that have been passed on the |
7277 // stack instead. | 6907 // stack instead. |
7278 // Call Node: Chain, Target, {Args}, RegMask, [Glue] | 6908 // Call Node: Chain, Target, {Args}, RegMask, [Glue] |
7279 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); | 6909 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); |
7280 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; | 6910 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; |
7281 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); | 6911 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); |
7282 | 6912 |
7283 // Add the calling convention | 6913 // Add the calling convention |
7284 Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); | 6914 Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); |
7285 | 6915 |
7286 // Add the arguments we omitted previously. The register allocator should | 6916 // Add the arguments we omitted previously. The register allocator should |
7287 // place these in any free register. | 6917 // place these in any free register. |
7288 if (IsAnyRegCC) | 6918 if (IsAnyRegCC) |
7289 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) | 6919 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) |
7292 // Push the arguments from the call instruction up to the register mask. | 6922 // Push the arguments from the call instruction up to the register mask. |
7293 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; | 6923 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; |
7294 Ops.append(Call->op_begin() + 2, e); | 6924 Ops.append(Call->op_begin() + 2, e); |
7295 | 6925 |
7296 // Push live variables for the stack map. | 6926 // Push live variables for the stack map. |
7297 addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); | 6927 addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); |
7298 | 6928 |
7299 // Push the register mask info. | 6929 // Push the register mask info. |
7300 if (HasGlue) | 6930 if (HasGlue) |
7301 Ops.push_back(*(Call->op_end()-2)); | 6931 Ops.push_back(*(Call->op_end()-2)); |
7302 else | 6932 else |
7313 SDVTList NodeTys; | 6943 SDVTList NodeTys; |
7314 if (IsAnyRegCC && HasDef) { | 6944 if (IsAnyRegCC && HasDef) { |
7315 // Create the return types based on the intrinsic definition | 6945 // Create the return types based on the intrinsic definition |
7316 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 6946 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7317 SmallVector<EVT, 3> ValueVTs; | 6947 SmallVector<EVT, 3> ValueVTs; |
7318 ComputeValueVTs(TLI, CS->getType(), ValueVTs); | 6948 ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); |
7319 assert(ValueVTs.size() == 1 && "Expected only one return value type."); | 6949 assert(ValueVTs.size() == 1 && "Expected only one return value type."); |
7320 | 6950 |
7321 // There is always a chain and a glue type at the end | 6951 // There is always a chain and a glue type at the end |
7322 ValueVTs.push_back(MVT::Other); | 6952 ValueVTs.push_back(MVT::Other); |
7323 ValueVTs.push_back(MVT::Glue); | 6953 ValueVTs.push_back(MVT::Glue); |
7325 } else | 6955 } else |
7326 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); | 6956 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
7327 | 6957 |
7328 // Replace the target specific call node with a PATCHPOINT node. | 6958 // Replace the target specific call node with a PATCHPOINT node. |
7329 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, | 6959 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, |
7330 getCurSDLoc(), NodeTys, Ops); | 6960 dl, NodeTys, Ops); |
7331 | 6961 |
7332 // Update the NodeMap. | 6962 // Update the NodeMap. |
7333 if (HasDef) { | 6963 if (HasDef) { |
7334 if (IsAnyRegCC) | 6964 if (IsAnyRegCC) |
7335 setValue(CS.getInstruction(), SDValue(MN, 0)); | 6965 setValue(CS.getInstruction(), SDValue(MN, 0)); |
7377 // Handle the incoming return values from the call. | 7007 // Handle the incoming return values from the call. |
7378 CLI.Ins.clear(); | 7008 CLI.Ins.clear(); |
7379 Type *OrigRetTy = CLI.RetTy; | 7009 Type *OrigRetTy = CLI.RetTy; |
7380 SmallVector<EVT, 4> RetTys; | 7010 SmallVector<EVT, 4> RetTys; |
7381 SmallVector<uint64_t, 4> Offsets; | 7011 SmallVector<uint64_t, 4> Offsets; |
7382 ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); | 7012 auto &DL = CLI.DAG.getDataLayout(); |
7013 ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); | |
7383 | 7014 |
7384 SmallVector<ISD::OutputArg, 4> Outs; | 7015 SmallVector<ISD::OutputArg, 4> Outs; |
7385 GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); | 7016 GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); |
7386 | 7017 |
7387 bool CanLowerReturn = | 7018 bool CanLowerReturn = |
7388 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), | 7019 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), |
7389 CLI.IsVarArg, Outs, CLI.RetTy->getContext()); | 7020 CLI.IsVarArg, Outs, CLI.RetTy->getContext()); |
7390 | 7021 |
7392 int DemoteStackIdx = -100; | 7023 int DemoteStackIdx = -100; |
7393 if (!CanLowerReturn) { | 7024 if (!CanLowerReturn) { |
7394 // FIXME: equivalent assert? | 7025 // FIXME: equivalent assert? |
7395 // assert(!CS.hasInAllocaArgument() && | 7026 // assert(!CS.hasInAllocaArgument() && |
7396 // "sret demotion is incompatible with inalloca"); | 7027 // "sret demotion is incompatible with inalloca"); |
7397 uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); | 7028 uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); |
7398 unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); | 7029 unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); |
7399 MachineFunction &MF = CLI.DAG.getMachineFunction(); | 7030 MachineFunction &MF = CLI.DAG.getMachineFunction(); |
7400 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); | 7031 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); |
7401 Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); | 7032 Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); |
7402 | 7033 |
7403 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); | 7034 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); |
7404 ArgListEntry Entry; | 7035 ArgListEntry Entry; |
7405 Entry.Node = DemoteStackSlot; | 7036 Entry.Node = DemoteStackSlot; |
7406 Entry.Ty = StackSlotPtrType; | 7037 Entry.Ty = StackSlotPtrType; |
7407 Entry.isSExt = false; | 7038 Entry.isSExt = false; |
7408 Entry.isZExt = false; | 7039 Entry.isZExt = false; |
7412 Entry.isByVal = false; | 7043 Entry.isByVal = false; |
7413 Entry.isReturned = false; | 7044 Entry.isReturned = false; |
7414 Entry.Alignment = Align; | 7045 Entry.Alignment = Align; |
7415 CLI.getArgs().insert(CLI.getArgs().begin(), Entry); | 7046 CLI.getArgs().insert(CLI.getArgs().begin(), Entry); |
7416 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); | 7047 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); |
7048 | |
7049 // sret demotion isn't compatible with tail-calls, since the sret argument | |
7050 // points into the callers stack frame. | |
7051 CLI.IsTailCall = false; | |
7417 } else { | 7052 } else { |
7418 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { | 7053 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { |
7419 EVT VT = RetTys[I]; | 7054 EVT VT = RetTys[I]; |
7420 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); | 7055 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); |
7421 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); | 7056 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); |
7439 CLI.Outs.clear(); | 7074 CLI.Outs.clear(); |
7440 CLI.OutVals.clear(); | 7075 CLI.OutVals.clear(); |
7441 ArgListTy &Args = CLI.getArgs(); | 7076 ArgListTy &Args = CLI.getArgs(); |
7442 for (unsigned i = 0, e = Args.size(); i != e; ++i) { | 7077 for (unsigned i = 0, e = Args.size(); i != e; ++i) { |
7443 SmallVector<EVT, 4> ValueVTs; | 7078 SmallVector<EVT, 4> ValueVTs; |
7444 ComputeValueVTs(*this, Args[i].Ty, ValueVTs); | 7079 ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); |
7445 Type *FinalType = Args[i].Ty; | 7080 Type *FinalType = Args[i].Ty; |
7446 if (Args[i].isByVal) | 7081 if (Args[i].isByVal) |
7447 FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); | 7082 FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); |
7448 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( | 7083 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( |
7449 FinalType, CLI.CallConv, CLI.IsVarArg); | 7084 FinalType, CLI.CallConv, CLI.IsVarArg); |
7452 EVT VT = ValueVTs[Value]; | 7087 EVT VT = ValueVTs[Value]; |
7453 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); | 7088 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); |
7454 SDValue Op = SDValue(Args[i].Node.getNode(), | 7089 SDValue Op = SDValue(Args[i].Node.getNode(), |
7455 Args[i].Node.getResNo() + Value); | 7090 Args[i].Node.getResNo() + Value); |
7456 ISD::ArgFlagsTy Flags; | 7091 ISD::ArgFlagsTy Flags; |
7457 unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); | 7092 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); |
7458 | 7093 |
7459 if (Args[i].isZExt) | 7094 if (Args[i].isZExt) |
7460 Flags.setZExt(); | 7095 Flags.setZExt(); |
7461 if (Args[i].isSExt) | 7096 if (Args[i].isSExt) |
7462 Flags.setSExt(); | 7097 Flags.setSExt(); |
7476 Flags.setByVal(); | 7111 Flags.setByVal(); |
7477 } | 7112 } |
7478 if (Args[i].isByVal || Args[i].isInAlloca) { | 7113 if (Args[i].isByVal || Args[i].isInAlloca) { |
7479 PointerType *Ty = cast<PointerType>(Args[i].Ty); | 7114 PointerType *Ty = cast<PointerType>(Args[i].Ty); |
7480 Type *ElementTy = Ty->getElementType(); | 7115 Type *ElementTy = Ty->getElementType(); |
7481 Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); | 7116 Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); |
7482 // For ByVal, alignment should come from FE. BE will guess if this | 7117 // For ByVal, alignment should come from FE. BE will guess if this |
7483 // info is not there but there are cases it cannot get right. | 7118 // info is not there but there are cases it cannot get right. |
7484 unsigned FrameAlign; | 7119 unsigned FrameAlign; |
7485 if (Args[i].Alignment) | 7120 if (Args[i].Alignment) |
7486 FrameAlign = Args[i].Alignment; | 7121 FrameAlign = Args[i].Alignment; |
7487 else | 7122 else |
7488 FrameAlign = getByValTypeAlignment(ElementTy); | 7123 FrameAlign = getByValTypeAlignment(ElementTy, DL); |
7489 Flags.setByValAlign(FrameAlign); | 7124 Flags.setByValAlign(FrameAlign); |
7490 } | 7125 } |
7491 if (Args[i].isNest) | 7126 if (Args[i].isNest) |
7492 Flags.setNest(); | 7127 Flags.setNest(); |
7493 if (NeedsRegBlock) { | 7128 if (NeedsRegBlock) |
7494 Flags.setInConsecutiveRegs(); | 7129 Flags.setInConsecutiveRegs(); |
7495 if (Value == NumValues - 1) | |
7496 Flags.setInConsecutiveRegsLast(); | |
7497 } | |
7498 Flags.setOrigAlign(OriginalAlignment); | 7130 Flags.setOrigAlign(OriginalAlignment); |
7499 | 7131 |
7500 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); | 7132 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); |
7501 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); | 7133 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); |
7502 SmallVector<SDValue, 4> Parts(NumParts); | 7134 SmallVector<SDValue, 4> Parts(NumParts); |
7541 MyFlags.Flags.setOrigAlign(1); | 7173 MyFlags.Flags.setOrigAlign(1); |
7542 | 7174 |
7543 CLI.Outs.push_back(MyFlags); | 7175 CLI.Outs.push_back(MyFlags); |
7544 CLI.OutVals.push_back(Parts[j]); | 7176 CLI.OutVals.push_back(Parts[j]); |
7545 } | 7177 } |
7178 | |
7179 if (NeedsRegBlock && Value == NumValues - 1) | |
7180 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); | |
7546 } | 7181 } |
7547 } | 7182 } |
7548 | 7183 |
7549 SmallVector<SDValue, 4> InVals; | 7184 SmallVector<SDValue, 4> InVals; |
7550 CLI.Chain = LowerCall(CLI, InVals); | 7185 CLI.Chain = LowerCall(CLI, InVals); |
7578 // The instruction result is the result of loading from the | 7213 // The instruction result is the result of loading from the |
7579 // hidden sret parameter. | 7214 // hidden sret parameter. |
7580 SmallVector<EVT, 1> PVTs; | 7215 SmallVector<EVT, 1> PVTs; |
7581 Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); | 7216 Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); |
7582 | 7217 |
7583 ComputeValueVTs(*this, PtrRetTy, PVTs); | 7218 ComputeValueVTs(*this, DL, PtrRetTy, PVTs); |
7584 assert(PVTs.size() == 1 && "Pointers should fit in one register"); | 7219 assert(PVTs.size() == 1 && "Pointers should fit in one register"); |
7585 EVT PtrVT = PVTs[0]; | 7220 EVT PtrVT = PVTs[0]; |
7586 | 7221 |
7587 unsigned NumValues = RetTys.size(); | 7222 unsigned NumValues = RetTys.size(); |
7588 ReturnValues.resize(NumValues); | 7223 ReturnValues.resize(NumValues); |
7589 SmallVector<SDValue, 4> Chains(NumValues); | 7224 SmallVector<SDValue, 4> Chains(NumValues); |
7590 | 7225 |
7591 for (unsigned i = 0; i < NumValues; ++i) { | 7226 for (unsigned i = 0; i < NumValues; ++i) { |
7592 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, | 7227 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, |
7593 CLI.DAG.getConstant(Offsets[i], PtrVT)); | 7228 CLI.DAG.getConstant(Offsets[i], CLI.DL, |
7229 PtrVT)); | |
7594 SDValue L = CLI.DAG.getLoad( | 7230 SDValue L = CLI.DAG.getLoad( |
7595 RetTys[i], CLI.DL, CLI.Chain, Add, | 7231 RetTys[i], CLI.DL, CLI.Chain, Add, |
7596 MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, | 7232 MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), |
7597 false, false, 1); | 7233 DemoteStackIdx, Offsets[i]), |
7234 false, false, false, 1); | |
7598 ReturnValues[i] = L; | 7235 ReturnValues[i] = L; |
7599 Chains[i] = L.getValue(1); | 7236 Chains[i] = L.getValue(1); |
7600 } | 7237 } |
7601 | 7238 |
7602 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); | 7239 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); |
7651 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && | 7288 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && |
7652 "Copy from a reg to the same reg!"); | 7289 "Copy from a reg to the same reg!"); |
7653 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); | 7290 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); |
7654 | 7291 |
7655 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 7292 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7656 RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); | 7293 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, |
7294 V->getType()); | |
7657 SDValue Chain = DAG.getEntryNode(); | 7295 SDValue Chain = DAG.getEntryNode(); |
7658 | 7296 |
7659 ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == | 7297 ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == |
7660 FuncInfo.PreferredExtendType.end()) | 7298 FuncInfo.PreferredExtendType.end()) |
7661 ? ISD::ANY_EXTEND | 7299 ? ISD::ANY_EXTEND |
7684 } | 7322 } |
7685 | 7323 |
7686 void SelectionDAGISel::LowerArguments(const Function &F) { | 7324 void SelectionDAGISel::LowerArguments(const Function &F) { |
7687 SelectionDAG &DAG = SDB->DAG; | 7325 SelectionDAG &DAG = SDB->DAG; |
7688 SDLoc dl = SDB->getCurSDLoc(); | 7326 SDLoc dl = SDB->getCurSDLoc(); |
7689 const DataLayout *DL = TLI->getDataLayout(); | 7327 const DataLayout &DL = DAG.getDataLayout(); |
7690 SmallVector<ISD::InputArg, 16> Ins; | 7328 SmallVector<ISD::InputArg, 16> Ins; |
7691 | 7329 |
7692 if (!FuncInfo->CanLowerReturn) { | 7330 if (!FuncInfo->CanLowerReturn) { |
7693 // Put in an sret pointer parameter before all the other parameters. | 7331 // Put in an sret pointer parameter before all the other parameters. |
7694 SmallVector<EVT, 1> ValueVTs; | 7332 SmallVector<EVT, 1> ValueVTs; |
7695 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); | 7333 ComputeValueVTs(*TLI, DAG.getDataLayout(), |
7334 PointerType::getUnqual(F.getReturnType()), ValueVTs); | |
7696 | 7335 |
7697 // NOTE: Assuming that a pointer will never break down to more than one VT | 7336 // NOTE: Assuming that a pointer will never break down to more than one VT |
7698 // or one register. | 7337 // or one register. |
7699 ISD::ArgFlagsTy Flags; | 7338 ISD::ArgFlagsTy Flags; |
7700 Flags.setSRet(); | 7339 Flags.setSRet(); |
7707 // Set up the incoming argument description vector. | 7346 // Set up the incoming argument description vector. |
7708 unsigned Idx = 1; | 7347 unsigned Idx = 1; |
7709 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); | 7348 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); |
7710 I != E; ++I, ++Idx) { | 7349 I != E; ++I, ++Idx) { |
7711 SmallVector<EVT, 4> ValueVTs; | 7350 SmallVector<EVT, 4> ValueVTs; |
7712 ComputeValueVTs(*TLI, I->getType(), ValueVTs); | 7351 ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); |
7713 bool isArgValueUsed = !I->use_empty(); | 7352 bool isArgValueUsed = !I->use_empty(); |
7714 unsigned PartBase = 0; | 7353 unsigned PartBase = 0; |
7715 Type *FinalType = I->getType(); | 7354 Type *FinalType = I->getType(); |
7716 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) | 7355 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) |
7717 FinalType = cast<PointerType>(FinalType)->getElementType(); | 7356 FinalType = cast<PointerType>(FinalType)->getElementType(); |
7720 for (unsigned Value = 0, NumValues = ValueVTs.size(); | 7359 for (unsigned Value = 0, NumValues = ValueVTs.size(); |
7721 Value != NumValues; ++Value) { | 7360 Value != NumValues; ++Value) { |
7722 EVT VT = ValueVTs[Value]; | 7361 EVT VT = ValueVTs[Value]; |
7723 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); | 7362 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); |
7724 ISD::ArgFlagsTy Flags; | 7363 ISD::ArgFlagsTy Flags; |
7725 unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); | 7364 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); |
7726 | 7365 |
7727 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) | 7366 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) |
7728 Flags.setZExt(); | 7367 Flags.setZExt(); |
7729 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) | 7368 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) |
7730 Flags.setSExt(); | 7369 Flags.setSExt(); |
7744 Flags.setByVal(); | 7383 Flags.setByVal(); |
7745 } | 7384 } |
7746 if (Flags.isByVal() || Flags.isInAlloca()) { | 7385 if (Flags.isByVal() || Flags.isInAlloca()) { |
7747 PointerType *Ty = cast<PointerType>(I->getType()); | 7386 PointerType *Ty = cast<PointerType>(I->getType()); |
7748 Type *ElementTy = Ty->getElementType(); | 7387 Type *ElementTy = Ty->getElementType(); |
7749 Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); | 7388 Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); |
7750 // For ByVal, alignment should be passed from FE. BE will guess if | 7389 // For ByVal, alignment should be passed from FE. BE will guess if |
7751 // this info is not there but there are cases it cannot get right. | 7390 // this info is not there but there are cases it cannot get right. |
7752 unsigned FrameAlign; | 7391 unsigned FrameAlign; |
7753 if (F.getParamAlignment(Idx)) | 7392 if (F.getParamAlignment(Idx)) |
7754 FrameAlign = F.getParamAlignment(Idx); | 7393 FrameAlign = F.getParamAlignment(Idx); |
7755 else | 7394 else |
7756 FrameAlign = TLI->getByValTypeAlignment(ElementTy); | 7395 FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); |
7757 Flags.setByValAlign(FrameAlign); | 7396 Flags.setByValAlign(FrameAlign); |
7758 } | 7397 } |
7759 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) | 7398 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) |
7760 Flags.setNest(); | 7399 Flags.setNest(); |
7761 if (NeedsRegBlock) { | 7400 if (NeedsRegBlock) |
7762 Flags.setInConsecutiveRegs(); | 7401 Flags.setInConsecutiveRegs(); |
7763 if (Value == NumValues - 1) | |
7764 Flags.setInConsecutiveRegsLast(); | |
7765 } | |
7766 Flags.setOrigAlign(OriginalAlignment); | 7402 Flags.setOrigAlign(OriginalAlignment); |
7767 | 7403 |
7768 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); | 7404 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); |
7769 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); | 7405 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); |
7770 for (unsigned i = 0; i != NumRegs; ++i) { | 7406 for (unsigned i = 0; i != NumRegs; ++i) { |
7775 // if it isn't first piece, alignment must be 1 | 7411 // if it isn't first piece, alignment must be 1 |
7776 else if (i > 0) | 7412 else if (i > 0) |
7777 MyFlags.Flags.setOrigAlign(1); | 7413 MyFlags.Flags.setOrigAlign(1); |
7778 Ins.push_back(MyFlags); | 7414 Ins.push_back(MyFlags); |
7779 } | 7415 } |
7416 if (NeedsRegBlock && Value == NumValues - 1) | |
7417 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); | |
7780 PartBase += VT.getStoreSize(); | 7418 PartBase += VT.getStoreSize(); |
7781 } | 7419 } |
7782 } | 7420 } |
7783 | 7421 |
7784 // Call the target to set up the argument values. | 7422 // Call the target to set up the argument values. |
7808 Idx = 1; | 7446 Idx = 1; |
7809 if (!FuncInfo->CanLowerReturn) { | 7447 if (!FuncInfo->CanLowerReturn) { |
7810 // Create a virtual register for the sret pointer, and put in a copy | 7448 // Create a virtual register for the sret pointer, and put in a copy |
7811 // from the sret argument into it. | 7449 // from the sret argument into it. |
7812 SmallVector<EVT, 1> ValueVTs; | 7450 SmallVector<EVT, 1> ValueVTs; |
7813 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); | 7451 ComputeValueVTs(*TLI, DAG.getDataLayout(), |
7452 PointerType::getUnqual(F.getReturnType()), ValueVTs); | |
7814 MVT VT = ValueVTs[0].getSimpleVT(); | 7453 MVT VT = ValueVTs[0].getSimpleVT(); |
7815 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); | 7454 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); |
7816 ISD::NodeType AssertOp = ISD::DELETED_NODE; | 7455 ISD::NodeType AssertOp = ISD::DELETED_NODE; |
7817 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, | 7456 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, |
7818 RegVT, VT, nullptr, AssertOp); | 7457 RegVT, VT, nullptr, AssertOp); |
7832 | 7471 |
7833 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; | 7472 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; |
7834 ++I, ++Idx) { | 7473 ++I, ++Idx) { |
7835 SmallVector<SDValue, 4> ArgValues; | 7474 SmallVector<SDValue, 4> ArgValues; |
7836 SmallVector<EVT, 4> ValueVTs; | 7475 SmallVector<EVT, 4> ValueVTs; |
7837 ComputeValueVTs(*TLI, I->getType(), ValueVTs); | 7476 ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); |
7838 unsigned NumValues = ValueVTs.size(); | 7477 unsigned NumValues = ValueVTs.size(); |
7839 | 7478 |
7840 // If this argument is unused then remember its value. It is used to generate | 7479 // If this argument is unused then remember its value. It is used to generate |
7841 // debugging information. | 7480 // debugging information. |
7842 if (I->use_empty() && NumValues) { | 7481 if (I->use_empty() && NumValues) { |
7909 } | 7548 } |
7910 | 7549 |
7911 assert(i == InVals.size() && "Argument register count mismatch!"); | 7550 assert(i == InVals.size() && "Argument register count mismatch!"); |
7912 | 7551 |
7913 // Finally, if the target has anything special to do, allow it to do so. | 7552 // Finally, if the target has anything special to do, allow it to do so. |
7914 // FIXME: this should insert code into the DAG! | |
7915 EmitFunctionEntryCode(); | 7553 EmitFunctionEntryCode(); |
7916 } | 7554 } |
7917 | 7555 |
7918 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to | 7556 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to |
7919 /// ensure constants are generated when needed. Remember the virtual registers | 7557 /// ensure constants are generated when needed. Remember the virtual registers |
7926 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { | 7564 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { |
7927 const TerminatorInst *TI = LLVMBB->getTerminator(); | 7565 const TerminatorInst *TI = LLVMBB->getTerminator(); |
7928 | 7566 |
7929 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; | 7567 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; |
7930 | 7568 |
7931 // Check successor nodes' PHI nodes that expect a constant to be available | 7569 // Check PHI nodes in successors that expect a value to be available from this |
7932 // from this block. | 7570 // block. |
7933 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { | 7571 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { |
7934 const BasicBlock *SuccBB = TI->getSuccessor(succ); | 7572 const BasicBlock *SuccBB = TI->getSuccessor(succ); |
7935 if (!isa<PHINode>(SuccBB->begin())) continue; | 7573 if (!isa<PHINode>(SuccBB->begin())) continue; |
7936 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; | 7574 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; |
7937 | 7575 |
7980 | 7618 |
7981 // Remember that this register needs to be added to the machine PHI node as | 7619 // Remember that this register needs to be added to the machine PHI node as |
7982 // the input for this MBB. | 7620 // the input for this MBB. |
7983 SmallVector<EVT, 4> ValueVTs; | 7621 SmallVector<EVT, 4> ValueVTs; |
7984 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | 7622 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7985 ComputeValueVTs(TLI, PN->getType(), ValueVTs); | 7623 ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); |
7986 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { | 7624 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { |
7987 EVT VT = ValueVTs[vti]; | 7625 EVT VT = ValueVTs[vti]; |
7988 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); | 7626 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); |
7989 for (unsigned i = 0, e = NumRegisters; i != e; ++i) | 7627 for (unsigned i = 0, e = NumRegisters; i != e; ++i) |
7990 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); | 7628 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); |
8014 // Add it as a successor of ParentMBB. | 7652 // Add it as a successor of ParentMBB. |
8015 ParentMBB->addSuccessor( | 7653 ParentMBB->addSuccessor( |
8016 SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); | 7654 SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); |
8017 return SuccMBB; | 7655 return SuccMBB; |
8018 } | 7656 } |
7657 | |
7658 MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { | |
7659 MachineFunction::iterator I = MBB; | |
7660 if (++I == FuncInfo.MF->end()) | |
7661 return nullptr; | |
7662 return I; | |
7663 } | |
7664 | |
7665 /// During lowering new call nodes can be created (such as memset, etc.). | |
7666 /// Those will become new roots of the current DAG, but complications arise | |
7667 /// when they are tail calls. In such cases, the call lowering will update | |
7668 /// the root, but the builder still needs to know that a tail call has been | |
7669 /// lowered in order to avoid generating an additional return. | |
7670 void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { | |
7671 // If the node is null, we do have a tail call. | |
7672 if (MaybeTC.getNode() != nullptr) | |
7673 DAG.setRoot(MaybeTC); | |
7674 else | |
7675 HasTailCall = true; | |
7676 } | |
7677 | |
7678 bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, | |
7679 unsigned *TotalCases, unsigned First, | |
7680 unsigned Last) { | |
7681 assert(Last >= First); | |
7682 assert(TotalCases[Last] >= TotalCases[First]); | |
7683 | |
7684 APInt LowCase = Clusters[First].Low->getValue(); | |
7685 APInt HighCase = Clusters[Last].High->getValue(); | |
7686 assert(LowCase.getBitWidth() == HighCase.getBitWidth()); | |
7687 | |
7688 // FIXME: A range of consecutive cases has 100% density, but only requires one | |
7689 // comparison to lower. We should discriminate against such consecutive ranges | |
7690 // in jump tables. | |
7691 | |
7692 uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); | |
7693 uint64_t Range = Diff + 1; | |
7694 | |
7695 uint64_t NumCases = | |
7696 TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); | |
7697 | |
7698 assert(NumCases < UINT64_MAX / 100); | |
7699 assert(Range >= NumCases); | |
7700 | |
7701 return NumCases * 100 >= Range * MinJumpTableDensity; | |
7702 } | |
7703 | |
7704 static inline bool areJTsAllowed(const TargetLowering &TLI) { | |
7705 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | |
7706 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | |
7707 } | |
7708 | |
7709 bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, | |
7710 unsigned First, unsigned Last, | |
7711 const SwitchInst *SI, | |
7712 MachineBasicBlock *DefaultMBB, | |
7713 CaseCluster &JTCluster) { | |
7714 assert(First <= Last); | |
7715 | |
7716 uint32_t Weight = 0; | |
7717 unsigned NumCmps = 0; | |
7718 std::vector<MachineBasicBlock*> Table; | |
7719 DenseMap<MachineBasicBlock*, uint32_t> JTWeights; | |
7720 for (unsigned I = First; I <= Last; ++I) { | |
7721 assert(Clusters[I].Kind == CC_Range); | |
7722 Weight += Clusters[I].Weight; | |
7723 assert(Weight >= Clusters[I].Weight && "Weight overflow!"); | |
7724 APInt Low = Clusters[I].Low->getValue(); | |
7725 APInt High = Clusters[I].High->getValue(); | |
7726 NumCmps += (Low == High) ? 1 : 2; | |
7727 if (I != First) { | |
7728 // Fill the gap between this and the previous cluster. | |
7729 APInt PreviousHigh = Clusters[I - 1].High->getValue(); | |
7730 assert(PreviousHigh.slt(Low)); | |
7731 uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; | |
7732 for (uint64_t J = 0; J < Gap; J++) | |
7733 Table.push_back(DefaultMBB); | |
7734 } | |
7735 uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; | |
7736 for (uint64_t J = 0; J < ClusterSize; ++J) | |
7737 Table.push_back(Clusters[I].MBB); | |
7738 JTWeights[Clusters[I].MBB] += Clusters[I].Weight; | |
7739 } | |
7740 | |
7741 unsigned NumDests = JTWeights.size(); | |
7742 if (isSuitableForBitTests(NumDests, NumCmps, | |
7743 Clusters[First].Low->getValue(), | |
7744 Clusters[Last].High->getValue())) { | |
7745 // Clusters[First..Last] should be lowered as bit tests instead. | |
7746 return false; | |
7747 } | |
7748 | |
7749 // Create the MBB that will load from and jump through the table. | |
7750 // Note: We create it here, but it's not inserted into the function yet. | |
7751 MachineFunction *CurMF = FuncInfo.MF; | |
7752 MachineBasicBlock *JumpTableMBB = | |
7753 CurMF->CreateMachineBasicBlock(SI->getParent()); | |
7754 | |
7755 // Add successors. Note: use table order for determinism. | |
7756 SmallPtrSet<MachineBasicBlock *, 8> Done; | |
7757 for (MachineBasicBlock *Succ : Table) { | |
7758 if (Done.count(Succ)) | |
7759 continue; | |
7760 addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); | |
7761 Done.insert(Succ); | |
7762 } | |
7763 | |
7764 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
7765 unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) | |
7766 ->createJumpTableIndex(Table); | |
7767 | |
7768 // Set up the jump table info. | |
7769 JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); | |
7770 JumpTableHeader JTH(Clusters[First].Low->getValue(), | |
7771 Clusters[Last].High->getValue(), SI->getCondition(), | |
7772 nullptr, false); | |
7773 JTCases.emplace_back(std::move(JTH), std::move(JT)); | |
7774 | |
7775 JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, | |
7776 JTCases.size() - 1, Weight); | |
7777 return true; | |
7778 } | |
7779 | |
7780 void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, | |
7781 const SwitchInst *SI, | |
7782 MachineBasicBlock *DefaultMBB) { | |
7783 #ifndef NDEBUG | |
7784 // Clusters must be non-empty, sorted, and only contain Range clusters. | |
7785 assert(!Clusters.empty()); | |
7786 for (CaseCluster &C : Clusters) | |
7787 assert(C.Kind == CC_Range); | |
7788 for (unsigned i = 1, e = Clusters.size(); i < e; ++i) | |
7789 assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); | |
7790 #endif | |
7791 | |
7792 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
7793 if (!areJTsAllowed(TLI)) | |
7794 return; | |
7795 | |
7796 const int64_t N = Clusters.size(); | |
7797 const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); | |
7798 | |
7799 // TotalCases[i]: Total nbr of cases in Clusters[0..i]. | |
7800 SmallVector<unsigned, 8> TotalCases(N); | |
7801 | |
7802 for (unsigned i = 0; i < N; ++i) { | |
7803 APInt Hi = Clusters[i].High->getValue(); | |
7804 APInt Lo = Clusters[i].Low->getValue(); | |
7805 TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; | |
7806 if (i != 0) | |
7807 TotalCases[i] += TotalCases[i - 1]; | |
7808 } | |
7809 | |
7810 if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { | |
7811 // Cheap case: the whole range might be suitable for jump table. | |
7812 CaseCluster JTCluster; | |
7813 if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { | |
7814 Clusters[0] = JTCluster; | |
7815 Clusters.resize(1); | |
7816 return; | |
7817 } | |
7818 } | |
7819 | |
7820 // The algorithm below is not suitable for -O0. | |
7821 if (TM.getOptLevel() == CodeGenOpt::None) | |
7822 return; | |
7823 | |
7824 // Split Clusters into minimum number of dense partitions. The algorithm uses | |
7825 // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code | |
7826 // for the Case Statement'" (1994), but builds the MinPartitions array in | |
7827 // reverse order to make it easier to reconstruct the partitions in ascending | |
7828 // order. In the choice between two optimal partitionings, it picks the one | |
7829 // which yields more jump tables. | |
7830 | |
7831 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. | |
7832 SmallVector<unsigned, 8> MinPartitions(N); | |
7833 // LastElement[i] is the last element of the partition starting at i. | |
7834 SmallVector<unsigned, 8> LastElement(N); | |
7835 // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. | |
7836 SmallVector<unsigned, 8> NumTables(N); | |
7837 | |
7838 // Base case: There is only one way to partition Clusters[N-1]. | |
7839 MinPartitions[N - 1] = 1; | |
7840 LastElement[N - 1] = N - 1; | |
7841 assert(MinJumpTableSize > 1); | |
7842 NumTables[N - 1] = 0; | |
7843 | |
7844 // Note: loop indexes are signed to avoid underflow. | |
7845 for (int64_t i = N - 2; i >= 0; i--) { | |
7846 // Find optimal partitioning of Clusters[i..N-1]. | |
7847 // Baseline: Put Clusters[i] into a partition on its own. | |
7848 MinPartitions[i] = MinPartitions[i + 1] + 1; | |
7849 LastElement[i] = i; | |
7850 NumTables[i] = NumTables[i + 1]; | |
7851 | |
7852 // Search for a solution that results in fewer partitions. | |
7853 for (int64_t j = N - 1; j > i; j--) { | |
7854 // Try building a partition from Clusters[i..j]. | |
7855 if (isDense(Clusters, &TotalCases[0], i, j)) { | |
7856 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); | |
7857 bool IsTable = j - i + 1 >= MinJumpTableSize; | |
7858 unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); | |
7859 | |
7860 // If this j leads to fewer partitions, or same number of partitions | |
7861 // with more lookup tables, it is a better partitioning. | |
7862 if (NumPartitions < MinPartitions[i] || | |
7863 (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { | |
7864 MinPartitions[i] = NumPartitions; | |
7865 LastElement[i] = j; | |
7866 NumTables[i] = Tables; | |
7867 } | |
7868 } | |
7869 } | |
7870 } | |
7871 | |
7872 // Iterate over the partitions, replacing some with jump tables in-place. | |
7873 unsigned DstIndex = 0; | |
7874 for (unsigned First = 0, Last; First < N; First = Last + 1) { | |
7875 Last = LastElement[First]; | |
7876 assert(Last >= First); | |
7877 assert(DstIndex <= First); | |
7878 unsigned NumClusters = Last - First + 1; | |
7879 | |
7880 CaseCluster JTCluster; | |
7881 if (NumClusters >= MinJumpTableSize && | |
7882 buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { | |
7883 Clusters[DstIndex++] = JTCluster; | |
7884 } else { | |
7885 for (unsigned I = First; I <= Last; ++I) | |
7886 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); | |
7887 } | |
7888 } | |
7889 Clusters.resize(DstIndex); | |
7890 } | |
7891 | |
7892 bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { | |
7893 // FIXME: Using the pointer type doesn't seem ideal. | |
7894 uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); | |
7895 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; | |
7896 return Range <= BW; | |
7897 } | |
7898 | |
7899 bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, | |
7900 unsigned NumCmps, | |
7901 const APInt &Low, | |
7902 const APInt &High) { | |
7903 // FIXME: I don't think NumCmps is the correct metric: a single case and a | |
7904 // range of cases both require only one branch to lower. Just looking at the | |
7905 // number of clusters and destinations should be enough to decide whether to | |
7906 // build bit tests. | |
7907 | |
7908 // To lower a range with bit tests, the range must fit the bitwidth of a | |
7909 // machine word. | |
7910 if (!rangeFitsInWord(Low, High)) | |
7911 return false; | |
7912 | |
7913 // Decide whether it's profitable to lower this range with bit tests. Each | |
7914 // destination requires a bit test and branch, and there is an overall range | |
7915 // check branch. For a small number of clusters, separate comparisons might be | |
7916 // cheaper, and for many destinations, splitting the range might be better. | |
7917 return (NumDests == 1 && NumCmps >= 3) || | |
7918 (NumDests == 2 && NumCmps >= 5) || | |
7919 (NumDests == 3 && NumCmps >= 6); | |
7920 } | |
7921 | |
7922 bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, | |
7923 unsigned First, unsigned Last, | |
7924 const SwitchInst *SI, | |
7925 CaseCluster &BTCluster) { | |
7926 assert(First <= Last); | |
7927 if (First == Last) | |
7928 return false; | |
7929 | |
7930 BitVector Dests(FuncInfo.MF->getNumBlockIDs()); | |
7931 unsigned NumCmps = 0; | |
7932 for (int64_t I = First; I <= Last; ++I) { | |
7933 assert(Clusters[I].Kind == CC_Range); | |
7934 Dests.set(Clusters[I].MBB->getNumber()); | |
7935 NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; | |
7936 } | |
7937 unsigned NumDests = Dests.count(); | |
7938 | |
7939 APInt Low = Clusters[First].Low->getValue(); | |
7940 APInt High = Clusters[Last].High->getValue(); | |
7941 assert(Low.slt(High)); | |
7942 | |
7943 if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) | |
7944 return false; | |
7945 | |
7946 APInt LowBound; | |
7947 APInt CmpRange; | |
7948 | |
7949 const int BitWidth = DAG.getTargetLoweringInfo() | |
7950 .getPointerTy(DAG.getDataLayout()) | |
7951 .getSizeInBits(); | |
7952 assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); | |
7953 | |
7954 // Check if the clusters cover a contiguous range such that no value in the | |
7955 // range will jump to the default statement. | |
7956 bool ContiguousRange = true; | |
7957 for (int64_t I = First + 1; I <= Last; ++I) { | |
7958 if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { | |
7959 ContiguousRange = false; | |
7960 break; | |
7961 } | |
7962 } | |
7963 | |
7964 if (Low.isStrictlyPositive() && High.slt(BitWidth)) { | |
7965 // Optimize the case where all the case values fit in a word without having | |
7966 // to subtract minValue. In this case, we can optimize away the subtraction. | |
7967 LowBound = APInt::getNullValue(Low.getBitWidth()); | |
7968 CmpRange = High; | |
7969 ContiguousRange = false; | |
7970 } else { | |
7971 LowBound = Low; | |
7972 CmpRange = High - Low; | |
7973 } | |
7974 | |
7975 CaseBitsVector CBV; | |
7976 uint32_t TotalWeight = 0; | |
7977 for (unsigned i = First; i <= Last; ++i) { | |
7978 // Find the CaseBits for this destination. | |
7979 unsigned j; | |
7980 for (j = 0; j < CBV.size(); ++j) | |
7981 if (CBV[j].BB == Clusters[i].MBB) | |
7982 break; | |
7983 if (j == CBV.size()) | |
7984 CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); | |
7985 CaseBits *CB = &CBV[j]; | |
7986 | |
7987 // Update Mask, Bits and ExtraWeight. | |
7988 uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); | |
7989 uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); | |
7990 assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); | |
7991 CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; | |
7992 CB->Bits += Hi - Lo + 1; | |
7993 CB->ExtraWeight += Clusters[i].Weight; | |
7994 TotalWeight += Clusters[i].Weight; | |
7995 assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); | |
7996 } | |
7997 | |
7998 BitTestInfo BTI; | |
7999 std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { | |
8000 // Sort by weight first, number of bits second. | |
8001 if (a.ExtraWeight != b.ExtraWeight) | |
8002 return a.ExtraWeight > b.ExtraWeight; | |
8003 return a.Bits > b.Bits; | |
8004 }); | |
8005 | |
8006 for (auto &CB : CBV) { | |
8007 MachineBasicBlock *BitTestBB = | |
8008 FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); | |
8009 BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); | |
8010 } | |
8011 BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), | |
8012 SI->getCondition(), -1U, MVT::Other, false, | |
8013 ContiguousRange, nullptr, nullptr, std::move(BTI), | |
8014 TotalWeight); | |
8015 | |
8016 BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, | |
8017 BitTestCases.size() - 1, TotalWeight); | |
8018 return true; | |
8019 } | |
8020 | |
8021 void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, | |
8022 const SwitchInst *SI) { | |
8023 // Partition Clusters into as few subsets as possible, where each subset has a | |
8024 // range that fits in a machine word and has <= 3 unique destinations. | |
8025 | |
8026 #ifndef NDEBUG | |
8027 // Clusters must be sorted and contain Range or JumpTable clusters. | |
8028 assert(!Clusters.empty()); | |
8029 assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); | |
8030 for (const CaseCluster &C : Clusters) | |
8031 assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); | |
8032 for (unsigned i = 1; i < Clusters.size(); ++i) | |
8033 assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); | |
8034 #endif | |
8035 | |
8036 // The algorithm below is not suitable for -O0. | |
8037 if (TM.getOptLevel() == CodeGenOpt::None) | |
8038 return; | |
8039 | |
8040 // If target does not have legal shift left, do not emit bit tests at all. | |
8041 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | |
8042 EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); | |
8043 if (!TLI.isOperationLegal(ISD::SHL, PTy)) | |
8044 return; | |
8045 | |
8046 int BitWidth = PTy.getSizeInBits(); | |
8047 const int64_t N = Clusters.size(); | |
8048 | |
8049 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. | |
8050 SmallVector<unsigned, 8> MinPartitions(N); | |
8051 // LastElement[i] is the last element of the partition starting at i. | |
8052 SmallVector<unsigned, 8> LastElement(N); | |
8053 | |
8054 // FIXME: This might not be the best algorithm for finding bit test clusters. | |
8055 | |
8056 // Base case: There is only one way to partition Clusters[N-1]. | |
8057 MinPartitions[N - 1] = 1; | |
8058 LastElement[N - 1] = N - 1; | |
8059 | |
8060 // Note: loop indexes are signed to avoid underflow. | |
8061 for (int64_t i = N - 2; i >= 0; --i) { | |
8062 // Find optimal partitioning of Clusters[i..N-1]. | |
8063 // Baseline: Put Clusters[i] into a partition on its own. | |
8064 MinPartitions[i] = MinPartitions[i + 1] + 1; | |
8065 LastElement[i] = i; | |
8066 | |
8067 // Search for a solution that results in fewer partitions. | |
8068 // Note: the search is limited by BitWidth, reducing time complexity. | |
8069 for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { | |
8070 // Try building a partition from Clusters[i..j]. | |
8071 | |
8072 // Check the range. | |
8073 if (!rangeFitsInWord(Clusters[i].Low->getValue(), | |
8074 Clusters[j].High->getValue())) | |
8075 continue; | |
8076 | |
8077 // Check nbr of destinations and cluster types. | |
8078 // FIXME: This works, but doesn't seem very efficient. | |
8079 bool RangesOnly = true; | |
8080 BitVector Dests(FuncInfo.MF->getNumBlockIDs()); | |
8081 for (int64_t k = i; k <= j; k++) { | |
8082 if (Clusters[k].Kind != CC_Range) { | |
8083 RangesOnly = false; | |
8084 break; | |
8085 } | |
8086 Dests.set(Clusters[k].MBB->getNumber()); | |
8087 } | |
8088 if (!RangesOnly || Dests.count() > 3) | |
8089 break; | |
8090 | |
8091 // Check if it's a better partition. | |
8092 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); | |
8093 if (NumPartitions < MinPartitions[i]) { | |
8094 // Found a better partition. | |
8095 MinPartitions[i] = NumPartitions; | |
8096 LastElement[i] = j; | |
8097 } | |
8098 } | |
8099 } | |
8100 | |
8101 // Iterate over the partitions, replacing with bit-test clusters in-place. | |
8102 unsigned DstIndex = 0; | |
8103 for (unsigned First = 0, Last; First < N; First = Last + 1) { | |
8104 Last = LastElement[First]; | |
8105 assert(First <= Last); | |
8106 assert(DstIndex <= First); | |
8107 | |
8108 CaseCluster BitTestCluster; | |
8109 if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { | |
8110 Clusters[DstIndex++] = BitTestCluster; | |
8111 } else { | |
8112 size_t NumClusters = Last - First + 1; | |
8113 std::memmove(&Clusters[DstIndex], &Clusters[First], | |
8114 sizeof(Clusters[0]) * NumClusters); | |
8115 DstIndex += NumClusters; | |
8116 } | |
8117 } | |
8118 Clusters.resize(DstIndex); | |
8119 } | |
8120 | |
/// Lower the clusters [W.FirstCluster, W.LastCluster] of one switch work item
/// into a linear chain of tests that starts in W.MBB.
///
/// Each cluster is tested in turn. When a test fails, control continues in a
/// newly created fallthrough block, except for the last cluster, whose
/// fallthrough is DefaultMBB. Anything that belongs to SwitchMBB is emitted
/// immediately (visitJumpTableHeader, visitBitTestHeader, visitSwitchCase);
/// for any other block the jump table / bit test record is only filled in and
/// range checks are appended to SwitchCases.
///
/// \param W          Work item to lower. It is passed by value, but the sort
///                   and swap below go through its cluster iterators and so
///                   reorder the clusters they point at.
/// \param Cond       IR value being switched on.
/// \param SwitchMBB  Machine block of the switch; work items whose block is
///                   SwitchMBB get their code emitted right away.
/// \param DefaultMBB Destination taken when no cluster matches.
8121 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, | |
8122 MachineBasicBlock *SwitchMBB, | |
8123 MachineBasicBlock *DefaultMBB) { | |
8124 MachineFunction *CurMF = FuncInfo.MF; | |
8125 MachineBasicBlock *NextMBB = nullptr; | |
// BBI is advanced to the block that follows W.MBB; NextMBB stays null when
// W.MBB is the last block of the function. BBI is also the position passed to
// every CurMF->insert() call below.
8126 MachineFunction::iterator BBI = W.MBB; | |
8127 if (++BBI != FuncInfo.MF->end()) | |
8128 NextMBB = BBI; | |
8129 | |
8130 unsigned Size = W.LastCluster - W.FirstCluster + 1; | |
8131 | |
8132 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
8133 | |
8134 if (Size == 2 && W.MBB == SwitchMBB) { | |
8135 // If the two cases have the same destination, and if one value | |
8136 // is the same as the other, but has one bit unset that the other has set, | |
8137 // use bit manipulation to do two compares at once. For example: | |
8138 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" | |
8139 // TODO: This could be extended to merge any 2 cases in switches with 3 | |
8140 // cases. | |
8141 // TODO: Handle cases where W.MBB != SwitchMBB. | |
8142 CaseCluster &Small = *W.FirstCluster; | |
8143 CaseCluster &Big = *W.LastCluster; | |
8144 | |
// Only single-value clusters (Low == High) with a common destination qualify.
8145 if (Small.Low == Small.High && Big.Low == Big.High && | |
8146 Small.MBB == Big.MBB) { | |
8147 const APInt &SmallValue = Small.Low->getValue(); | |
8148 const APInt &BigValue = Big.Low->getValue(); | |
8149 | |
8150 // Check that there is only one bit different. | |
// Despite its name, CommonBit holds the XOR of the two values, i.e. the bits
// in which they differ.
8151 APInt CommonBit = BigValue ^ SmallValue; | |
8152 if (CommonBit.isPowerOf2()) { | |
8153 SDValue CondLHS = getValue(Cond); | |
8154 EVT VT = CondLHS.getValueType(); | |
8155 SDLoc DL = getCurSDLoc(); | |
8156 | |
8157 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, | |
8158 DAG.getConstant(CommonBit, DL, VT)); | |
// NOTE: this local shadows the Cond parameter for the rest of this scope.
8159 SDValue Cond = DAG.getSetCC( | |
8160 DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), | |
8161 ISD::SETEQ); | |
8162 | |
8163 // Update successor info. | |
8164 // Both Small and Big will jump to Small.MBB, so we sum up the weights. | |
8165 addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight); | |
8166 addSuccessorWithWeight( | |
8167 SwitchMBB, DefaultMBB, | |
8168 // The default destination is the first successor in IR. | |
8169 BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0) | |
8170 : 0); | |
8171 | |
8172 // Insert the true branch. | |
8173 SDValue BrCond = | |
8174 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, | |
8175 DAG.getBasicBlock(Small.MBB)); | |
8176 // Insert the false branch. | |
8177 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, | |
8178 DAG.getBasicBlock(DefaultMBB)); | |
8179 | |
8180 DAG.setRoot(BrCond); | |
8181 return; | |
8182 } | |
8183 } | |
8184 } | |
8185 | |
8186 if (TM.getOptLevel() != CodeGenOpt::None) { | |
8187 // Order cases by weight so the most likely case will be checked first. | |
8188 std::sort(W.FirstCluster, W.LastCluster + 1, | |
8189 [](const CaseCluster &a, const CaseCluster &b) { | |
8190 return a.Weight > b.Weight; | |
8191 }); | |
8192 | |
8193 // Rearrange the case blocks so that the last one falls through if possible | |
8194 // without changing the order of weights. | |
// The scan walks backwards from the cluster before the last one and stops at
// the first cluster that is heavier than the last one.
8195 for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { | |
8196 --I; | |
8197 if (I->Weight > W.LastCluster->Weight) | |
8198 break; | |
8199 if (I->Kind == CC_Range && I->MBB == NextMBB) { | |
8200 std::swap(*I, *W.LastCluster); | |
8201 break; | |
8202 } | |
8203 } | |
8204 } | |
8205 | |
8206 // Compute total weight. | |
8207 uint32_t DefaultWeight = W.DefaultWeight; | |
8208 uint32_t UnhandledWeights = DefaultWeight; | |
8209 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { | |
8210 UnhandledWeights += I->Weight; | |
8211 assert(UnhandledWeights >= I->Weight && "Weight overflow!"); | |
8212 } | |
8213 | |
// Emit one test per cluster; LastCluster is inclusive, hence the <= bound.
8214 MachineBasicBlock *CurMBB = W.MBB; | |
8215 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { | |
8216 MachineBasicBlock *Fallthrough; | |
8217 if (I == W.LastCluster) { | |
8218 // For the last cluster, fall through to the default destination. | |
8219 Fallthrough = DefaultMBB; | |
8220 } else { | |
8221 Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); | |
8222 CurMF->insert(BBI, Fallthrough); | |
8223 // Put Cond in a virtual register to make it available from the new blocks. | |
8224 ExportFromCurrentBlock(Cond); | |
8225 } | |
// From here on UnhandledWeights is the default weight plus the weights of the
// clusters that have not been tested yet.
8226 UnhandledWeights -= I->Weight; | |
8227 | |
8228 switch (I->Kind) { | |
8229 case CC_JumpTable: { | |
8230 // FIXME: Optimize away range check based on pivot comparisons. | |
8231 JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; | |
8232 JumpTable *JT = &JTCases[I->JTCasesIndex].second; | |
8233 | |
8234 // The jump block hasn't been inserted yet; insert it here. | |
8235 MachineBasicBlock *JumpMBB = JT->MBB; | |
8236 CurMF->insert(BBI, JumpMBB); | |
8237 | |
8238 uint32_t JumpWeight = I->Weight; | |
8239 uint32_t FallthroughWeight = UnhandledWeights; | |
8240 | |
8241 // If the default statement is a target of the jump table, we evenly | |
8242 // distribute the default weight to successors of CurMBB. Also update | |
8243 // the weight on the edge from JumpMBB to Fallthrough. | |
8244 for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), | |
8245 SE = JumpMBB->succ_end(); | |
8246 SI != SE; ++SI) { | |
8247 if (*SI == DefaultMBB) { | |
8248 JumpWeight += DefaultWeight / 2; | |
8249 FallthroughWeight -= DefaultWeight / 2; | |
8250 JumpMBB->setSuccWeight(SI, DefaultWeight / 2); | |
8251 break; | |
8252 } | |
8253 } | |
8254 | |
8255 addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight); | |
8256 addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight); | |
8257 | |
8258 // The jump table header will be inserted in our current block, do the | |
8259 // range check, and fall through to our fallthrough block. | |
8260 JTH->HeaderBB = CurMBB; | |
8261 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. | |
8262 | |
8263 // If we're in the right place, emit the jump table header right now. | |
8264 if (CurMBB == SwitchMBB) { | |
8265 visitJumpTableHeader(*JT, *JTH, SwitchMBB); | |
8266 JTH->Emitted = true; | |
8267 } | |
8268 break; | |
8269 } | |
8270 case CC_BitTests: { | |
8271 // FIXME: Optimize away range check based on pivot comparisons. | |
8272 BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; | |
8273 | |
8274 // The bit test blocks haven't been inserted yet; insert them here. | |
8275 for (BitTestCase &BTC : BTB->Cases) | |
8276 CurMF->insert(BBI, BTC.ThisBB); | |
8277 | |
8278 // Fill in fields of the BitTestBlock. | |
8279 BTB->Parent = CurMBB; | |
8280 BTB->Default = Fallthrough; | |
8281 | |
8282 BTB->DefaultWeight = UnhandledWeights; | |
8283 // If the cases in bit test don't form a contiguous range, we evenly | |
8284 // distribute the weight on the edge to Fallthrough to two successors | |
8285 // of CurMBB. | |
8286 if (!BTB->ContiguousRange) { | |
8287 BTB->Weight += DefaultWeight / 2; | |
8288 BTB->DefaultWeight -= DefaultWeight / 2; | |
8289 } | |
8290 | |
8291 // If we're in the right place, emit the bit test header right now. | |
8292 if (CurMBB == SwitchMBB) { | |
8293 visitBitTestHeader(*BTB, SwitchMBB); | |
8294 BTB->Emitted = true; | |
8295 } | |
8296 break; | |
8297 } | |
8298 case CC_Range: { | |
// A single-value cluster becomes an equality test with MHS left null; a real
// range becomes a three-operand test with Cond as the middle operand.
8299 const Value *RHS, *LHS, *MHS; | |
8300 ISD::CondCode CC; | |
8301 if (I->Low == I->High) { | |
8302 // Check Cond == I->Low. | |
8303 CC = ISD::SETEQ; | |
8304 LHS = Cond; | |
8305 RHS=I->Low; | |
8306 MHS = nullptr; | |
8307 } else { | |
8308 // Check I->Low <= Cond <= I->High. | |
8309 CC = ISD::SETLE; | |
8310 LHS = I->Low; | |
8311 MHS = Cond; | |
8312 RHS = I->High; | |
8313 } | |
8314 | |
8315 // The false weight is the sum of all unhandled cases. | |
8316 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, | |
8317 UnhandledWeights); | |
8318 | |
// Emit the compare now when we are still in the switch block; otherwise queue
// it in SwitchCases.
8319 if (CurMBB == SwitchMBB) | |
8320 visitSwitchCase(CB, SwitchMBB); | |
8321 else | |
8322 SwitchCases.push_back(CB); | |
8323 | |
8324 break; | |
8325 } | |
8326 } | |
// The next cluster is tested in the block this one falls through to.
8327 CurMBB = Fallthrough; | |
8328 } | |
8329 } | |
8330 | |
/// Return how many clusters in the inclusive range [First, Last] rank ahead
/// of CC.
///
/// A cluster X ranks ahead of CC when its weight is strictly greater, or when
/// the weights are equal and X's Low value is smaller than CC's Low value in a
/// signed comparison. CC never counts against itself, since neither condition
/// holds for X == CC.
8331 unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, | |
8332 CaseClusterIt First, | |
8333 CaseClusterIt Last) { | |
// Last is inclusive, so the counted range ends at Last + 1.
8334 return std::count_if(First, Last + 1, [&](const CaseCluster &X) { | |
8335 if (X.Weight != CC.Weight) | |
8336 return X.Weight > CC.Weight; | |
8337 | |
8338 // Ties are broken by comparing the case value. | |
8339 return X.Low->getValue().slt(CC.Low->getValue()); | |
8340 }); | |
8341 } | |
8342 | |
/// Split the cluster range of work item W into a left and a right part and
/// emit the "Cond < Pivot" branch that selects between them.
///
/// The pivot is the Low value of the first cluster on the right. For each
/// side, either a new block plus a new work item (carrying half of
/// W.DefaultWeight) is created and pushed onto WorkList, or, when the side is
/// a single CC_Range cluster that exactly fills the known bounds, the branch
/// goes straight to that cluster's destination. The branch itself is emitted
/// immediately when W.MBB is SwitchMBB and queued in SwitchCases otherwise.
///
/// \param WorkList  Receives the work items created for the two sides.
/// \param W         Work item to split; must span at least two clusters.
/// \param Cond      IR value being switched on.
/// \param SwitchMBB Machine block of the switch.
8343 void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, | |
8344 const SwitchWorkListItem &W, | |
8345 Value *Cond, | |
8346 MachineBasicBlock *SwitchMBB) { | |
8347 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && | |
8348 "Clusters not sorted?"); | |
8349 | |
8350 assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); | |
8351 | |
8352 // Balance the tree based on branch weights to create a near-optimal (in terms | |
8353 // of search time given key frequency) binary search tree. See e.g. Kurt | |
8354 // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). | |
// Each side starts with its outermost cluster plus half of the default weight.
8355 CaseClusterIt LastLeft = W.FirstCluster; | |
8356 CaseClusterIt FirstRight = W.LastCluster; | |
8357 uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2; | |
8358 uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2; | |
8359 | |
8360 // Move LastLeft and FirstRight towards each other from opposite directions to | |
8361 // find a partitioning of the clusters which balances the weight on both | |
8362 // sides. If LeftWeight and RightWeight are equal, alternate which side is | |
8363 // taken to ensure 0-weight nodes are distributed evenly. | |
// I counts loop iterations; its low bit decides which side grows on a tie.
8364 unsigned I = 0; | |
8365 while (LastLeft + 1 < FirstRight) { | |
8366 if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) | |
8367 LeftWeight += (++LastLeft)->Weight; | |
8368 else | |
8369 RightWeight += (--FirstRight)->Weight; | |
8370 I++; | |
8371 } | |
8372 | |
8373 for (;;) { | |
8374 // Our binary search tree differs from a typical BST in that ours can have up | |
8375 // to three values in each leaf. The pivot selection above doesn't take that | |
8376 // into account, which means the tree might require more nodes and be less | |
8377 // efficient. We compensate for this here. | |
8378 | |
8379 unsigned NumLeft = LastLeft - W.FirstCluster + 1; | |
8380 unsigned NumRight = W.LastCluster - FirstRight + 1; | |
8381 | |
8382 if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { | |
8383 // If one side has less than 3 clusters, and the other has more than 3, | |
8384 // consider taking a cluster from the other side. | |
8385 | |
8386 if (NumLeft < NumRight) { | |
8387 // Consider moving the first cluster on the right to the left side. | |
8388 CaseCluster &CC = *FirstRight; | |
8389 unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); | |
8390 unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); | |
8391 if (LeftSideRank <= RightSideRank) { | |
8392 // Moving the cluster to the left does not demote it. | |
// Both iterators move together, so LastLeft + 1 == FirstRight still holds.
8393 ++LastLeft; | |
8394 ++FirstRight; | |
8395 continue; | |
8396 } | |
8397 } else { | |
8398 assert(NumRight < NumLeft); | |
8399 // Consider moving the last element on the left to the right side. | |
8400 CaseCluster &CC = *LastLeft; | |
8401 unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); | |
8402 unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); | |
8403 if (RightSideRank <= LeftSideRank) { | |
8404 // Moving the cluster to the right does not demote it. | |
8405 --LastLeft; | |
8406 --FirstRight; | |
8407 continue; | |
8408 } | |
8409 } | |
8410 } | |
// No further adjustment applies; the partition is final.
8411 break; | |
8412 } | |
8413 | |
8414 assert(LastLeft + 1 == FirstRight); | |
8415 assert(LastLeft >= W.FirstCluster); | |
8416 assert(FirstRight <= W.LastCluster); | |
8417 | |
8418 // Use the first element on the right as pivot since we will make less-than | |
8419 // comparisons against it. | |
8420 CaseClusterIt PivotCluster = FirstRight; | |
8421 assert(PivotCluster > W.FirstCluster); | |
8422 assert(PivotCluster <= W.LastCluster); | |
8423 | |
8424 CaseClusterIt FirstLeft = W.FirstCluster; | |
8425 CaseClusterIt LastRight = W.LastCluster; | |
8426 | |
8427 const ConstantInt *Pivot = PivotCluster->Low; | |
8428 | |
8429 // New blocks will be inserted immediately after the current one. | |
8430 MachineFunction::iterator BBI = W.MBB; | |
8431 ++BBI; | |
8432 | |
8433 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, | |
8434 // we can branch to its destination directly if it's squeezed exactly in | |
8435 // between the known lower bound and Pivot - 1. | |
8436 MachineBasicBlock *LeftMBB; | |
8437 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && | |
8438 FirstLeft->Low == W.GE && | |
8439 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { | |
8440 LeftMBB = FirstLeft->MBB; | |
8441 } else { | |
8442 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); | |
8443 FuncInfo.MF->insert(BBI, LeftMBB); | |
// The left work item keeps W.GE as its lower bound and takes Pivot as its
// upper bound.
8444 WorkList.push_back( | |
8445 {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2}); | |
8446 // Put Cond in a virtual register to make it available from the new blocks. | |
8447 ExportFromCurrentBlock(Cond); | |
8448 } | |
8449 | |
8450 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a | |
8451 // single cluster, RHS.Low == Pivot, and we can branch to its destination | |
8452 // directly if RHS.High equals the current upper bound. | |
8453 MachineBasicBlock *RightMBB; | |
8454 if (FirstRight == LastRight && FirstRight->Kind == CC_Range && | |
8455 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { | |
8456 RightMBB = FirstRight->MBB; | |
8457 } else { | |
8458 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); | |
8459 FuncInfo.MF->insert(BBI, RightMBB); | |
// The right work item takes Pivot as its lower bound and keeps W.LT as its
// upper bound.
8460 WorkList.push_back( | |
8461 {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2}); | |
8462 // Put Cond in a virtual register to make it available from the new blocks. | |
8463 ExportFromCurrentBlock(Cond); | |
8464 } | |
8465 | |
8466 // Create the CaseBlock record that will be used to lower the branch. | |
8467 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, | |
8468 LeftWeight, RightWeight); | |
8469 | |
8470 if (W.MBB == SwitchMBB) | |
8471 visitSwitchCase(CB, SwitchMBB); | |
8472 else | |
8473 SwitchCases.push_back(CB); | |
8474 } | |
8475 | |
/// Lower a switch instruction.
///
/// Steps, in order:
///  1. Build one single-value range cluster per case, weighted with the edge
///     weight from BPI (0 when no BPI is available).
///  2. Merge the clusters with sortAndRangeify().
///  3. When optimizing and the default destination is unreachable, make the
///     most frequently targeted case successor the new default and drop the
///     clusters that target it.
///  4. If no clusters are left, branch to the default destination and return.
///  5. Run findJumpTables() and findBitTestClusters() over the clusters.
///  6. Process a work list: when optimizing, items with more than three
///     clusters are split with splitWorkItem(); everything else is lowered
///     with lowerWorkItem().
8476 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { | |
8477 // Extract cases from the switch. | |
8478 BranchProbabilityInfo *BPI = FuncInfo.BPI; | |
8479 CaseClusterVector Clusters; | |
8480 Clusters.reserve(SI.getNumCases()); | |
8481 for (auto I : SI.cases()) { | |
8482 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; | |
8483 const ConstantInt *CaseVal = I.getCaseValue(); | |
8484 uint32_t Weight = | |
8485 BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; | |
// Every case starts out as a range whose low and high bound are the same value.
8486 Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); | |
8487 } | |
8488 | |
8489 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; | |
8490 | |
8491 // Cluster adjacent cases with the same destination. We do this at all | |
8492 // optimization levels because it's cheap to do and will make codegen faster | |
8493 // if there are many clusters. | |
8494 sortAndRangeify(Clusters); | |
8495 | |
8496 if (TM.getOptLevel() != CodeGenOpt::None) { | |
8497 // Replace an unreachable default with the most popular destination. | |
8498 // FIXME: Exploit unreachable default more aggressively. | |
8499 bool UnreachableDefault = | |
8500 isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); | |
8501 if (UnreachableDefault && !Clusters.empty()) { | |
// Popularity is counted per switch case (not per cluster). Because of the
// strict '>' the first block to reach the highest count wins ties.
8502 DenseMap<const BasicBlock *, unsigned> Popularity; | |
8503 unsigned MaxPop = 0; | |
8504 const BasicBlock *MaxBB = nullptr; | |
8505 for (auto I : SI.cases()) { | |
8506 const BasicBlock *BB = I.getCaseSuccessor(); | |
8507 if (++Popularity[BB] > MaxPop) { | |
8508 MaxPop = Popularity[BB]; | |
8509 MaxBB = BB; | |
8510 } | |
8511 } | |
8512 // Set new default. | |
8513 assert(MaxPop > 0 && MaxBB); | |
8514 DefaultMBB = FuncInfo.MBBMap[MaxBB]; | |
8515 | |
8516 // Remove cases that were pointing to the destination that is now the | |
8517 // default. | |
8518 CaseClusterVector New; | |
8519 New.reserve(Clusters.size()); | |
8520 for (CaseCluster &CC : Clusters) { | |
8521 if (CC.MBB != DefaultMBB) | |
8522 New.push_back(CC); | |
8523 } | |
8524 Clusters = std::move(New); | |
8525 } | |
8526 } | |
8527 | |
8528 // If there is only the default destination, jump there directly. | |
8529 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; | |
8530 if (Clusters.empty()) { | |
8531 SwitchMBB->addSuccessor(DefaultMBB); | |
// No explicit branch is emitted when the default block is the next block.
8532 if (DefaultMBB != NextBlock(SwitchMBB)) { | |
8533 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, | |
8534 getControlRoot(), DAG.getBasicBlock(DefaultMBB))); | |
8535 } | |
8536 return; | |
8537 } | |
8538 | |
8539 findJumpTables(Clusters, &SI, DefaultMBB); | |
8540 findBitTestClusters(Clusters, &SI); | |
8541 | |
// Debug dump: one entry per cluster, prefixed with "JT:" or "BT:" for jump
// table and bit test clusters, printed as "Low" or "Low-High".
8542 DEBUG({ | |
8543 dbgs() << "Case clusters: "; | |
8544 for (const CaseCluster &C : Clusters) { | |
8545 if (C.Kind == CC_JumpTable) dbgs() << "JT:"; | |
8546 if (C.Kind == CC_BitTests) dbgs() << "BT:"; | |
8547 | |
8548 C.Low->getValue().print(dbgs(), true); | |
8549 if (C.Low != C.High) { | |
8550 dbgs() << '-'; | |
8551 C.High->getValue().print(dbgs(), true); | |
8552 } | |
8553 dbgs() << ' '; | |
8554 } | |
8555 dbgs() << '\n'; | |
8556 }); | |
8557 | |
8558 assert(!Clusters.empty()); | |
// Seed the work list with a single item that covers all clusters, starts in
// the switch block and has no known lower or upper bound (both nullptr).
8559 SwitchWorkList WorkList; | |
8560 CaseClusterIt First = Clusters.begin(); | |
8561 CaseClusterIt Last = Clusters.end() - 1; | |
8562 uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB); | |
8563 WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight}); | |
8564 | |
// Items are taken from the back of the list, so the most recently pushed item
// is processed first.
8565 while (!WorkList.empty()) { | |
8566 SwitchWorkListItem W = WorkList.back(); | |
8567 WorkList.pop_back(); | |
8568 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; | |
8569 | |
8570 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { | |
8571 // For optimized builds, lower large range as a balanced binary tree. | |
8572 splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); | |
8573 continue; | |
8574 } | |
8575 | |
8576 lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); | |
8577 } | |
8578 } | |