comparison lib/CodeGen/MachineScheduler.cpp @ 33:e4204d083e25

LLVM 3.5
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Thu, 12 Dec 2013 14:32:10 +0900
parents 95c75e76d11b
children 54457678186b
1:f783a2dd24b1 33:e4204d083e25
1317 constrainLocalCopy(SU, DAG); 1317 constrainLocalCopy(SU, DAG);
1318 } 1318 }
1319 } 1319 }
1320 1320
1321 //===----------------------------------------------------------------------===// 1321 //===----------------------------------------------------------------------===//
1322 // GenericScheduler - Implementation of the generic MachineSchedStrategy. 1322 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1323 //===----------------------------------------------------------------------===// 1323 // and possibly other custom schedulers.
1324 1324 //===----------------------------------------------------------------------===//
1325 namespace { 1325
1326 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance 1326 static const unsigned InvalidCycle = ~0U;
1327 /// the schedule. 1327
1328 class GenericScheduler : public MachineSchedStrategy { 1328 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1329 public: 1329
1330 /// Represent the type of SchedCandidate found within a single queue. 1330 void SchedBoundary::reset() {
1331 /// pickNodeBidirectional depends on these listed by decreasing priority. 1331 // A new HazardRec is created for each DAG and owned by SchedBoundary.
1332 enum CandReason { 1332 // Destroying and reconstructing it is very expensive though. So keep
1333 NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, 1333 // invalid, placeholder HazardRecs.
1334 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 1334 if (HazardRec && HazardRec->isEnabled()) {
1335 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; 1335 delete HazardRec;
1336 1336 HazardRec = 0;
1337 }
1338 Available.clear();
1339 Pending.clear();
1340 CheckPending = false;
1341 NextSUs.clear();
1342 CurrCycle = 0;
1343 CurrMOps = 0;
1344 MinReadyCycle = UINT_MAX;
1345 ExpectedLatency = 0;
1346 DependentLatency = 0;
1347 RetiredMOps = 0;
1348 MaxExecutedResCount = 0;
1349 ZoneCritResIdx = 0;
1350 IsResourceLimited = false;
1351 ReservedCycles.clear();
1337 #ifndef NDEBUG 1352 #ifndef NDEBUG
1338 static const char *getReasonStr(GenericScheduler::CandReason Reason); 1353 MaxObservedLatency = 0;
1339 #endif 1354 #endif
1340 1355 // Reserve a zero-count for invalid CritResIdx.
1341 /// Policy for scheduling the next instruction in the candidate's zone. 1356 ExecutedResCounts.resize(1);
1342 struct CandPolicy { 1357 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1343 bool ReduceLatency; 1358 }
1344 unsigned ReduceResIdx; 1359
1345 unsigned DemandResIdx; 1360 void SchedRemainder::
1346
1347 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1348 };
1349
1350 /// Status of an instruction's critical resource consumption.
1351 struct SchedResourceDelta {
1352 // Count critical resources in the scheduled region required by SU.
1353 unsigned CritResources;
1354
1355 // Count critical resources from another region consumed by SU.
1356 unsigned DemandedResources;
1357
1358 SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1359
1360 bool operator==(const SchedResourceDelta &RHS) const {
1361 return CritResources == RHS.CritResources
1362 && DemandedResources == RHS.DemandedResources;
1363 }
1364 bool operator!=(const SchedResourceDelta &RHS) const {
1365 return !operator==(RHS);
1366 }
1367 };
1368
1369 /// Store the state used by GenericScheduler heuristics, required for the
1370 /// lifetime of one invocation of pickNode().
1371 struct SchedCandidate {
1372 CandPolicy Policy;
1373
1374 // The best SUnit candidate.
1375 SUnit *SU;
1376
1377 // The reason for this candidate.
1378 CandReason Reason;
1379
1380 // Set of reasons that apply to multiple candidates.
1381 uint32_t RepeatReasonSet;
1382
1383 // Register pressure values for the best candidate.
1384 RegPressureDelta RPDelta;
1385
1386 // Critical resource consumption of the best candidate.
1387 SchedResourceDelta ResDelta;
1388
1389 SchedCandidate(const CandPolicy &policy)
1390 : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
1391
1392 bool isValid() const { return SU; }
1393
1394 // Copy the status of another candidate without changing policy.
1395 void setBest(SchedCandidate &Best) {
1396 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1397 SU = Best.SU;
1398 Reason = Best.Reason;
1399 RPDelta = Best.RPDelta;
1400 ResDelta = Best.ResDelta;
1401 }
1402
1403 bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
1404 void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
1405
1406 void initResourceDelta(const ScheduleDAGMI *DAG,
1407 const TargetSchedModel *SchedModel);
1408 };
1409
1410 /// Summarize the unscheduled region.
1411 struct SchedRemainder {
1412 // Critical path through the DAG in expected latency.
1413 unsigned CriticalPath;
1414 unsigned CyclicCritPath;
1415
1416 // Scaled count of micro-ops left to schedule.
1417 unsigned RemIssueCount;
1418
1419 bool IsAcyclicLatencyLimited;
1420
1421 // Unscheduled resources
1422 SmallVector<unsigned, 16> RemainingCounts;
1423
1424 void reset() {
1425 CriticalPath = 0;
1426 CyclicCritPath = 0;
1427 RemIssueCount = 0;
1428 IsAcyclicLatencyLimited = false;
1429 RemainingCounts.clear();
1430 }
1431
1432 SchedRemainder() { reset(); }
1433
1434 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1435 };
1436
1437 /// Each Scheduling boundary is associated with ready queues. It tracks the
1438 /// current cycle in the direction of movement, and maintains the state
1439 /// of "hazards" and other interlocks at the current cycle.
1440 struct SchedBoundary {
1441 ScheduleDAGMI *DAG;
1442 const TargetSchedModel *SchedModel;
1443 SchedRemainder *Rem;
1444
1445 ReadyQueue Available;
1446 ReadyQueue Pending;
1447 bool CheckPending;
1448
1449 // For heuristics, keep a list of the nodes that immediately depend on the
1450 // most recently scheduled node.
1451 SmallPtrSet<const SUnit*, 8> NextSUs;
1452
1453 ScheduleHazardRecognizer *HazardRec;
1454
1455 /// Number of cycles it takes to issue the instructions scheduled in this
1456 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
1457 /// See getStalls().
1458 unsigned CurrCycle;
1459
1460 /// Micro-ops issued in the current cycle
1461 unsigned CurrMOps;
1462
1463 /// MinReadyCycle - Cycle of the soonest available instruction.
1464 unsigned MinReadyCycle;
1465
1466 // The expected latency of the critical path in this scheduled zone.
1467 unsigned ExpectedLatency;
1468
1469 // The latency of dependence chains leading into this zone.
1470 // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
1471 // For each cycle scheduled: DLat -= 1.
1472 unsigned DependentLatency;
1473
1474 /// Count the scheduled (issued) micro-ops that can be retired by
1475 /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
1476 unsigned RetiredMOps;
1477
1478 // Count scheduled resources that have been executed. Resources are
1479 // considered executed if they become ready in the time that it takes to
1480 // saturate any resource including the one in question. Counts are scaled
1481 // for direct comparison with other resources. Counts can be compared with
1482 // MOps * getMicroOpFactor and Latency * getLatencyFactor.
1483 SmallVector<unsigned, 16> ExecutedResCounts;
1484
1485 /// Cache the max count for a single resource.
1486 unsigned MaxExecutedResCount;
1487
1488 // Cache the critical resources ID in this scheduled zone.
1489 unsigned ZoneCritResIdx;
1490
1491 // Is the scheduled region resource limited vs. latency limited.
1492 bool IsResourceLimited;
1493
1494 #ifndef NDEBUG
1495 // Remember the greatest operand latency as an upper bound on the number of
1496 // times we should retry the pending queue because of a hazard.
1497 unsigned MaxObservedLatency;
1498 #endif
1499
1500 void reset() {
1501 // A new HazardRec is created for each DAG and owned by SchedBoundary.
1502 // Destroying and reconstructing it is very expensive though. So keep
1503 // invalid, placeholder HazardRecs.
1504 if (HazardRec && HazardRec->isEnabled()) {
1505 delete HazardRec;
1506 HazardRec = 0;
1507 }
1508 Available.clear();
1509 Pending.clear();
1510 CheckPending = false;
1511 NextSUs.clear();
1512 CurrCycle = 0;
1513 CurrMOps = 0;
1514 MinReadyCycle = UINT_MAX;
1515 ExpectedLatency = 0;
1516 DependentLatency = 0;
1517 RetiredMOps = 0;
1518 MaxExecutedResCount = 0;
1519 ZoneCritResIdx = 0;
1520 IsResourceLimited = false;
1521 #ifndef NDEBUG
1522 MaxObservedLatency = 0;
1523 #endif
1524 // Reserve a zero-count for invalid CritResIdx.
1525 ExecutedResCounts.resize(1);
1526 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1527 }
1528
1529 /// Pending queues extend the ready queues with the same ID and the
1530 /// PendingFlag set.
1531 SchedBoundary(unsigned ID, const Twine &Name):
1532 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1533 Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
1534 HazardRec(0) {
1535 reset();
1536 }
1537
1538 ~SchedBoundary() { delete HazardRec; }
1539
1540 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1541 SchedRemainder *rem);
1542
1543 bool isTop() const {
1544 return Available.getID() == GenericScheduler::TopQID;
1545 }
1546
1547 #ifndef NDEBUG
1548 const char *getResourceName(unsigned PIdx) {
1549 if (!PIdx)
1550 return "MOps";
1551 return SchedModel->getProcResource(PIdx)->Name;
1552 }
1553 #endif
1554
1555 /// Get the number of latency cycles "covered" by the scheduled
1556 /// instructions. This is the larger of the critical path within the zone
1557 /// and the number of cycles required to issue the instructions.
1558 unsigned getScheduledLatency() const {
1559 return std::max(ExpectedLatency, CurrCycle);
1560 }
1561
1562 unsigned getUnscheduledLatency(SUnit *SU) const {
1563 return isTop() ? SU->getHeight() : SU->getDepth();
1564 }
1565
1566 unsigned getResourceCount(unsigned ResIdx) const {
1567 return ExecutedResCounts[ResIdx];
1568 }
1569
1570 /// Get the scaled count of scheduled micro-ops and resources, including
1571 /// executed resources.
1572 unsigned getCriticalCount() const {
1573 if (!ZoneCritResIdx)
1574 return RetiredMOps * SchedModel->getMicroOpFactor();
1575 return getResourceCount(ZoneCritResIdx);
1576 }
1577
1578 /// Get a scaled count for the minimum execution time of the scheduled
1579 /// micro-ops that are ready to execute by getExecutedCount. Notice the
1580 /// feedback loop.
1581 unsigned getExecutedCount() const {
1582 return std::max(CurrCycle * SchedModel->getLatencyFactor(),
1583 MaxExecutedResCount);
1584 }
1585
1586 bool checkHazard(SUnit *SU);
1587
1588 unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
1589
1590 unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1591
1592 void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
1593
1594 void releaseNode(SUnit *SU, unsigned ReadyCycle);
1595
1596 void bumpCycle(unsigned NextCycle);
1597
1598 void incExecutedResources(unsigned PIdx, unsigned Count);
1599
1600 unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
1601
1602 void bumpNode(SUnit *SU);
1603
1604 void releasePending();
1605
1606 void removeReady(SUnit *SU);
1607
1608 SUnit *pickOnlyChoice();
1609
1610 #ifndef NDEBUG
1611 void dumpScheduledState();
1612 #endif
1613 };
1614
1615 private:
1616 const MachineSchedContext *Context;
1617 ScheduleDAGMI *DAG;
1618 const TargetSchedModel *SchedModel;
1619 const TargetRegisterInfo *TRI;
1620
1621 // State of the top and bottom scheduled instruction boundaries.
1622 SchedRemainder Rem;
1623 SchedBoundary Top;
1624 SchedBoundary Bot;
1625
1626 MachineSchedPolicy RegionPolicy;
1627 public:
1628 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1629 enum {
1630 TopQID = 1,
1631 BotQID = 2,
1632 LogMaxQID = 2
1633 };
1634
1635 GenericScheduler(const MachineSchedContext *C):
1636 Context(C), DAG(0), SchedModel(0), TRI(0),
1637 Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1638
1639 virtual void initPolicy(MachineBasicBlock::iterator Begin,
1640 MachineBasicBlock::iterator End,
1641 unsigned NumRegionInstrs);
1642
1643 bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
1644
1645 virtual void initialize(ScheduleDAGMI *dag);
1646
1647 virtual SUnit *pickNode(bool &IsTopNode);
1648
1649 virtual void schedNode(SUnit *SU, bool IsTopNode);
1650
1651 virtual void releaseTopNode(SUnit *SU);
1652
1653 virtual void releaseBottomNode(SUnit *SU);
1654
1655 virtual void registerRoots();
1656
1657 protected:
1658 void checkAcyclicLatency();
1659
1660 void tryCandidate(SchedCandidate &Cand,
1661 SchedCandidate &TryCand,
1662 SchedBoundary &Zone,
1663 const RegPressureTracker &RPTracker,
1664 RegPressureTracker &TempTracker);
1665
1666 SUnit *pickNodeBidirectional(bool &IsTopNode);
1667
1668 void pickNodeFromQueue(SchedBoundary &Zone,
1669 const RegPressureTracker &RPTracker,
1670 SchedCandidate &Candidate);
1671
1672 void reschedulePhysRegCopies(SUnit *SU, bool isTop);
1673
1674 #ifndef NDEBUG
1675 void traceCandidate(const SchedCandidate &Cand);
1676 #endif
1677 };
1678 } // namespace
1679
1680 void GenericScheduler::SchedRemainder::
1681 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1361 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1682 reset(); 1362 reset();
1683 if (!SchedModel->hasInstrSchedModel()) 1363 if (!SchedModel->hasInstrSchedModel())
1684 return; 1364 return;
1685 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1365 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1696 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1376 RemainingCounts[PIdx] += (Factor * PI->Cycles);
1697 } 1377 }
1698 } 1378 }
1699 } 1379 }
1700 1380
1701 void GenericScheduler::SchedBoundary:: 1381 void SchedBoundary::
1702 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1382 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1703 reset(); 1383 reset();
1704 DAG = dag; 1384 DAG = dag;
1705 SchedModel = smodel; 1385 SchedModel = smodel;
1706 Rem = rem; 1386 Rem = rem;
1707 if (SchedModel->hasInstrSchedModel()) 1387 if (SchedModel->hasInstrSchedModel()) {
1708 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); 1388 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1709 } 1389 ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1710 1390 }
1711 /// Initialize the per-region scheduling policy. 1391 }
1712 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 1392
1713 MachineBasicBlock::iterator End, 1393 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1714 unsigned NumRegionInstrs) { 1394 /// these "soft stalls" differently than the hard stall cycles based on CPU
1715 const TargetMachine &TM = Context->MF->getTarget(); 1395 /// resources and computed by checkHazard(). A fully in-order model
1716 1396 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1717 // Avoid setting up the register pressure tracker for small regions to save 1397 /// available for scheduling until they are ready. However, a weaker in-order
1718 // compile time. As a rough heuristic, only track pressure when the number of 1398 /// model may use this for heuristics. For example, if a processor has in-order
1719 // schedulable instructions exceeds half the integer register file. 1399 /// behavior when reading certain resources, this may come into play.
1720 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 1400 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1721 TM.getTargetLowering()->getRegClassFor(MVT::i32)); 1401 if (!SU->isUnbuffered)
1722 1402 return 0;
1723 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 1403
1724 1404 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1725 // For generic targets, we default to bottom-up, because it's simpler and more 1405 if (ReadyCycle > CurrCycle)
1726 // compile-time optimizations have been implemented in that direction. 1406 return ReadyCycle - CurrCycle;
1727 RegionPolicy.OnlyBottomUp = true; 1407 return 0;
1728 1408 }
1729 // Allow the subtarget to override default policy. 1409
1730 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); 1410 /// Compute the next cycle at which the given processor resource can be
1731 ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); 1411 /// scheduled.
1732 1412 unsigned SchedBoundary::
1733 // After subtarget overrides, apply command line options. 1413 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1734 if (!EnableRegPressure) 1414 unsigned NextUnreserved = ReservedCycles[PIdx];
1735 RegionPolicy.ShouldTrackPressure = false; 1415 // If this resource has never been used, always return cycle zero.
1736 1416 if (NextUnreserved == InvalidCycle)
1737 // Check -misched-topdown/bottomup can force or unforce scheduling direction. 1417 return 0;
1738 // e.g. -misched-bottomup=false allows scheduling in both directions. 1418 // For bottom-up scheduling add the cycles needed for the current operation.
1739 assert((!ForceTopDown || !ForceBottomUp) && 1419 if (!isTop())
1740 "-misched-topdown incompatible with -misched-bottomup"); 1420 NextUnreserved += Cycles;
1741 if (ForceBottomUp.getNumOccurrences() > 0) { 1421 return NextUnreserved;
1742 RegionPolicy.OnlyBottomUp = ForceBottomUp;
1743 if (RegionPolicy.OnlyBottomUp)
1744 RegionPolicy.OnlyTopDown = false;
1745 }
1746 if (ForceTopDown.getNumOccurrences() > 0) {
1747 RegionPolicy.OnlyTopDown = ForceTopDown;
1748 if (RegionPolicy.OnlyTopDown)
1749 RegionPolicy.OnlyBottomUp = false;
1750 }
1751 }
1752
1753 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
1754 DAG = dag;
1755 SchedModel = DAG->getSchedModel();
1756 TRI = DAG->TRI;
1757
1758 Rem.init(DAG, SchedModel);
1759 Top.init(DAG, SchedModel, &Rem);
1760 Bot.init(DAG, SchedModel, &Rem);
1761
1762 // Initialize resource counts.
1763
1764 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1765 // are disabled, then these HazardRecs will be disabled.
1766 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1767 const TargetMachine &TM = DAG->MF.getTarget();
1768 if (!Top.HazardRec) {
1769 Top.HazardRec =
1770 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1771 }
1772 if (!Bot.HazardRec) {
1773 Bot.HazardRec =
1774 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1775 }
1776 }
1777
1778 void GenericScheduler::releaseTopNode(SUnit *SU) {
1779 if (SU->isScheduled)
1780 return;
1781
1782 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1783 I != E; ++I) {
1784 if (I->isWeak())
1785 continue;
1786 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1787 unsigned Latency = I->getLatency();
1788 #ifndef NDEBUG
1789 Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
1790 #endif
1791 if (SU->TopReadyCycle < PredReadyCycle + Latency)
1792 SU->TopReadyCycle = PredReadyCycle + Latency;
1793 }
1794 Top.releaseNode(SU, SU->TopReadyCycle);
1795 }
1796
1797 void GenericScheduler::releaseBottomNode(SUnit *SU) {
1798 if (SU->isScheduled)
1799 return;
1800
1801 assert(SU->getInstr() && "Scheduled SUnit must have instr");
1802
1803 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1804 I != E; ++I) {
1805 if (I->isWeak())
1806 continue;
1807 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1808 unsigned Latency = I->getLatency();
1809 #ifndef NDEBUG
1810 Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
1811 #endif
1812 if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1813 SU->BotReadyCycle = SuccReadyCycle + Latency;
1814 }
1815 Bot.releaseNode(SU, SU->BotReadyCycle);
1816 }
1817
1818 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
1819 /// critical path by more cycles than it takes to drain the instruction buffer.
1820 /// We estimate an upper bound on in-flight instructions as:
1821 ///
1822 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
1823 /// InFlightIterations = AcyclicPath / CyclesPerIteration
1824 /// InFlightResources = InFlightIterations * LoopResources
1825 ///
1826 /// TODO: Check execution resources in addition to IssueCount.
1827 void GenericScheduler::checkAcyclicLatency() {
1828 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
1829 return;
1830
1831 // Scaled number of cycles per loop iteration.
1832 unsigned IterCount =
1833 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
1834 Rem.RemIssueCount);
1835 // Scaled acyclic critical path.
1836 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
1837 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
1838 unsigned InFlightCount =
1839 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
1840 unsigned BufferLimit =
1841 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
1842
1843 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
1844
1845 DEBUG(dbgs() << "IssueCycles="
1846 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
1847 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
1848 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
1849 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
1850 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
1851 if (Rem.IsAcyclicLatencyLimited)
1852 dbgs() << " ACYCLIC LATENCY LIMIT\n");
1853 }
1854
1855 void GenericScheduler::registerRoots() {
1856 Rem.CriticalPath = DAG->ExitSU.getDepth();
1857
1858 // Some roots may not feed into ExitSU. Check all of them in case.
1859 for (std::vector<SUnit*>::const_iterator
1860 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1861 if ((*I)->getDepth() > Rem.CriticalPath)
1862 Rem.CriticalPath = (*I)->getDepth();
1863 }
1864 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1865
1866 if (EnableCyclicPath) {
1867 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
1868 checkAcyclicLatency();
1869 }
1870 } 1422 }
1871 1423
1872 /// Does this SU have a hazard within the current instruction group. 1424 /// Does this SU have a hazard within the current instruction group.
1873 /// 1425 ///
1874 /// The scheduler supports two modes of hazard recognition. The first is the 1426 /// The scheduler supports two modes of hazard recognition. The first is the
1880 /// simple counters that the scheduler itself maintains. It explicitly checks 1432 /// simple counters that the scheduler itself maintains. It explicitly checks
1881 /// for instruction dispatch limitations, including the number of micro-ops that 1433 /// for instruction dispatch limitations, including the number of micro-ops that
1882 /// can dispatch per cycle. 1434 /// can dispatch per cycle.
1883 /// 1435 ///
1884 /// TODO: Also check whether the SU must start a new group. 1436 /// TODO: Also check whether the SU must start a new group.
1885 bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { 1437 bool SchedBoundary::checkHazard(SUnit *SU) {
1886 if (HazardRec->isEnabled()) 1438 if (HazardRec->isEnabled())
1887 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 1439 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1888 1440
1889 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 1441 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1890 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { 1442 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1891 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" 1443 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1892 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); 1444 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1893 return true; 1445 return true;
1894 } 1446 }
1447 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1448 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1449 for (TargetSchedModel::ProcResIter
1450 PI = SchedModel->getWriteProcResBegin(SC),
1451 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1452 if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
1453 return true;
1454 }
1455 }
1895 return false; 1456 return false;
1896 } 1457 }
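
A minimal sketch of the micro-op interlock applied above when no target hazard recognizer is enabled; the issue width and counts are hypothetical.

// Hypothetical 4-wide machine: 3 micro-ops already issued this cycle plus a
// 2 micro-op instruction exceeds the issue width, so the node is treated as a
// hazard and deferred until the cycle is bumped.
static bool wouldStallIssueGroup(unsigned CurrMOps, unsigned UOps,
                                 unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + UOps > IssueWidth; // 3 + 2 > 4 -> hazard
}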
1897 1458
1898 // Find the unscheduled node in ReadySUs with the highest latency. 1459 // Find the unscheduled node in ReadySUs with the highest latency.
1899 unsigned GenericScheduler::SchedBoundary:: 1460 unsigned SchedBoundary::
1900 findMaxLatency(ArrayRef<SUnit*> ReadySUs) { 1461 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1901 SUnit *LateSU = 0; 1462 SUnit *LateSU = 0;
1902 unsigned RemLatency = 0; 1463 unsigned RemLatency = 0;
1903 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); 1464 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1904 I != E; ++I) { 1465 I != E; ++I) {
1916 } 1477 }
1917 1478
1918 // Count resources in this zone and the remaining unscheduled 1479 // Count resources in this zone and the remaining unscheduled
1919 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical 1480 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1920 // resource index, or zero if the zone is issue limited. 1481 // resource index, or zero if the zone is issue limited.
1921 unsigned GenericScheduler::SchedBoundary:: 1482 unsigned SchedBoundary::
1922 getOtherResourceCount(unsigned &OtherCritIdx) { 1483 getOtherResourceCount(unsigned &OtherCritIdx) {
1923 OtherCritIdx = 0; 1484 OtherCritIdx = 0;
1924 if (!SchedModel->hasInstrSchedModel()) 1485 if (!SchedModel->hasInstrSchedModel())
1925 return 0; 1486 return 0;
1926 1487
1937 } 1498 }
1938 } 1499 }
1939 if (OtherCritIdx) { 1500 if (OtherCritIdx) {
1940 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " 1501 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
1941 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) 1502 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1942 << " " << getResourceName(OtherCritIdx) << "\n"); 1503 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1943 } 1504 }
1944 return OtherCritCount; 1505 return OtherCritCount;
1945 } 1506 }
1946 1507
1947 /// Set the CandPolicy for this zone given the current resources and latencies 1508 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1948 /// inside and outside the zone. 1509 if (ReadyCycle < MinReadyCycle)
1949 void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, 1510 MinReadyCycle = ReadyCycle;
1950 SchedBoundary &OtherZone) { 1511
1951 // Now that potential stalls have been considered, apply preemptive heuristics 1512 // Check for interlocks first. For the purpose of other heuristics, an
1952 // based on the total latency and resources inside and outside this 1513 // instruction that cannot issue appears as if it's not in the ReadyQueue.
1953 // zone. 1514 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1515 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
1516 Pending.push(SU);
1517 else
1518 Available.push(SU);
1519
1520 // Record this node as an immediate dependent of the scheduled node.
1521 NextSUs.insert(SU);
1522 }
1523
1524 void SchedBoundary::releaseTopNode(SUnit *SU) {
1525 if (SU->isScheduled)
1526 return;
1527
1528 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1529 I != E; ++I) {
1530 if (I->isWeak())
1531 continue;
1532 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1533 unsigned Latency = I->getLatency();
1534 #ifndef NDEBUG
1535 MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1536 #endif
1537 if (SU->TopReadyCycle < PredReadyCycle + Latency)
1538 SU->TopReadyCycle = PredReadyCycle + Latency;
1539 }
1540 releaseNode(SU, SU->TopReadyCycle);
1541 }
1542
1543 void SchedBoundary::releaseBottomNode(SUnit *SU) {
1544 if (SU->isScheduled)
1545 return;
1546
1547 assert(SU->getInstr() && "Scheduled SUnit must have instr");
1548
1549 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1550 I != E; ++I) {
1551 if (I->isWeak())
1552 continue;
1553 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1554 unsigned Latency = I->getLatency();
1555 #ifndef NDEBUG
1556 MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1557 #endif
1558 if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1559 SU->BotReadyCycle = SuccReadyCycle + Latency;
1560 }
1561 releaseNode(SU, SU->BotReadyCycle);
1562 }
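
The ready-cycle propagation above is just a max over incoming edges; a small worked example with made-up predecessor cycles and latencies:

// Two predecessors become ready at cycles 2 and 5 with edge latencies 3 and 1.
// TopReadyCycle becomes max(2 + 3, 5 + 1) = 6, the value handed to releaseNode().
static unsigned topReadyCycleExample() {
  const unsigned PredReady[2] = {2, 5};
  const unsigned Latency[2] = {3, 1};
  unsigned ReadyCycle = 0;
  for (unsigned i = 0; i != 2; ++i)
    if (ReadyCycle < PredReady[i] + Latency[i])
      ReadyCycle = PredReady[i] + Latency[i];
  return ReadyCycle; // 6
}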
1563
1564 /// Move the boundary of scheduled code by one cycle.
1565 void SchedBoundary::bumpCycle(unsigned NextCycle) {
1566 if (SchedModel->getMicroOpBufferSize() == 0) {
1567 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1568 if (MinReadyCycle > NextCycle)
1569 NextCycle = MinReadyCycle;
1570 }
1571 // Update the current micro-ops, which will issue in the next cycle.
1572 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
1573 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
1574
1575 // Decrement DependentLatency based on the next cycle.
1576 if ((NextCycle - CurrCycle) > DependentLatency)
1577 DependentLatency = 0;
1578 else
1579 DependentLatency -= (NextCycle - CurrCycle);
1580
1581 if (!HazardRec->isEnabled()) {
1582 // Bypass HazardRec virtual calls.
1583 CurrCycle = NextCycle;
1584 }
1585 else {
1586 // Bypass getHazardType calls in case of long latency.
1587 for (; CurrCycle != NextCycle; ++CurrCycle) {
1588 if (isTop())
1589 HazardRec->AdvanceCycle();
1590 else
1591 HazardRec->RecedeCycle();
1592 }
1593 }
1594 CheckPending = true;
1595 unsigned LFactor = SchedModel->getLatencyFactor();
1596 IsResourceLimited =
1597 (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1598 > (int)LFactor;
1599
1600 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
1601 }
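
A worked instance of the resource-vs-latency test computed at the end of bumpCycle; all values are hypothetical.

// LatencyFactor = 2, 14 scaled units on the critical resource, 5 cycles of
// scheduled latency: 14 - 5 * 2 = 4 > 2, so the zone counts as resource limited.
static bool isResourceLimitedExample() {
  const unsigned LFactor = 2;          // hypothetical getLatencyFactor()
  const unsigned CriticalCount = 14;   // hypothetical getCriticalCount()
  const unsigned ScheduledLatency = 5; // hypothetical getScheduledLatency()
  return (int)(CriticalCount - ScheduledLatency * LFactor) > (int)LFactor;
}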
1602
1603 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
1604 ExecutedResCounts[PIdx] += Count;
1605 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
1606 MaxExecutedResCount = ExecutedResCounts[PIdx];
1607 }
1608
1609 /// Add the given processor resource to this scheduled zone.
1610 ///
1611 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
1612 /// during which this resource is consumed.
1613 ///
1614 /// \return the next cycle at which the instruction may execute without
1615 /// oversubscribing resources.
1616 unsigned SchedBoundary::
1617 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
1618 unsigned Factor = SchedModel->getResourceFactor(PIdx);
1619 unsigned Count = Factor * Cycles;
1620 DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
1621 << " +" << Cycles << "x" << Factor << "u\n");
1622
1623 // Update Executed resources counts.
1624 incExecutedResources(PIdx, Count);
1625 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1626 Rem->RemainingCounts[PIdx] -= Count;
1627
1628 // Check if this resource exceeds the current critical resource. If so, it
1629 // becomes the critical resource.
1630 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
1631 ZoneCritResIdx = PIdx;
1632 DEBUG(dbgs() << " *** Critical resource "
1633 << SchedModel->getResourceName(PIdx) << ": "
1634 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
1635 }
1636 // For reserved resources, record the highest cycle using the resource.
1637 unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
1638 if (NextAvailable > CurrCycle) {
1639 DEBUG(dbgs() << " Resource conflict: "
1640 << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
1641 << NextAvailable << "\n");
1642 }
1643 return NextAvailable;
1644 }
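
The scaled units printed by countResource come from TargetSchedModel's resource factors; assuming the usual definition (each factor is the LCM of the issue width and all unit counts, divided by that resource's unit count), a hypothetical machine makes the scaling concrete:

// IssueWidth = 4, an ALU with 3 units, an LSU with 2 units: LCM(4, 3, 2) = 12,
// so LatencyFactor = 12, MicroOpFactor = 12 / 4 = 3, ALU factor = 12 / 3 = 4 and
// LSU factor = 12 / 2 = 6. One LSU cycle therefore counts as 6 scaled units,
// directly comparable with 4 units per ALU cycle or 3 units per issued micro-op.
static unsigned scaledLSUCycleExample() {
  const unsigned LCMFactor = 12;        // lcm of issue width and unit counts
  const unsigned LSUFactor = LCMFactor / 2;
  const unsigned Cycles = 1;
  return LSUFactor * Cycles;            // 6, as accumulated by countResource()
}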
1645
1646 /// Move the boundary of scheduled code by one SUnit.
1647 void SchedBoundary::bumpNode(SUnit *SU) {
1648 // Update the reservation table.
1649 if (HazardRec->isEnabled()) {
1650 if (!isTop() && SU->isCall) {
1651 // Calls are scheduled with their preceding instructions. For bottom-up
1652 // scheduling, clear the pipeline state before emitting.
1653 HazardRec->Reset();
1654 }
1655 HazardRec->EmitInstruction(SU);
1656 }
1657 // checkHazard should prevent scheduling multiple instructions per cycle that
1658 // exceed the issue width.
1659 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1660 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
1661 assert(
1662 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
1663 "Cannot schedule this instruction's MicroOps in the current cycle.");
1664
1665 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1666 DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
1667
1668 unsigned NextCycle = CurrCycle;
1669 switch (SchedModel->getMicroOpBufferSize()) {
1670 case 0:
1671 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
1672 break;
1673 case 1:
1674 if (ReadyCycle > NextCycle) {
1675 NextCycle = ReadyCycle;
1676 DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
1677 }
1678 break;
1679 default:
1680 // We don't currently model the OOO reorder buffer, so consider all
1681 // scheduled MOps to be "retired". We do loosely model in-order resource
1682 // latency. If this instruction uses an in-order resource, account for any
1683 // likely stall cycles.
1684 if (SU->isUnbuffered && ReadyCycle > NextCycle)
1685 NextCycle = ReadyCycle;
1686 break;
1687 }
1688 RetiredMOps += IncMOps;
1689
1690 // Update resource counts and critical resource.
1691 if (SchedModel->hasInstrSchedModel()) {
1692 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
1693 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
1694 Rem->RemIssueCount -= DecRemIssue;
1695 if (ZoneCritResIdx) {
1696 // Scale scheduled micro-ops for comparing with the critical resource.
1697 unsigned ScaledMOps =
1698 RetiredMOps * SchedModel->getMicroOpFactor();
1699
1700 // If scaled micro-ops are now more than the previous critical resource by
1701 // a full cycle, then micro-ops issue becomes critical.
1702 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
1703 >= (int)SchedModel->getLatencyFactor()) {
1704 ZoneCritResIdx = 0;
1705 DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
1706 << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
1707 }
1708 }
1709 for (TargetSchedModel::ProcResIter
1710 PI = SchedModel->getWriteProcResBegin(SC),
1711 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1712 unsigned RCycle =
1713 countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
1714 if (RCycle > NextCycle)
1715 NextCycle = RCycle;
1716 }
1717 if (SU->hasReservedResource) {
1718 // For reserved resources, record the highest cycle using the resource.
1719 // For top-down scheduling, this is the cycle in which we schedule this
1720 // instruction plus the number of cycles the operation reserves the
1721 // resource. For bottom-up it is simply the instruction's cycle.
1722 for (TargetSchedModel::ProcResIter
1723 PI = SchedModel->getWriteProcResBegin(SC),
1724 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1725 unsigned PIdx = PI->ProcResourceIdx;
1726 if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
1727 ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
1728 }
1729 }
1730 }
1731 // Update ExpectedLatency and DependentLatency.
1732 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1733 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1734 if (SU->getDepth() > TopLatency) {
1735 TopLatency = SU->getDepth();
1736 DEBUG(dbgs() << " " << Available.getName()
1737 << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
1738 }
1739 if (SU->getHeight() > BotLatency) {
1740 BotLatency = SU->getHeight();
1741 DEBUG(dbgs() << " " << Available.getName()
1742 << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
1743 }
1744 // If we stall for any reason, bump the cycle.
1745 if (NextCycle > CurrCycle) {
1746 bumpCycle(NextCycle);
1747 }
1748 else {
1749 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
1750 // resource limited. If a stall occurred, bumpCycle does this.
1751 unsigned LFactor = SchedModel->getLatencyFactor();
1752 IsResourceLimited =
1753 (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1754 > (int)LFactor;
1755 }
1756 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
1757 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
1758 // one cycle. Since we commonly reach the max MOps here, opportunistically
1759 // bump the cycle to avoid uselessly checking everything in the readyQ.
1760 CurrMOps += IncMOps;
1761 while (CurrMOps >= SchedModel->getIssueWidth()) {
1762 bumpCycle(++NextCycle);
1763 DEBUG(dbgs() << " *** Max MOps " << CurrMOps
1764 << " at cycle " << CurrCycle << '\n');
1765 }
1766 DEBUG(dumpScheduledState());
1767 }
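
Ignoring the MinReadyCycle adjustment inside bumpCycle, the closing loop of bumpNode behaves as in this hypothetical case:

// IssueWidth = 2 and an instruction that expands to 5 micro-ops: the loop bumps
// the cycle twice (5 -> 3 -> 1 remaining micro-ops), so the zone advances two
// cycles and the next instruction starts a fresh issue group.
static unsigned extraCyclesForWideInstr(unsigned CurrMOps, unsigned IssueWidth) {
  unsigned Bumps = 0;
  while (CurrMOps >= IssueWidth) {
    CurrMOps -= IssueWidth; // each bumpCycle() retires one issue group
    ++Bumps;
  }
  return Bumps; // 2 for CurrMOps = 5, IssueWidth = 2
}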
1768
1769 /// Release pending ready nodes into the available queue. This makes them
1770 /// visible to heuristics.
1771 void SchedBoundary::releasePending() {
1772 // If the available queue is empty, it is safe to reset MinReadyCycle.
1773 if (Available.empty())
1774 MinReadyCycle = UINT_MAX;
1775
1776 // Check to see if any of the pending instructions are ready to issue. If
1777 // so, add them to the available queue.
1778 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1779 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1780 SUnit *SU = *(Pending.begin()+i);
1781 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1782
1783 if (ReadyCycle < MinReadyCycle)
1784 MinReadyCycle = ReadyCycle;
1785
1786 if (!IsBuffered && ReadyCycle > CurrCycle)
1787 continue;
1788
1789 if (checkHazard(SU))
1790 continue;
1791
1792 Available.push(SU);
1793 Pending.remove(Pending.begin()+i);
1794 --i; --e;
1795 }
1796 DEBUG(if (!Pending.empty()) Pending.dump());
1797 CheckPending = false;
1798 }
1799
1800 /// Remove SU from the ready set for this boundary.
1801 void SchedBoundary::removeReady(SUnit *SU) {
1802 if (Available.isInQueue(SU))
1803 Available.remove(Available.find(SU));
1804 else {
1805 assert(Pending.isInQueue(SU) && "bad ready count");
1806 Pending.remove(Pending.find(SU));
1807 }
1808 }
1809
1810 /// If this queue only has one ready candidate, return it. As a side effect,
1811 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1812 /// one node is ready. If multiple instructions are ready, return NULL.
1813 SUnit *SchedBoundary::pickOnlyChoice() {
1814 if (CheckPending)
1815 releasePending();
1816
1817 if (CurrMOps > 0) {
1818 // Defer any ready instrs that now have a hazard.
1819 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1820 if (checkHazard(*I)) {
1821 Pending.push(*I);
1822 I = Available.remove(I);
1823 continue;
1824 }
1825 ++I;
1826 }
1827 }
1828 for (unsigned i = 0; Available.empty(); ++i) {
1829 assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
1830 "permanent hazard"); (void)i;
1831 bumpCycle(CurrCycle + 1);
1832 releasePending();
1833 }
1834 if (Available.size() == 1)
1835 return *Available.begin();
1836 return NULL;
1837 }
1838
1839 #ifndef NDEBUG
1840 // This is useful information to dump after bumpNode.
1841 // Note that the Queue contents are more useful before pickNodeFromQueue.
1842 void SchedBoundary::dumpScheduledState() {
1843 unsigned ResFactor;
1844 unsigned ResCount;
1845 if (ZoneCritResIdx) {
1846 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
1847 ResCount = getResourceCount(ZoneCritResIdx);
1848 }
1849 else {
1850 ResFactor = SchedModel->getMicroOpFactor();
1851 ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
1852 }
1853 unsigned LFactor = SchedModel->getLatencyFactor();
1854 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
1855 << " Retired: " << RetiredMOps;
1856 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
1857 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
1858 << ResCount / ResFactor << " "
1859 << SchedModel->getResourceName(ZoneCritResIdx)
1860 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
1861 << (IsResourceLimited ? " - Resource" : " - Latency")
1862 << " limited.\n";
1863 }
1864 #endif
1865
1866 //===----------------------------------------------------------------------===//
1867 // GenericScheduler - Implementation of the generic MachineSchedStrategy.
1868 //===----------------------------------------------------------------------===//
1869
1870 namespace {
1871 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1872 /// the schedule.
1873 class GenericScheduler : public MachineSchedStrategy {
1874 public:
1875 /// Represent the type of SchedCandidate found within a single queue.
1876 /// pickNodeBidirectional depends on these listed by decreasing priority.
1877 enum CandReason {
1878 NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
1879 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
1880 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
1881
1882 #ifndef NDEBUG
1883 static const char *getReasonStr(GenericScheduler::CandReason Reason);
1884 #endif
1885
1886 /// Policy for scheduling the next instruction in the candidate's zone.
1887 struct CandPolicy {
1888 bool ReduceLatency;
1889 unsigned ReduceResIdx;
1890 unsigned DemandResIdx;
1891
1892 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1893 };
1894
1895 /// Status of an instruction's critical resource consumption.
1896 struct SchedResourceDelta {
1897 // Count critical resources in the scheduled region required by SU.
1898 unsigned CritResources;
1899
1900 // Count critical resources from another region consumed by SU.
1901 unsigned DemandedResources;
1902
1903 SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1904
1905 bool operator==(const SchedResourceDelta &RHS) const {
1906 return CritResources == RHS.CritResources
1907 && DemandedResources == RHS.DemandedResources;
1908 }
1909 bool operator!=(const SchedResourceDelta &RHS) const {
1910 return !operator==(RHS);
1911 }
1912 };
1913
1914 /// Store the state used by GenericScheduler heuristics, required for the
1915 /// lifetime of one invocation of pickNode().
1916 struct SchedCandidate {
1917 CandPolicy Policy;
1918
1919 // The best SUnit candidate.
1920 SUnit *SU;
1921
1922 // The reason for this candidate.
1923 CandReason Reason;
1924
1925 // Set of reasons that apply to multiple candidates.
1926 uint32_t RepeatReasonSet;
1927
1928 // Register pressure values for the best candidate.
1929 RegPressureDelta RPDelta;
1930
1931 // Critical resource consumption of the best candidate.
1932 SchedResourceDelta ResDelta;
1933
1934 SchedCandidate(const CandPolicy &policy)
1935 : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
1936
1937 bool isValid() const { return SU; }
1938
1939 // Copy the status of another candidate without changing policy.
1940 void setBest(SchedCandidate &Best) {
1941 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1942 SU = Best.SU;
1943 Reason = Best.Reason;
1944 RPDelta = Best.RPDelta;
1945 ResDelta = Best.ResDelta;
1946 }
1947
1948 bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
1949 void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
1950
1951 void initResourceDelta(const ScheduleDAGMI *DAG,
1952 const TargetSchedModel *SchedModel);
1953 };
1954
1955 private:
1956 const MachineSchedContext *Context;
1957 ScheduleDAGMI *DAG;
1958 const TargetSchedModel *SchedModel;
1959 const TargetRegisterInfo *TRI;
1960
1961 // State of the top and bottom scheduled instruction boundaries.
1962 SchedRemainder Rem;
1963 SchedBoundary Top;
1964 SchedBoundary Bot;
1965
1966 MachineSchedPolicy RegionPolicy;
1967 public:
1968 GenericScheduler(const MachineSchedContext *C):
1969 Context(C), DAG(0), SchedModel(0), TRI(0),
1970 Top(SchedBoundary::TopQID, "TopQ"), Bot(SchedBoundary::BotQID, "BotQ") {}
1971
1972 virtual void initPolicy(MachineBasicBlock::iterator Begin,
1973 MachineBasicBlock::iterator End,
1974 unsigned NumRegionInstrs);
1975
1976 bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
1977
1978 virtual void initialize(ScheduleDAGMI *dag);
1979
1980 virtual SUnit *pickNode(bool &IsTopNode);
1981
1982 virtual void schedNode(SUnit *SU, bool IsTopNode);
1983
1984 virtual void releaseTopNode(SUnit *SU) { Top.releaseTopNode(SU); }
1985
1986 virtual void releaseBottomNode(SUnit *SU) { Bot.releaseBottomNode(SU); }
1987
1988 virtual void registerRoots();
1989
1990 protected:
1991 void checkAcyclicLatency();
1992
1993 void setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone,
1994 SchedBoundary &OtherZone);
1995
1996 void tryCandidate(SchedCandidate &Cand,
1997 SchedCandidate &TryCand,
1998 SchedBoundary &Zone,
1999 const RegPressureTracker &RPTracker,
2000 RegPressureTracker &TempTracker);
2001
2002 SUnit *pickNodeBidirectional(bool &IsTopNode);
2003
2004 void pickNodeFromQueue(SchedBoundary &Zone,
2005 const RegPressureTracker &RPTracker,
2006 SchedCandidate &Candidate);
2007
2008 void reschedulePhysRegCopies(SUnit *SU, bool isTop);
2009
2010 #ifndef NDEBUG
2011 void traceCandidate(const SchedCandidate &Cand);
2012 #endif
2013 };
2014 } // namespace
2015
2016 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2017 DAG = dag;
2018 SchedModel = DAG->getSchedModel();
2019 TRI = DAG->TRI;
2020
2021 Rem.init(DAG, SchedModel);
2022 Top.init(DAG, SchedModel, &Rem);
2023 Bot.init(DAG, SchedModel, &Rem);
2024
2025 // Initialize resource counts.
2026
2027 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2028 // are disabled, then these HazardRecs will be disabled.
2029 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2030 const TargetMachine &TM = DAG->MF.getTarget();
2031 if (!Top.HazardRec) {
2032 Top.HazardRec =
2033 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2034 }
2035 if (!Bot.HazardRec) {
2036 Bot.HazardRec =
2037 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2038 }
2039 }
2040
2041 /// Initialize the per-region scheduling policy.
2042 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2043 MachineBasicBlock::iterator End,
2044 unsigned NumRegionInstrs) {
2045 const TargetMachine &TM = Context->MF->getTarget();
2046
2047 // Avoid setting up the register pressure tracker for small regions to save
2048 // compile time. As a rough heuristic, only track pressure when the number of
2049 // schedulable instructions exceeds half the integer register file.
2050 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2051 TM.getTargetLowering()->getRegClassFor(MVT::i32));
2052
2053 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2054
2055 // For generic targets, we default to bottom-up, because it's simpler and more
2056 // compile-time optimizations have been implemented in that direction.
2057 RegionPolicy.OnlyBottomUp = true;
2058
2059 // Allow the subtarget to override default policy.
2060 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
2061 ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
2062
2063 // After subtarget overrides, apply command line options.
2064 if (!EnableRegPressure)
2065 RegionPolicy.ShouldTrackPressure = false;
2066
2067 // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2068 // e.g. -misched-bottomup=false allows scheduling in both directions.
2069 assert((!ForceTopDown || !ForceBottomUp) &&
2070 "-misched-topdown incompatible with -misched-bottomup");
2071 if (ForceBottomUp.getNumOccurrences() > 0) {
2072 RegionPolicy.OnlyBottomUp = ForceBottomUp;
2073 if (RegionPolicy.OnlyBottomUp)
2074 RegionPolicy.OnlyTopDown = false;
2075 }
2076 if (ForceTopDown.getNumOccurrences() > 0) {
2077 RegionPolicy.OnlyTopDown = ForceTopDown;
2078 if (RegionPolicy.OnlyTopDown)
2079 RegionPolicy.OnlyBottomUp = false;
2080 }
2081 }
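
A numeric instance of the pressure-tracking threshold above; the register count is hypothetical.

// With 16 allocatable 32-bit integer registers, only regions of more than
// 16 / 2 = 8 schedulable instructions get a register pressure tracker, unless
// the subtarget hook or the EnableRegPressure option overrides the decision.
static bool shouldTrackPressureExample(unsigned NumRegionInstrs) {
  const unsigned NIntRegs = 16; // hypothetical allocatable register count
  return NumRegionInstrs > (NIntRegs / 2);
}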
2082
2083 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2084 /// critical path by more cycles than it takes to drain the instruction buffer.
2085 /// We estimate an upper bound on in-flight instructions as:
2086 ///
2087 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2088 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2089 /// InFlightResources = InFlightIterations * LoopResources
2090 ///
2091 /// TODO: Check execution resources in addition to IssueCount.
2092 void GenericScheduler::checkAcyclicLatency() {
2093 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2094 return;
2095
2096 // Scaled number of cycles per loop iteration.
2097 unsigned IterCount =
2098 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2099 Rem.RemIssueCount);
2100 // Scaled acyclic critical path.
2101 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2102 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2103 unsigned InFlightCount =
2104 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2105 unsigned BufferLimit =
2106 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2107
2108 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2109
2110 DEBUG(dbgs() << "IssueCycles="
2111 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2112 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2113 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2114 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2115 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2116 if (Rem.IsAcyclicLatencyLimited)
2117 dbgs() << " ACYCLIC LATENCY LIMIT\n");
2118 }
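
A worked instance of the in-flight estimate above, with hypothetical loop and buffer values.

// LatencyFactor = MicroOpFactor = 1, cyclic critical path 4c, acyclic critical
// path 40c, 8 micro-ops left per iteration, 32 entry micro-op buffer.
static bool acyclicLatencyLimitedExample() {
  const unsigned IterCount = 8;       // max(CyclicCritPath * LFactor, RemIssueCount)
  const unsigned RemIssueCount = 8;
  const unsigned AcyclicCount = 40;   // CriticalPath * LFactor
  const unsigned InFlightCount =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount; // 40
  const unsigned BufferLimit = 32;    // MicroOpBufferSize * MicroOpFactor
  return InFlightCount > BufferLimit; // 40 > 32, so IsAcyclicLatencyLimited is set
}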
2119
2120 void GenericScheduler::registerRoots() {
2121 Rem.CriticalPath = DAG->ExitSU.getDepth();
2122
2123 // Some roots may not feed into ExitSU. Check all of them in case.
2124 for (std::vector<SUnit*>::const_iterator
2125 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2126 if ((*I)->getDepth() > Rem.CriticalPath)
2127 Rem.CriticalPath = (*I)->getDepth();
2128 }
2129 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
2130
2131 if (EnableCyclicPath) {
2132 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2133 checkAcyclicLatency();
2134 }
2135 }
2136
2137 /// Set the CandPolicy for a scheduling zone given the current resources and
2138 /// latencies inside and outside the zone.
2139 void GenericScheduler::setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone,
2140 SchedBoundary &OtherZone) {
2141 // Apply preemptive heuristics based on the total latency and resources
2142 // inside and outside this zone. Potential stalls should be considered before
2143 // following this policy.
1954 2144
1955 // Compute remaining latency. We need this both to determine whether the 2145 // Compute remaining latency. We need this both to determine whether the
1956 // overall schedule has become latency-limited and whether the instructions 2146 // overall schedule has become latency-limited and whether the instructions
1957 // outside this zone are resource or latency limited. 2147 // outside this zone are resource or latency limited.
1958 // 2148 //
1963 // 2153 //
1964 // The "independent" latency is the max ready queue depth: 2154 // The "independent" latency is the max ready queue depth:
1965 // ILat = max N.depth for N in Available|Pending 2155 // ILat = max N.depth for N in Available|Pending
1966 // 2156 //
1967 // RemainingLatency is the greater of independent and dependent latency. 2157 // RemainingLatency is the greater of independent and dependent latency.
1968 unsigned RemLatency = DependentLatency; 2158 unsigned RemLatency = CurrZone.getDependentLatency();
1969 RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); 2159 RemLatency = std::max(RemLatency,
1970 RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); 2160 CurrZone.findMaxLatency(CurrZone.Available.elements()));
2161 RemLatency = std::max(RemLatency,
2162 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
1971 2163
1972 // Compute the critical resource outside the zone. 2164 // Compute the critical resource outside the zone.
1973 unsigned OtherCritIdx; 2165 unsigned OtherCritIdx;
1974 unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); 2166 unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
1975 2167
1976 bool OtherResLimited = false; 2168 bool OtherResLimited = false;
1977 if (SchedModel->hasInstrSchedModel()) { 2169 if (SchedModel->hasInstrSchedModel()) {
1978 unsigned LFactor = SchedModel->getLatencyFactor(); 2170 unsigned LFactor = SchedModel->getLatencyFactor();
1979 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; 2171 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
1980 } 2172 }
1981 if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { 2173 if (!OtherResLimited
2174 && (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
1982 Policy.ReduceLatency |= true; 2175 Policy.ReduceLatency |= true;
1983 DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " 2176 DEBUG(dbgs() << " " << CurrZone.Available.getName() << " RemainingLatency "
1984 << RemLatency << " + " << CurrCycle << "c > CritPath " 2177 << RemLatency << " + " << CurrZone.getCurrCycle() << "c > CritPath "
1985 << Rem->CriticalPath << "\n"); 2178 << Rem.CriticalPath << "\n");
1986 } 2179 }
1987 // If the same resource is limiting inside and outside the zone, do nothing. 2180 // If the same resource is limiting inside and outside the zone, do nothing.
1988 if (ZoneCritResIdx == OtherCritIdx) 2181 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
1989 return; 2182 return;
1990 2183
1991 DEBUG( 2184 DEBUG(
1992 if (IsResourceLimited) { 2185 if (CurrZone.isResourceLimited()) {
1993 dbgs() << " " << Available.getName() << " ResourceLimited: " 2186 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
1994 << getResourceName(ZoneCritResIdx) << "\n"; 2187 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2188 << "\n";
1995 } 2189 }
1996 if (OtherResLimited) 2190 if (OtherResLimited)
1997 dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; 2191 dbgs() << " RemainingLimit: "
1998 if (!IsResourceLimited && !OtherResLimited) 2192 << SchedModel->getResourceName(OtherCritIdx) << "\n";
2193 if (!CurrZone.isResourceLimited() && !OtherResLimited)
1999 dbgs() << " Latency limited both directions.\n"); 2194 dbgs() << " Latency limited both directions.\n");
2000 2195
2001 if (IsResourceLimited && !Policy.ReduceResIdx) 2196 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2002 Policy.ReduceResIdx = ZoneCritResIdx; 2197 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2003 2198
2004 if (OtherResLimited) 2199 if (OtherResLimited)
2005 Policy.DemandResIdx = OtherCritIdx; 2200 Policy.DemandResIdx = OtherCritIdx;
2006 } 2201 }
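
A small numeric instance of the latency trigger in setPolicy; the cycle counts are hypothetical.

// 7 cycles of remaining latency with the zone already at cycle 9 against a
// 14 cycle critical path: 7 + 9 > 14, so (when the other zone is not resource
// limited) the policy asks for latency reduction.
static bool shouldReduceLatencyExample() {
  const unsigned RemLatency = 7;
  const unsigned CurrCycle = 9;
  const unsigned CriticalPath = 14;
  return RemLatency + CurrCycle > CriticalPath;
}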
2007
2008 void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
2009 unsigned ReadyCycle) {
2010 if (ReadyCycle < MinReadyCycle)
2011 MinReadyCycle = ReadyCycle;
2012
2013 // Check for interlocks first. For the purpose of other heuristics, an
2014 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2015 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2016 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
2017 Pending.push(SU);
2018 else
2019 Available.push(SU);
2020
2021 // Record this node as an immediate dependent of the scheduled node.
2022 NextSUs.insert(SU);
2023 }
2024
2025 /// Move the boundary of scheduled code by one cycle.
2026 void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
2027 if (SchedModel->getMicroOpBufferSize() == 0) {
2028 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
2029 if (MinReadyCycle > NextCycle)
2030 NextCycle = MinReadyCycle;
2031 }
2032 // Update the current micro-ops, which will issue in the next cycle.
2033 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2034 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2035
2036 // Decrement DependentLatency based on the next cycle.
2037 if ((NextCycle - CurrCycle) > DependentLatency)
2038 DependentLatency = 0;
2039 else
2040 DependentLatency -= (NextCycle - CurrCycle);
2041
2042 if (!HazardRec->isEnabled()) {
2043 // Bypass HazardRec virtual calls.
2044 CurrCycle = NextCycle;
2045 }
2046 else {
2047 // Bypass getHazardType calls in case of long latency.
2048 for (; CurrCycle != NextCycle; ++CurrCycle) {
2049 if (isTop())
2050 HazardRec->AdvanceCycle();
2051 else
2052 HazardRec->RecedeCycle();
2053 }
2054 }
2055 CheckPending = true;
2056 unsigned LFactor = SchedModel->getLatencyFactor();
2057 IsResourceLimited =
2058 (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2059 > (int)LFactor;
2060
2061 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2062 }
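As a worked example of the bookkeeping in bumpCycle above, here is a minimal sketch with hypothetical numbers (the values are illustrative only): advancing the boundary by N cycles retires up to IssueWidth micro-ops per cycle from CurrMOps and drains the same number of cycles from DependentLatency.

    #include <cstdio>

    int main() {
      unsigned CurrCycle = 3, NextCycle = 5;       // advance the zone by 2 cycles
      unsigned IssueWidth = 4;                     // micro-ops per cycle
      unsigned CurrMOps = 6, DependentLatency = 5;

      unsigned Cycles = NextCycle - CurrCycle;
      unsigned DecMOps = IssueWidth * Cycles;      // 8 micro-ops could have issued
      CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;   // 6 <= 8 -> 0
      DependentLatency = (Cycles > DependentLatency)
                             ? 0 : DependentLatency - Cycles;      // 5 - 2 -> 3
      CurrCycle = NextCycle;
      std::printf("cycle=%u mops=%u deplat=%u\n", CurrCycle, CurrMOps,
                  DependentLatency);               // cycle=5 mops=0 deplat=3
    }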
2063
2064 void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
2065 unsigned Count) {
2066 ExecutedResCounts[PIdx] += Count;
2067 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2068 MaxExecutedResCount = ExecutedResCounts[PIdx];
2069 }
2070
2071 /// Add the given processor resource to this scheduled zone.
2072 ///
2073 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2074 /// during which this resource is consumed.
2075 ///
2076 /// \return the next cycle at which the instruction may execute without
2077 /// oversubscribing resources.
2078 unsigned GenericScheduler::SchedBoundary::
2079 countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
2080 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2081 unsigned Count = Factor * Cycles;
2082 DEBUG(dbgs() << " " << getResourceName(PIdx)
2083 << " +" << Cycles << "x" << Factor << "u\n");
2084
2085 // Update Executed resources counts.
2086 incExecutedResources(PIdx, Count);
2087 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2088 Rem->RemainingCounts[PIdx] -= Count;
2089
2090 // Check if this resource exceeds the current critical resource. If so, it
2091 // becomes the critical resource.
2092 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2093 ZoneCritResIdx = PIdx;
2094 DEBUG(dbgs() << " *** Critical resource "
2095 << getResourceName(PIdx) << ": "
2096 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2097 }
2098 // TODO: We don't yet model reserved resources. It's not hard though.
2099 return CurrCycle;
2100 }
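The scaled-unit arithmetic used by countResource above can be shown in isolation; the sketch below uses hypothetical resource indices and factors. A resource busy for Cycles cycles adds Cycles * ResourceFactor scaled units, which makes different resources (and micro-op issue) directly comparable, and the largest scaled count defines the zone's critical resource.

    #include <cstdio>
    #include <vector>

    int main() {
      unsigned LatencyFactor = 2;
      // Index 0 is reserved for micro-op issue; 1 and 2 are hypothetical units.
      std::vector<unsigned> ExecutedResCounts = {0, 4, 2};
      unsigned ZoneCritResIdx = 1;                  // unit 1 currently critical

      unsigned PIdx = 2, Cycles = 3, Factor = 2;    // consume unit 2 for 3 cycles
      ExecutedResCounts[PIdx] += Cycles * Factor;   // 2 + 6 = 8 scaled units
      if (PIdx != ZoneCritResIdx &&
          ExecutedResCounts[PIdx] > ExecutedResCounts[ZoneCritResIdx])
        ZoneCritResIdx = PIdx;                      // unit 2 becomes critical
      std::printf("crit=%u count=%uc\n", ZoneCritResIdx,
                  ExecutedResCounts[ZoneCritResIdx] / LatencyFactor); // crit=2, 4c
    }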
2101
2102 /// Move the boundary of scheduled code by one SUnit.
2103 void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
2104 // Update the reservation table.
2105 if (HazardRec->isEnabled()) {
2106 if (!isTop() && SU->isCall) {
2107 // Calls are scheduled with their preceding instructions. For bottom-up
2108 // scheduling, clear the pipeline state before emitting.
2109 HazardRec->Reset();
2110 }
2111 HazardRec->EmitInstruction(SU);
2112 }
2113 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2114 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2115 CurrMOps += IncMOps;
2116 // checkHazard prevents scheduling multiple instructions per cycle that exceed
2117 // issue width. However, we commonly reach the maximum. In this case
2118 // opportunistically bump the cycle to avoid uselessly checking everything in
2119 // the readyQ. Furthermore, a single instruction may produce more than one
2120 // cycle's worth of micro-ops.
2121 //
2122 // TODO: Also check if this SU must end a dispatch group.
2123 unsigned NextCycle = CurrCycle;
2124 if (CurrMOps >= SchedModel->getIssueWidth()) {
2125 ++NextCycle;
2126 DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2127 << " at cycle " << CurrCycle << '\n');
2128 }
2129 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2130 DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2131
2132 switch (SchedModel->getMicroOpBufferSize()) {
2133 case 0:
2134 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2135 break;
2136 case 1:
2137 if (ReadyCycle > NextCycle) {
2138 NextCycle = ReadyCycle;
2139 DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2140 }
2141 break;
2142 default:
2143 // We don't currently model the OOO reorder buffer, so consider all
2144 // scheduled MOps to be "retired".
2145 break;
2146 }
2147 RetiredMOps += IncMOps;
2148
2149 // Update resource counts and critical resource.
2150 if (SchedModel->hasInstrSchedModel()) {
2151 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2152 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2153 Rem->RemIssueCount -= DecRemIssue;
2154 if (ZoneCritResIdx) {
2155 // Scale scheduled micro-ops for comparing with the critical resource.
2156 unsigned ScaledMOps =
2157 RetiredMOps * SchedModel->getMicroOpFactor();
2158
2159 // If scaled micro-ops are now more than the previous critical resource by
2160 // a full cycle, then micro-ops issue becomes critical.
2161 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2162 >= (int)SchedModel->getLatencyFactor()) {
2163 ZoneCritResIdx = 0;
2164 DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2165 << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2166 }
2167 }
2168 for (TargetSchedModel::ProcResIter
2169 PI = SchedModel->getWriteProcResBegin(SC),
2170 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2171 unsigned RCycle =
2172 countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
2173 if (RCycle > NextCycle)
2174 NextCycle = RCycle;
2175 }
2176 }
2177 // Update ExpectedLatency and DependentLatency.
2178 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2179 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2180 if (SU->getDepth() > TopLatency) {
2181 TopLatency = SU->getDepth();
2182 DEBUG(dbgs() << " " << Available.getName()
2183 << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2184 }
2185 if (SU->getHeight() > BotLatency) {
2186 BotLatency = SU->getHeight();
2187 DEBUG(dbgs() << " " << Available.getName()
2188 << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2189 }
2190 // If we stall for any reason, bump the cycle.
2191 if (NextCycle > CurrCycle) {
2192 bumpCycle(NextCycle);
2193 }
2194 else {
2195 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
 2196 // resource limited. If a stall occurred, bumpCycle does this.
2197 unsigned LFactor = SchedModel->getLatencyFactor();
2198 IsResourceLimited =
2199 (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2200 > (int)LFactor;
2201 }
2202 DEBUG(dumpScheduledState());
2203 }
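The MicroOpBufferSize switch in bumpNode above is the part that decides whether an instruction can stall the zone; a minimal sketch of that decision, with a hypothetical helper name and cycle numbers:

    #include <cassert>
    #include <cstdio>

    static unsigned nextCycleAfterIssue(unsigned MicroOpBufferSize,
                                        unsigned CurrCycle, unsigned NextCycle,
                                        unsigned ReadyCycle) {
      switch (MicroOpBufferSize) {
      case 0:  // strictly in-order: the pending queue must not release nodes early
        assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
        break;
      case 1:  // a single-entry buffer stalls in place until operands are ready
        if (ReadyCycle > NextCycle)
          NextCycle = ReadyCycle;
        break;
      default: // larger buffers approximate an OOO window; no stall is modeled
        break;
      }
      return NextCycle;
    }

    int main() {
      // Buffer size 1, currently at cycle 4, tentatively issuing at 5, but the
      // operands are not ready until cycle 9: the zone stalls to cycle 9.
      std::printf("%u\n", nextCycleAfterIssue(1, 4, 5, 9));
    }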
2204
 2205 /// Release pending ready nodes into the available queue. This makes them
2206 /// visible to heuristics.
2207 void GenericScheduler::SchedBoundary::releasePending() {
2208 // If the available queue is empty, it is safe to reset MinReadyCycle.
2209 if (Available.empty())
2210 MinReadyCycle = UINT_MAX;
2211
2212 // Check to see if any of the pending instructions are ready to issue. If
2213 // so, add them to the available queue.
2214 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2215 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2216 SUnit *SU = *(Pending.begin()+i);
2217 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2218
2219 if (ReadyCycle < MinReadyCycle)
2220 MinReadyCycle = ReadyCycle;
2221
2222 if (!IsBuffered && ReadyCycle > CurrCycle)
2223 continue;
2224
2225 if (checkHazard(SU))
2226 continue;
2227
2228 Available.push(SU);
2229 Pending.remove(Pending.begin()+i);
2230 --i; --e;
2231 }
2232 DEBUG(if (!Pending.empty()) Pending.dump());
2233 CheckPending = false;
2234 }
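releaseNode and releasePending above share one readiness rule: on an unbuffered (in-order) model a node stays pending until CurrCycle reaches its ReadyCycle, while any nonzero micro-op buffer lets it become available early as long as no hazard remains. A minimal sketch of that predicate, with hypothetical inputs:

    #include <cstdio>

    static bool canRelease(unsigned MicroOpBufferSize, unsigned ReadyCycle,
                           unsigned CurrCycle, bool Hazard) {
      bool IsBuffered = MicroOpBufferSize != 0;
      return (IsBuffered || ReadyCycle <= CurrCycle) && !Hazard;
    }

    int main() {
      std::printf("%d %d %d\n",
                  canRelease(0, 5, 3, false),   // unbuffered, not ready yet: 0
                  canRelease(2, 5, 3, false),   // buffered, released early:  1
                  canRelease(2, 5, 3, true));   // hazard keeps it pending:   0
    }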
2235
2236 /// Remove SU from the ready set for this boundary.
2237 void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
2238 if (Available.isInQueue(SU))
2239 Available.remove(Available.find(SU));
2240 else {
2241 assert(Pending.isInQueue(SU) && "bad ready count");
2242 Pending.remove(Pending.find(SU));
2243 }
2244 }
2245
2246 /// If this queue only has one ready candidate, return it. As a side effect,
2247 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2248 /// one node is ready. If multiple instructions are ready, return NULL.
2249 SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
2250 if (CheckPending)
2251 releasePending();
2252
2253 if (CurrMOps > 0) {
2254 // Defer any ready instrs that now have a hazard.
2255 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2256 if (checkHazard(*I)) {
2257 Pending.push(*I);
2258 I = Available.remove(I);
2259 continue;
2260 }
2261 ++I;
2262 }
2263 }
2264 for (unsigned i = 0; Available.empty(); ++i) {
2265 assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2266 "permanent hazard"); (void)i;
2267 bumpCycle(CurrCycle + 1);
2268 releasePending();
2269 }
2270 if (Available.size() == 1)
2271 return *Available.begin();
2272 return NULL;
2273 }
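The cycle-advancing loop in pickOnlyChoice above never spins forever: it asserts if the available queue stays empty longer than the hazard recognizer's lookahead plus the longest observed latency. A toy sketch of that bounded loop, with a stand-in queue and hypothetical limits:

    #include <cassert>
    #include <cstdio>
    #include <deque>

    int main() {
      std::deque<int> Available;                    // stand-in for the ReadyQueue
      unsigned CurrCycle = 0;
      unsigned MaxLookAhead = 2, MaxObservedLatency = 3;

      for (unsigned i = 0; Available.empty(); ++i) {
        assert(i <= MaxLookAhead + MaxObservedLatency && "permanent hazard");
        ++CurrCycle;                                // bumpCycle(CurrCycle + 1)
        if (CurrCycle == 3)
          Available.push_back(42);                  // releasePending() found a node
      }
      if (Available.size() == 1)                    // the only choice is forced
        std::printf("cycle %u, SU(%d)\n", CurrCycle, Available.front());
    }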
2274
2275 #ifndef NDEBUG
2276 // This is useful information to dump after bumpNode.
2277 // Note that the Queue contents are more useful before pickNodeFromQueue.
2278 void GenericScheduler::SchedBoundary::dumpScheduledState() {
2279 unsigned ResFactor;
2280 unsigned ResCount;
2281 if (ZoneCritResIdx) {
2282 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2283 ResCount = getResourceCount(ZoneCritResIdx);
2284 }
2285 else {
2286 ResFactor = SchedModel->getMicroOpFactor();
2287 ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2288 }
2289 unsigned LFactor = SchedModel->getLatencyFactor();
2290 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2291 << " Retired: " << RetiredMOps;
2292 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2293 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2294 << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
2295 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2296 << (IsResourceLimited ? " - Resource" : " - Latency")
2297 << " limited.\n";
2298 }
2299 #endif
2300 2202
2301 void GenericScheduler::SchedCandidate:: 2203 void GenericScheduler::SchedCandidate::
2302 initResourceDelta(const ScheduleDAGMI *DAG, 2204 initResourceDelta(const ScheduleDAGMI *DAG,
2303 const TargetSchedModel *SchedModel) { 2205 const TargetSchedModel *SchedModel) {
2304 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) 2206 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2312 ResDelta.CritResources += PI->Cycles; 2214 ResDelta.CritResources += PI->Cycles;
2313 if (PI->ProcResourceIdx == Policy.DemandResIdx) 2215 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2314 ResDelta.DemandedResources += PI->Cycles; 2216 ResDelta.DemandedResources += PI->Cycles;
2315 } 2217 }
2316 } 2218 }
2317
2318 2219
2319 /// Return true if this heuristic determines order. 2220 /// Return true if this heuristic determines order.
2320 static bool tryLess(int TryVal, int CandVal, 2221 static bool tryLess(int TryVal, int CandVal,
2321 GenericScheduler::SchedCandidate &TryCand, 2222 GenericScheduler::SchedCandidate &TryCand,
2322 GenericScheduler::SchedCandidate &Cand, 2223 GenericScheduler::SchedCandidate &Cand,
2407 return 0; 2308 return 0;
2408 } 2309 }
2409 2310
2410 static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, 2311 static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
2411 GenericScheduler::SchedCandidate &Cand, 2312 GenericScheduler::SchedCandidate &Cand,
2412 GenericScheduler::SchedBoundary &Zone) { 2313 SchedBoundary &Zone) {
2413 if (Zone.isTop()) { 2314 if (Zone.isTop()) {
2414 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { 2315 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2415 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2316 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2416 TryCand, Cand, GenericScheduler::TopDepthReduce)) 2317 TryCand, Cand, GenericScheduler::TopDepthReduce))
2417 return true; 2318 return true;
2443 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 2344 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2444 /// \param Zone describes the scheduled zone that we are extending. 2345 /// \param Zone describes the scheduled zone that we are extending.
2445 /// \param RPTracker describes reg pressure within the scheduled zone. 2346 /// \param RPTracker describes reg pressure within the scheduled zone.
2446 /// \param TempTracker is a scratch pressure tracker to reuse in queries. 2347 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2447 void GenericScheduler::tryCandidate(SchedCandidate &Cand, 2348 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2448 SchedCandidate &TryCand, 2349 SchedCandidate &TryCand,
2449 SchedBoundary &Zone, 2350 SchedBoundary &Zone,
2450 const RegPressureTracker &RPTracker, 2351 const RegPressureTracker &RPTracker,
2451 RegPressureTracker &TempTracker) { 2352 RegPressureTracker &TempTracker) {
2452 2353
2453 if (DAG->isTrackingPressure()) { 2354 if (DAG->isTrackingPressure()) {
2454 // Always initialize TryCand's RPDelta. 2355 // Always initialize TryCand's RPDelta.
2455 if (Zone.isTop()) { 2356 if (Zone.isTop()) {
2456 TempTracker.getMaxDownwardPressureDelta( 2357 TempTracker.getMaxDownwardPressureDelta(
2508 return; 2409 return;
2509 2410
2510 // For loops that are acyclic path limited, aggressively schedule for latency. 2411 // For loops that are acyclic path limited, aggressively schedule for latency.
2511 // This can result in very long dependence chains scheduled in sequence, so 2412 // This can result in very long dependence chains scheduled in sequence, so
2512 // once every cycle (when CurrMOps == 0), switch to normal heuristics. 2413 // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2513 if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps 2414 if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
2514 && tryLatency(TryCand, Cand, Zone)) 2415 && tryLatency(TryCand, Cand, Zone))
2416 return;
2417
2418 // Prioritize instructions that read unbuffered resources by stall cycles.
2419 if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
2420 Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2515 return; 2421 return;
2516 2422
2517 // Keep clustered nodes together to encourage downstream peephole 2423 // Keep clustered nodes together to encourage downstream peephole
2518 // optimizations which may reduce resource requirements. 2424 // optimizations which may reduce resource requirements.
2519 // 2425 //
2556 } 2462 }
2557 2463
2558 // Prefer immediate defs/users of the last scheduled instruction. This is a 2464 // Prefer immediate defs/users of the last scheduled instruction. This is a
2559 // local pressure avoidance strategy that also makes the machine code 2465 // local pressure avoidance strategy that also makes the machine code
2560 // readable. 2466 // readable.
2561 if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), 2467 if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
2562 TryCand, Cand, NextDefUse)) 2468 TryCand, Cand, NextDefUse))
2563 return; 2469 return;
2564 2470
2565 // Fall through to original instruction order. 2471 // Fall through to original instruction order.
2566 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 2472 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2575 switch (Reason) { 2481 switch (Reason) {
2576 case NoCand: return "NOCAND "; 2482 case NoCand: return "NOCAND ";
2577 case PhysRegCopy: return "PREG-COPY"; 2483 case PhysRegCopy: return "PREG-COPY";
2578 case RegExcess: return "REG-EXCESS"; 2484 case RegExcess: return "REG-EXCESS";
2579 case RegCritical: return "REG-CRIT "; 2485 case RegCritical: return "REG-CRIT ";
2486 case Stall: return "STALL ";
2580 case Cluster: return "CLUSTER "; 2487 case Cluster: return "CLUSTER ";
2581 case Weak: return "WEAK "; 2488 case Weak: return "WEAK ";
2582 case RegMax: return "REG-MAX "; 2489 case RegMax: return "REG-MAX ";
2583 case ResourceReduce: return "RES-REDUCE"; 2490 case ResourceReduce: return "RES-REDUCE";
2584 case ResourceDemand: return "RES-DEMAND"; 2491 case ResourceDemand: return "RES-DEMAND";
2649 /// 2556 ///
2650 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 2557 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2651 /// DAG building. To adjust for the current scheduling location we need to 2558 /// DAG building. To adjust for the current scheduling location we need to
2652 /// maintain the number of vreg uses remaining to be top-scheduled. 2559 /// maintain the number of vreg uses remaining to be top-scheduled.
2653 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 2560 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2654 const RegPressureTracker &RPTracker, 2561 const RegPressureTracker &RPTracker,
2655 SchedCandidate &Cand) { 2562 SchedCandidate &Cand) {
2656 ReadyQueue &Q = Zone.Available; 2563 ReadyQueue &Q = Zone.Available;
2657 2564
2658 DEBUG(Q.dump()); 2565 DEBUG(Q.dump());
2659 2566
2660 // getMaxPressureDelta temporarily modifies the tracker. 2567 // getMaxPressureDelta temporarily modifies the tracker.
2696 return SU; 2603 return SU;
2697 } 2604 }
2698 CandPolicy NoPolicy; 2605 CandPolicy NoPolicy;
2699 SchedCandidate BotCand(NoPolicy); 2606 SchedCandidate BotCand(NoPolicy);
2700 SchedCandidate TopCand(NoPolicy); 2607 SchedCandidate TopCand(NoPolicy);
2701 Bot.setPolicy(BotCand.Policy, Top); 2608 // Set the bottom-up policy based on the state of the current bottom zone and
2702 Top.setPolicy(TopCand.Policy, Bot); 2609 // the instructions outside the zone, including the top zone.
2610 setPolicy(BotCand.Policy, Bot, Top);
2611 // Set the top-down policy based on the state of the current top zone and
2612 // the instructions outside the zone, including the bottom zone.
2613 setPolicy(TopCand.Policy, Top, Bot);
2703 2614
2704 // Prefer bottom scheduling when heuristics are silent. 2615 // Prefer bottom scheduling when heuristics are silent.
2705 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 2616 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2706 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 2617 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2707 2618
2814 /// 2725 ///
2815 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling 2726 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2816 /// them here. See comments in biasPhysRegCopy. 2727 /// them here. See comments in biasPhysRegCopy.
2817 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 2728 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2818 if (IsTopNode) { 2729 if (IsTopNode) {
2819 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); 2730 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
2820 Top.bumpNode(SU); 2731 Top.bumpNode(SU);
2821 if (SU->hasPhysRegUses) 2732 if (SU->hasPhysRegUses)
2822 reschedulePhysRegCopies(SU, true); 2733 reschedulePhysRegCopies(SU, true);
2823 } 2734 }
2824 else { 2735 else {
2825 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); 2736 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
2826 Bot.bumpNode(SU); 2737 Bot.bumpNode(SU);
2827 if (SU->hasPhysRegDefs) 2738 if (SU->hasPhysRegDefs)
2828 reschedulePhysRegCopies(SU, false); 2739 reschedulePhysRegCopies(SU, false);
2829 } 2740 }
2830 } 2741 }