Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison lib/CodeGen/MachineScheduler.cpp @ 33:e4204d083e25
LLVM 3.5
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 14:32:10 +0900 |
parents | 95c75e76d11b |
children | 54457678186b |
comparison
equal
deleted
inserted
replaced
1:f783a2dd24b1 | 33:e4204d083e25 |
---|---|
1317 constrainLocalCopy(SU, DAG); | 1317 constrainLocalCopy(SU, DAG); |
1318 } | 1318 } |
1319 } | 1319 } |
1320 | 1320 |
1321 //===----------------------------------------------------------------------===// | 1321 //===----------------------------------------------------------------------===// |
1322 // GenericScheduler - Implementation of the generic MachineSchedStrategy. | 1322 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler |
1323 //===----------------------------------------------------------------------===// | 1323 // and possibly other custom schedulers. |
1324 | 1324 // ===----------------------------------------------------------------------===/ |
1325 namespace { | 1325 |
1326 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance | 1326 static const unsigned InvalidCycle = ~0U; |
1327 /// the schedule. | 1327 |
1328 class GenericScheduler : public MachineSchedStrategy { | 1328 SchedBoundary::~SchedBoundary() { delete HazardRec; } |
1329 public: | 1329 |
1330 /// Represent the type of SchedCandidate found within a single queue. | 1330 void SchedBoundary::reset() { |
1331 /// pickNodeBidirectional depends on these listed by decreasing priority. | 1331 // A new HazardRec is created for each DAG and owned by SchedBoundary. |
1332 enum CandReason { | 1332 // Destroying and reconstructing it is very expensive though. So keep |
1333 NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, | 1333 // invalid, placeholder HazardRecs. |
1334 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, | 1334 if (HazardRec && HazardRec->isEnabled()) { |
1335 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; | 1335 delete HazardRec; |
1336 | 1336 HazardRec = 0; |
1337 } | |
1338 Available.clear(); | |
1339 Pending.clear(); | |
1340 CheckPending = false; | |
1341 NextSUs.clear(); | |
1342 CurrCycle = 0; | |
1343 CurrMOps = 0; | |
1344 MinReadyCycle = UINT_MAX; | |
1345 ExpectedLatency = 0; | |
1346 DependentLatency = 0; | |
1347 RetiredMOps = 0; | |
1348 MaxExecutedResCount = 0; | |
1349 ZoneCritResIdx = 0; | |
1350 IsResourceLimited = false; | |
1351 ReservedCycles.clear(); | |
1337 #ifndef NDEBUG | 1352 #ifndef NDEBUG |
1338 static const char *getReasonStr(GenericScheduler::CandReason Reason); | 1353 MaxObservedLatency = 0; |
1339 #endif | 1354 #endif |
1340 | 1355 // Reserve a zero-count for invalid CritResIdx. |
1341 /// Policy for scheduling the next instruction in the candidate's zone. | 1356 ExecutedResCounts.resize(1); |
1342 struct CandPolicy { | 1357 assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); |
1343 bool ReduceLatency; | 1358 } |
1344 unsigned ReduceResIdx; | 1359 |
1345 unsigned DemandResIdx; | 1360 void SchedRemainder:: |
1346 | |
1347 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} | |
1348 }; | |
1349 | |
1350 /// Status of an instruction's critical resource consumption. | |
1351 struct SchedResourceDelta { | |
1352 // Count critical resources in the scheduled region required by SU. | |
1353 unsigned CritResources; | |
1354 | |
1355 // Count critical resources from another region consumed by SU. | |
1356 unsigned DemandedResources; | |
1357 | |
1358 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} | |
1359 | |
1360 bool operator==(const SchedResourceDelta &RHS) const { | |
1361 return CritResources == RHS.CritResources | |
1362 && DemandedResources == RHS.DemandedResources; | |
1363 } | |
1364 bool operator!=(const SchedResourceDelta &RHS) const { | |
1365 return !operator==(RHS); | |
1366 } | |
1367 }; | |
1368 | |
1369 /// Store the state used by GenericScheduler heuristics, required for the | |
1370 /// lifetime of one invocation of pickNode(). | |
1371 struct SchedCandidate { | |
1372 CandPolicy Policy; | |
1373 | |
1374 // The best SUnit candidate. | |
1375 SUnit *SU; | |
1376 | |
1377 // The reason for this candidate. | |
1378 CandReason Reason; | |
1379 | |
1380 // Set of reasons that apply to multiple candidates. | |
1381 uint32_t RepeatReasonSet; | |
1382 | |
1383 // Register pressure values for the best candidate. | |
1384 RegPressureDelta RPDelta; | |
1385 | |
1386 // Critical resource consumption of the best candidate. | |
1387 SchedResourceDelta ResDelta; | |
1388 | |
1389 SchedCandidate(const CandPolicy &policy) | |
1390 : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} | |
1391 | |
1392 bool isValid() const { return SU; } | |
1393 | |
1394 // Copy the status of another candidate without changing policy. | |
1395 void setBest(SchedCandidate &Best) { | |
1396 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); | |
1397 SU = Best.SU; | |
1398 Reason = Best.Reason; | |
1399 RPDelta = Best.RPDelta; | |
1400 ResDelta = Best.ResDelta; | |
1401 } | |
1402 | |
1403 bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } | |
1404 void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } | |
1405 | |
1406 void initResourceDelta(const ScheduleDAGMI *DAG, | |
1407 const TargetSchedModel *SchedModel); | |
1408 }; | |
1409 | |
1410 /// Summarize the unscheduled region. | |
1411 struct SchedRemainder { | |
1412 // Critical path through the DAG in expected latency. | |
1413 unsigned CriticalPath; | |
1414 unsigned CyclicCritPath; | |
1415 | |
1416 // Scaled count of micro-ops left to schedule. | |
1417 unsigned RemIssueCount; | |
1418 | |
1419 bool IsAcyclicLatencyLimited; | |
1420 | |
1421 // Unscheduled resources | |
1422 SmallVector<unsigned, 16> RemainingCounts; | |
1423 | |
1424 void reset() { | |
1425 CriticalPath = 0; | |
1426 CyclicCritPath = 0; | |
1427 RemIssueCount = 0; | |
1428 IsAcyclicLatencyLimited = false; | |
1429 RemainingCounts.clear(); | |
1430 } | |
1431 | |
1432 SchedRemainder() { reset(); } | |
1433 | |
1434 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); | |
1435 }; | |
1436 | |
1437 /// Each Scheduling boundary is associated with ready queues. It tracks the | |
1438 /// current cycle in the direction of movement, and maintains the state | |
1439 /// of "hazards" and other interlocks at the current cycle. | |
1440 struct SchedBoundary { | |
1441 ScheduleDAGMI *DAG; | |
1442 const TargetSchedModel *SchedModel; | |
1443 SchedRemainder *Rem; | |
1444 | |
1445 ReadyQueue Available; | |
1446 ReadyQueue Pending; | |
1447 bool CheckPending; | |
1448 | |
1449 // For heuristics, keep a list of the nodes that immediately depend on the | |
1450 // most recently scheduled node. | |
1451 SmallPtrSet<const SUnit*, 8> NextSUs; | |
1452 | |
1453 ScheduleHazardRecognizer *HazardRec; | |
1454 | |
1455 /// Number of cycles it takes to issue the instructions scheduled in this | |
1456 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. | |
1457 /// See getStalls(). | |
1458 unsigned CurrCycle; | |
1459 | |
1460 /// Micro-ops issued in the current cycle | |
1461 unsigned CurrMOps; | |
1462 | |
1463 /// MinReadyCycle - Cycle of the soonest available instruction. | |
1464 unsigned MinReadyCycle; | |
1465 | |
1466 // The expected latency of the critical path in this scheduled zone. | |
1467 unsigned ExpectedLatency; | |
1468 | |
1469 // The latency of dependence chains leading into this zone. | |
1470 // For each node scheduled bottom-up: DLat = max DLat, N.Depth. | |
1471 // For each cycle scheduled: DLat -= 1. | |
1472 unsigned DependentLatency; | |
1473 | |
1474 /// Count the scheduled (issued) micro-ops that can be retired by | |
1475 /// time=CurrCycle assuming the first scheduled instr is retired at time=0. | |
1476 unsigned RetiredMOps; | |
1477 | |
1478 // Count scheduled resources that have been executed. Resources are | |
1479 // considered executed if they become ready in the time that it takes to | |
1480 // saturate any resource including the one in question. Counts are scaled | |
1481 // for direct comparison with other resources. Counts can be compared with | |
1482 // MOps * getMicroOpFactor and Latency * getLatencyFactor. | |
1483 SmallVector<unsigned, 16> ExecutedResCounts; | |
1484 | |
1485 /// Cache the max count for a single resource. | |
1486 unsigned MaxExecutedResCount; | |
1487 | |
1488 // Cache the critical resources ID in this scheduled zone. | |
1489 unsigned ZoneCritResIdx; | |
1490 | |
1491 // Is the scheduled region resource limited vs. latency limited. | |
1492 bool IsResourceLimited; | |
1493 | |
1494 #ifndef NDEBUG | |
1495 // Remember the greatest operand latency as an upper bound on the number of | |
1496 // times we should retry the pending queue because of a hazard. | |
1497 unsigned MaxObservedLatency; | |
1498 #endif | |
1499 | |
1500 void reset() { | |
1501 // A new HazardRec is created for each DAG and owned by SchedBoundary. | |
1502 // Destroying and reconstructing it is very expensive though. So keep | |
1503 // invalid, placeholder HazardRecs. | |
1504 if (HazardRec && HazardRec->isEnabled()) { | |
1505 delete HazardRec; | |
1506 HazardRec = 0; | |
1507 } | |
1508 Available.clear(); | |
1509 Pending.clear(); | |
1510 CheckPending = false; | |
1511 NextSUs.clear(); | |
1512 CurrCycle = 0; | |
1513 CurrMOps = 0; | |
1514 MinReadyCycle = UINT_MAX; | |
1515 ExpectedLatency = 0; | |
1516 DependentLatency = 0; | |
1517 RetiredMOps = 0; | |
1518 MaxExecutedResCount = 0; | |
1519 ZoneCritResIdx = 0; | |
1520 IsResourceLimited = false; | |
1521 #ifndef NDEBUG | |
1522 MaxObservedLatency = 0; | |
1523 #endif | |
1524 // Reserve a zero-count for invalid CritResIdx. | |
1525 ExecutedResCounts.resize(1); | |
1526 assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); | |
1527 } | |
1528 | |
1529 /// Pending queues extend the ready queues with the same ID and the | |
1530 /// PendingFlag set. | |
1531 SchedBoundary(unsigned ID, const Twine &Name): | |
1532 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), | |
1533 Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), | |
1534 HazardRec(0) { | |
1535 reset(); | |
1536 } | |
1537 | |
1538 ~SchedBoundary() { delete HazardRec; } | |
1539 | |
1540 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, | |
1541 SchedRemainder *rem); | |
1542 | |
1543 bool isTop() const { | |
1544 return Available.getID() == GenericScheduler::TopQID; | |
1545 } | |
1546 | |
1547 #ifndef NDEBUG | |
1548 const char *getResourceName(unsigned PIdx) { | |
1549 if (!PIdx) | |
1550 return "MOps"; | |
1551 return SchedModel->getProcResource(PIdx)->Name; | |
1552 } | |
1553 #endif | |
1554 | |
1555 /// Get the number of latency cycles "covered" by the scheduled | |
1556 /// instructions. This is the larger of the critical path within the zone | |
1557 /// and the number of cycles required to issue the instructions. | |
1558 unsigned getScheduledLatency() const { | |
1559 return std::max(ExpectedLatency, CurrCycle); | |
1560 } | |
1561 | |
1562 unsigned getUnscheduledLatency(SUnit *SU) const { | |
1563 return isTop() ? SU->getHeight() : SU->getDepth(); | |
1564 } | |
1565 | |
1566 unsigned getResourceCount(unsigned ResIdx) const { | |
1567 return ExecutedResCounts[ResIdx]; | |
1568 } | |
1569 | |
1570 /// Get the scaled count of scheduled micro-ops and resources, including | |
1571 /// executed resources. | |
1572 unsigned getCriticalCount() const { | |
1573 if (!ZoneCritResIdx) | |
1574 return RetiredMOps * SchedModel->getMicroOpFactor(); | |
1575 return getResourceCount(ZoneCritResIdx); | |
1576 } | |
1577 | |
1578 /// Get a scaled count for the minimum execution time of the scheduled | |
1579 /// micro-ops that are ready to execute by getExecutedCount. Notice the | |
1580 /// feedback loop. | |
1581 unsigned getExecutedCount() const { | |
1582 return std::max(CurrCycle * SchedModel->getLatencyFactor(), | |
1583 MaxExecutedResCount); | |
1584 } | |
1585 | |
1586 bool checkHazard(SUnit *SU); | |
1587 | |
1588 unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); | |
1589 | |
1590 unsigned getOtherResourceCount(unsigned &OtherCritIdx); | |
1591 | |
1592 void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); | |
1593 | |
1594 void releaseNode(SUnit *SU, unsigned ReadyCycle); | |
1595 | |
1596 void bumpCycle(unsigned NextCycle); | |
1597 | |
1598 void incExecutedResources(unsigned PIdx, unsigned Count); | |
1599 | |
1600 unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); | |
1601 | |
1602 void bumpNode(SUnit *SU); | |
1603 | |
1604 void releasePending(); | |
1605 | |
1606 void removeReady(SUnit *SU); | |
1607 | |
1608 SUnit *pickOnlyChoice(); | |
1609 | |
1610 #ifndef NDEBUG | |
1611 void dumpScheduledState(); | |
1612 #endif | |
1613 }; | |
1614 | |
1615 private: | |
1616 const MachineSchedContext *Context; | |
1617 ScheduleDAGMI *DAG; | |
1618 const TargetSchedModel *SchedModel; | |
1619 const TargetRegisterInfo *TRI; | |
1620 | |
1621 // State of the top and bottom scheduled instruction boundaries. | |
1622 SchedRemainder Rem; | |
1623 SchedBoundary Top; | |
1624 SchedBoundary Bot; | |
1625 | |
1626 MachineSchedPolicy RegionPolicy; | |
1627 public: | |
1628 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) | |
1629 enum { | |
1630 TopQID = 1, | |
1631 BotQID = 2, | |
1632 LogMaxQID = 2 | |
1633 }; | |
1634 | |
1635 GenericScheduler(const MachineSchedContext *C): | |
1636 Context(C), DAG(0), SchedModel(0), TRI(0), | |
1637 Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} | |
1638 | |
1639 virtual void initPolicy(MachineBasicBlock::iterator Begin, | |
1640 MachineBasicBlock::iterator End, | |
1641 unsigned NumRegionInstrs); | |
1642 | |
1643 bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } | |
1644 | |
1645 virtual void initialize(ScheduleDAGMI *dag); | |
1646 | |
1647 virtual SUnit *pickNode(bool &IsTopNode); | |
1648 | |
1649 virtual void schedNode(SUnit *SU, bool IsTopNode); | |
1650 | |
1651 virtual void releaseTopNode(SUnit *SU); | |
1652 | |
1653 virtual void releaseBottomNode(SUnit *SU); | |
1654 | |
1655 virtual void registerRoots(); | |
1656 | |
1657 protected: | |
1658 void checkAcyclicLatency(); | |
1659 | |
1660 void tryCandidate(SchedCandidate &Cand, | |
1661 SchedCandidate &TryCand, | |
1662 SchedBoundary &Zone, | |
1663 const RegPressureTracker &RPTracker, | |
1664 RegPressureTracker &TempTracker); | |
1665 | |
1666 SUnit *pickNodeBidirectional(bool &IsTopNode); | |
1667 | |
1668 void pickNodeFromQueue(SchedBoundary &Zone, | |
1669 const RegPressureTracker &RPTracker, | |
1670 SchedCandidate &Candidate); | |
1671 | |
1672 void reschedulePhysRegCopies(SUnit *SU, bool isTop); | |
1673 | |
1674 #ifndef NDEBUG | |
1675 void traceCandidate(const SchedCandidate &Cand); | |
1676 #endif | |
1677 }; | |
1678 } // namespace | |
1679 | |
1680 void GenericScheduler::SchedRemainder:: | |
1681 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { | 1361 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { |
1682 reset(); | 1362 reset(); |
1683 if (!SchedModel->hasInstrSchedModel()) | 1363 if (!SchedModel->hasInstrSchedModel()) |
1684 return; | 1364 return; |
1685 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); | 1365 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); |
1696 RemainingCounts[PIdx] += (Factor * PI->Cycles); | 1376 RemainingCounts[PIdx] += (Factor * PI->Cycles); |
1697 } | 1377 } |
1698 } | 1378 } |
1699 } | 1379 } |
1700 | 1380 |
1701 void GenericScheduler::SchedBoundary:: | 1381 void SchedBoundary:: |
1702 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { | 1382 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { |
1703 reset(); | 1383 reset(); |
1704 DAG = dag; | 1384 DAG = dag; |
1705 SchedModel = smodel; | 1385 SchedModel = smodel; |
1706 Rem = rem; | 1386 Rem = rem; |
1707 if (SchedModel->hasInstrSchedModel()) | 1387 if (SchedModel->hasInstrSchedModel()) { |
1708 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); | 1388 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); |
1709 } | 1389 ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); |
1710 | 1390 } |
1711 /// Initialize the per-region scheduling policy. | 1391 } |
1712 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, | 1392 |
1713 MachineBasicBlock::iterator End, | 1393 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat |
1714 unsigned NumRegionInstrs) { | 1394 /// these "soft stalls" differently than the hard stall cycles based on CPU |
1715 const TargetMachine &TM = Context->MF->getTarget(); | 1395 /// resources and computed by checkHazard(). A fully in-order model |
1716 | 1396 /// (MicroOpBufferSize==0) will not make use of this since instructions are not |
1717 // Avoid setting up the register pressure tracker for small regions to save | 1397 /// available for scheduling until they are ready. However, a weaker in-order |
1718 // compile time. As a rough heuristic, only track pressure when the number of | 1398 /// model may use this for heuristics. For example, if a processor has in-order |
1719 // schedulable instructions exceeds half the integer register file. | 1399 /// behavior when reading certain resources, this may come into play. |
1720 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( | 1400 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { |
1721 TM.getTargetLowering()->getRegClassFor(MVT::i32)); | 1401 if (!SU->isUnbuffered) |
1722 | 1402 return 0; |
1723 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); | 1403 |
1724 | 1404 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); |
1725 // For generic targets, we default to bottom-up, because it's simpler and more | 1405 if (ReadyCycle > CurrCycle) |
1726 // compile-time optimizations have been implemented in that direction. | 1406 return ReadyCycle - CurrCycle; |
1727 RegionPolicy.OnlyBottomUp = true; | 1407 return 0; |
1728 | 1408 } |
1729 // Allow the subtarget to override default policy. | 1409 |
1730 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); | 1410 /// Compute the next cycle at which the given processor resource can be |
1731 ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); | 1411 /// scheduled. |
1732 | 1412 unsigned SchedBoundary:: |
1733 // After subtarget overrides, apply command line options. | 1413 getNextResourceCycle(unsigned PIdx, unsigned Cycles) { |
1734 if (!EnableRegPressure) | 1414 unsigned NextUnreserved = ReservedCycles[PIdx]; |
1735 RegionPolicy.ShouldTrackPressure = false; | 1415 // If this resource has never been used, always return cycle zero. |
1736 | 1416 if (NextUnreserved == InvalidCycle) |
1737 // Check -misched-topdown/bottomup can force or unforce scheduling direction. | 1417 return 0; |
1738 // e.g. -misched-bottomup=false allows scheduling in both directions. | 1418 // For bottom-up scheduling add the cycles needed for the current operation. |
1739 assert((!ForceTopDown || !ForceBottomUp) && | 1419 if (!isTop()) |
1740 "-misched-topdown incompatible with -misched-bottomup"); | 1420 NextUnreserved += Cycles; |
1741 if (ForceBottomUp.getNumOccurrences() > 0) { | 1421 return NextUnreserved; |
1742 RegionPolicy.OnlyBottomUp = ForceBottomUp; | |
1743 if (RegionPolicy.OnlyBottomUp) | |
1744 RegionPolicy.OnlyTopDown = false; | |
1745 } | |
1746 if (ForceTopDown.getNumOccurrences() > 0) { | |
1747 RegionPolicy.OnlyTopDown = ForceTopDown; | |
1748 if (RegionPolicy.OnlyTopDown) | |
1749 RegionPolicy.OnlyBottomUp = false; | |
1750 } | |
1751 } | |
1752 | |
1753 void GenericScheduler::initialize(ScheduleDAGMI *dag) { | |
1754 DAG = dag; | |
1755 SchedModel = DAG->getSchedModel(); | |
1756 TRI = DAG->TRI; | |
1757 | |
1758 Rem.init(DAG, SchedModel); | |
1759 Top.init(DAG, SchedModel, &Rem); | |
1760 Bot.init(DAG, SchedModel, &Rem); | |
1761 | |
1762 // Initialize resource counts. | |
1763 | |
1764 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or | |
1765 // are disabled, then these HazardRecs will be disabled. | |
1766 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); | |
1767 const TargetMachine &TM = DAG->MF.getTarget(); | |
1768 if (!Top.HazardRec) { | |
1769 Top.HazardRec = | |
1770 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); | |
1771 } | |
1772 if (!Bot.HazardRec) { | |
1773 Bot.HazardRec = | |
1774 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); | |
1775 } | |
1776 } | |
1777 | |
1778 void GenericScheduler::releaseTopNode(SUnit *SU) { | |
1779 if (SU->isScheduled) | |
1780 return; | |
1781 | |
1782 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
1783 I != E; ++I) { | |
1784 if (I->isWeak()) | |
1785 continue; | |
1786 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; | |
1787 unsigned Latency = I->getLatency(); | |
1788 #ifndef NDEBUG | |
1789 Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); | |
1790 #endif | |
1791 if (SU->TopReadyCycle < PredReadyCycle + Latency) | |
1792 SU->TopReadyCycle = PredReadyCycle + Latency; | |
1793 } | |
1794 Top.releaseNode(SU, SU->TopReadyCycle); | |
1795 } | |
1796 | |
1797 void GenericScheduler::releaseBottomNode(SUnit *SU) { | |
1798 if (SU->isScheduled) | |
1799 return; | |
1800 | |
1801 assert(SU->getInstr() && "Scheduled SUnit must have instr"); | |
1802 | |
1803 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | |
1804 I != E; ++I) { | |
1805 if (I->isWeak()) | |
1806 continue; | |
1807 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; | |
1808 unsigned Latency = I->getLatency(); | |
1809 #ifndef NDEBUG | |
1810 Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); | |
1811 #endif | |
1812 if (SU->BotReadyCycle < SuccReadyCycle + Latency) | |
1813 SU->BotReadyCycle = SuccReadyCycle + Latency; | |
1814 } | |
1815 Bot.releaseNode(SU, SU->BotReadyCycle); | |
1816 } | |
1817 | |
1818 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic | |
1819 /// critical path by more cycles than it takes to drain the instruction buffer. | |
1820 /// We estimate an upper bounds on in-flight instructions as: | |
1821 /// | |
1822 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) | |
1823 /// InFlightIterations = AcyclicPath / CyclesPerIteration | |
1824 /// InFlightResources = InFlightIterations * LoopResources | |
1825 /// | |
1826 /// TODO: Check execution resources in addition to IssueCount. | |
1827 void GenericScheduler::checkAcyclicLatency() { | |
1828 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) | |
1829 return; | |
1830 | |
1831 // Scaled number of cycles per loop iteration. | |
1832 unsigned IterCount = | |
1833 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), | |
1834 Rem.RemIssueCount); | |
1835 // Scaled acyclic critical path. | |
1836 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); | |
1837 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop | |
1838 unsigned InFlightCount = | |
1839 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; | |
1840 unsigned BufferLimit = | |
1841 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); | |
1842 | |
1843 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; | |
1844 | |
1845 DEBUG(dbgs() << "IssueCycles=" | |
1846 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " | |
1847 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() | |
1848 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount | |
1849 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() | |
1850 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; | |
1851 if (Rem.IsAcyclicLatencyLimited) | |
1852 dbgs() << " ACYCLIC LATENCY LIMIT\n"); | |
1853 } | |
1854 | |
1855 void GenericScheduler::registerRoots() { | |
1856 Rem.CriticalPath = DAG->ExitSU.getDepth(); | |
1857 | |
1858 // Some roots may not feed into ExitSU. Check all of them in case. | |
1859 for (std::vector<SUnit*>::const_iterator | |
1860 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { | |
1861 if ((*I)->getDepth() > Rem.CriticalPath) | |
1862 Rem.CriticalPath = (*I)->getDepth(); | |
1863 } | |
1864 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); | |
1865 | |
1866 if (EnableCyclicPath) { | |
1867 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); | |
1868 checkAcyclicLatency(); | |
1869 } | |
1870 } | 1422 } |
1871 | 1423 |
1872 /// Does this SU have a hazard within the current instruction group. | 1424 /// Does this SU have a hazard within the current instruction group. |
1873 /// | 1425 /// |
1874 /// The scheduler supports two modes of hazard recognition. The first is the | 1426 /// The scheduler supports two modes of hazard recognition. The first is the |
1880 /// simple counters that the scheduler itself maintains. It explicitly checks | 1432 /// simple counters that the scheduler itself maintains. It explicitly checks |
1881 /// for instruction dispatch limitations, including the number of micro-ops that | 1433 /// for instruction dispatch limitations, including the number of micro-ops that |
1882 /// can dispatch per cycle. | 1434 /// can dispatch per cycle. |
1883 /// | 1435 /// |
1884 /// TODO: Also check whether the SU must start a new group. | 1436 /// TODO: Also check whether the SU must start a new group. |
1885 bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { | 1437 bool SchedBoundary::checkHazard(SUnit *SU) { |
1886 if (HazardRec->isEnabled()) | 1438 if (HazardRec->isEnabled()) |
1887 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; | 1439 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; |
1888 | 1440 |
1889 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); | 1441 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); |
1890 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { | 1442 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { |
1891 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" | 1443 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" |
1892 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); | 1444 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); |
1893 return true; | 1445 return true; |
1894 } | 1446 } |
1447 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { | |
1448 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); | |
1449 for (TargetSchedModel::ProcResIter | |
1450 PI = SchedModel->getWriteProcResBegin(SC), | |
1451 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | |
1452 if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) | |
1453 return true; | |
1454 } | |
1455 } | |
1895 return false; | 1456 return false; |
1896 } | 1457 } |
1897 | 1458 |
1898 // Find the unscheduled node in ReadySUs with the highest latency. | 1459 // Find the unscheduled node in ReadySUs with the highest latency. |
1899 unsigned GenericScheduler::SchedBoundary:: | 1460 unsigned SchedBoundary:: |
1900 findMaxLatency(ArrayRef<SUnit*> ReadySUs) { | 1461 findMaxLatency(ArrayRef<SUnit*> ReadySUs) { |
1901 SUnit *LateSU = 0; | 1462 SUnit *LateSU = 0; |
1902 unsigned RemLatency = 0; | 1463 unsigned RemLatency = 0; |
1903 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); | 1464 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); |
1904 I != E; ++I) { | 1465 I != E; ++I) { |
1916 } | 1477 } |
1917 | 1478 |
1918 // Count resources in this zone and the remaining unscheduled | 1479 // Count resources in this zone and the remaining unscheduled |
1919 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical | 1480 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical |
1920 // resource index, or zero if the zone is issue limited. | 1481 // resource index, or zero if the zone is issue limited. |
1921 unsigned GenericScheduler::SchedBoundary:: | 1482 unsigned SchedBoundary:: |
1922 getOtherResourceCount(unsigned &OtherCritIdx) { | 1483 getOtherResourceCount(unsigned &OtherCritIdx) { |
1923 OtherCritIdx = 0; | 1484 OtherCritIdx = 0; |
1924 if (!SchedModel->hasInstrSchedModel()) | 1485 if (!SchedModel->hasInstrSchedModel()) |
1925 return 0; | 1486 return 0; |
1926 | 1487 |
1937 } | 1498 } |
1938 } | 1499 } |
1939 if (OtherCritIdx) { | 1500 if (OtherCritIdx) { |
1940 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " | 1501 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " |
1941 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) | 1502 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) |
1942 << " " << getResourceName(OtherCritIdx) << "\n"); | 1503 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); |
1943 } | 1504 } |
1944 return OtherCritCount; | 1505 return OtherCritCount; |
1945 } | 1506 } |
1946 | 1507 |
1947 /// Set the CandPolicy for this zone given the current resources and latencies | 1508 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { |
1948 /// inside and outside the zone. | 1509 if (ReadyCycle < MinReadyCycle) |
1949 void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, | 1510 MinReadyCycle = ReadyCycle; |
1950 SchedBoundary &OtherZone) { | 1511 |
1951 // Now that potential stalls have been considered, apply preemptive heuristics | 1512 // Check for interlocks first. For the purpose of other heuristics, an |
1952 // based on the the total latency and resources inside and outside this | 1513 // instruction that cannot issue appears as if it's not in the ReadyQueue. |
1953 // zone. | 1514 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; |
1515 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) | |
1516 Pending.push(SU); | |
1517 else | |
1518 Available.push(SU); | |
1519 | |
1520 // Record this node as an immediate dependent of the scheduled node. | |
1521 NextSUs.insert(SU); | |
1522 } | |
1523 | |
1524 void SchedBoundary::releaseTopNode(SUnit *SU) { | |
1525 if (SU->isScheduled) | |
1526 return; | |
1527 | |
1528 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
1529 I != E; ++I) { | |
1530 if (I->isWeak()) | |
1531 continue; | |
1532 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; | |
1533 unsigned Latency = I->getLatency(); | |
1534 #ifndef NDEBUG | |
1535 MaxObservedLatency = std::max(Latency, MaxObservedLatency); | |
1536 #endif | |
1537 if (SU->TopReadyCycle < PredReadyCycle + Latency) | |
1538 SU->TopReadyCycle = PredReadyCycle + Latency; | |
1539 } | |
1540 releaseNode(SU, SU->TopReadyCycle); | |
1541 } | |
1542 | |
1543 void SchedBoundary::releaseBottomNode(SUnit *SU) { | |
1544 if (SU->isScheduled) | |
1545 return; | |
1546 | |
1547 assert(SU->getInstr() && "Scheduled SUnit must have instr"); | |
1548 | |
1549 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | |
1550 I != E; ++I) { | |
1551 if (I->isWeak()) | |
1552 continue; | |
1553 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; | |
1554 unsigned Latency = I->getLatency(); | |
1555 #ifndef NDEBUG | |
1556 MaxObservedLatency = std::max(Latency, MaxObservedLatency); | |
1557 #endif | |
1558 if (SU->BotReadyCycle < SuccReadyCycle + Latency) | |
1559 SU->BotReadyCycle = SuccReadyCycle + Latency; | |
1560 } | |
1561 releaseNode(SU, SU->BotReadyCycle); | |
1562 } | |
1563 | |
1564 /// Move the boundary of scheduled code by one cycle. | |
1565 void SchedBoundary::bumpCycle(unsigned NextCycle) { | |
1566 if (SchedModel->getMicroOpBufferSize() == 0) { | |
1567 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); | |
1568 if (MinReadyCycle > NextCycle) | |
1569 NextCycle = MinReadyCycle; | |
1570 } | |
1571 // Update the current micro-ops, which will issue in the next cycle. | |
1572 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); | |
1573 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; | |
1574 | |
1575 // Decrement DependentLatency based on the next cycle. | |
1576 if ((NextCycle - CurrCycle) > DependentLatency) | |
1577 DependentLatency = 0; | |
1578 else | |
1579 DependentLatency -= (NextCycle - CurrCycle); | |
1580 | |
1581 if (!HazardRec->isEnabled()) { | |
1582 // Bypass HazardRec virtual calls. | |
1583 CurrCycle = NextCycle; | |
1584 } | |
1585 else { | |
1586 // Bypass getHazardType calls in case of long latency. | |
1587 for (; CurrCycle != NextCycle; ++CurrCycle) { | |
1588 if (isTop()) | |
1589 HazardRec->AdvanceCycle(); | |
1590 else | |
1591 HazardRec->RecedeCycle(); | |
1592 } | |
1593 } | |
1594 CheckPending = true; | |
1595 unsigned LFactor = SchedModel->getLatencyFactor(); | |
1596 IsResourceLimited = | |
1597 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) | |
1598 > (int)LFactor; | |
1599 | |
1600 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); | |
1601 } | |
1602 | |
1603 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { | |
1604 ExecutedResCounts[PIdx] += Count; | |
1605 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) | |
1606 MaxExecutedResCount = ExecutedResCounts[PIdx]; | |
1607 } | |
1608 | |
1609 /// Add the given processor resource to this scheduled zone. | |
1610 /// | |
1611 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles | |
1612 /// during which this resource is consumed. | |
1613 /// | |
1614 /// \return the next cycle at which the instruction may execute without | |
1615 /// oversubscribing resources. | |
1616 unsigned SchedBoundary:: | |
1617 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { | |
1618 unsigned Factor = SchedModel->getResourceFactor(PIdx); | |
1619 unsigned Count = Factor * Cycles; | |
1620 DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) | |
1621 << " +" << Cycles << "x" << Factor << "u\n"); | |
1622 | |
1623 // Update Executed resources counts. | |
1624 incExecutedResources(PIdx, Count); | |
1625 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); | |
1626 Rem->RemainingCounts[PIdx] -= Count; | |
1627 | |
1628 // Check if this resource exceeds the current critical resource. If so, it | |
1629 // becomes the critical resource. | |
1630 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { | |
1631 ZoneCritResIdx = PIdx; | |
1632 DEBUG(dbgs() << " *** Critical resource " | |
1633 << SchedModel->getResourceName(PIdx) << ": " | |
1634 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); | |
1635 } | |
1636 // For reserved resources, record the highest cycle using the resource. | |
1637 unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); | |
1638 if (NextAvailable > CurrCycle) { | |
1639 DEBUG(dbgs() << " Resource conflict: " | |
1640 << SchedModel->getProcResource(PIdx)->Name << " reserved until @" | |
1641 << NextAvailable << "\n"); | |
1642 } | |
1643 return NextAvailable; | |
1644 } | |
1645 | |
1646 /// Move the boundary of scheduled code by one SUnit. | |
1647 void SchedBoundary::bumpNode(SUnit *SU) { | |
1648 // Update the reservation table. | |
1649 if (HazardRec->isEnabled()) { | |
1650 if (!isTop() && SU->isCall) { | |
1651 // Calls are scheduled with their preceding instructions. For bottom-up | |
1652 // scheduling, clear the pipeline state before emitting. | |
1653 HazardRec->Reset(); | |
1654 } | |
1655 HazardRec->EmitInstruction(SU); | |
1656 } | |
1657 // checkHazard should prevent scheduling multiple instructions per cycle that | |
1658 // exceed the issue width. | |
1659 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); | |
1660 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); | |
1661 assert( | |
1662 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && | |
1663 "Cannot schedule this instruction's MicroOps in the current cycle."); | |
1664 | |
1665 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); | |
1666 DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); | |
1667 | |
1668 unsigned NextCycle = CurrCycle; | |
1669 switch (SchedModel->getMicroOpBufferSize()) { | |
1670 case 0: | |
1671 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); | |
1672 break; | |
1673 case 1: | |
1674 if (ReadyCycle > NextCycle) { | |
1675 NextCycle = ReadyCycle; | |
1676 DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); | |
1677 } | |
1678 break; | |
1679 default: | |
1680 // We don't currently model the OOO reorder buffer, so consider all | |
1681 // scheduled MOps to be "retired". We do loosely model in-order resource | |
1682 // latency. If this instruction uses an in-order resource, account for any | |
1683 // likely stall cycles. | |
1684 if (SU->isUnbuffered && ReadyCycle > NextCycle) | |
1685 NextCycle = ReadyCycle; | |
1686 break; | |
1687 } | |
1688 RetiredMOps += IncMOps; | |
1689 | |
1690 // Update resource counts and critical resource. | |
1691 if (SchedModel->hasInstrSchedModel()) { | |
1692 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); | |
1693 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); | |
1694 Rem->RemIssueCount -= DecRemIssue; | |
1695 if (ZoneCritResIdx) { | |
1696 // Scale scheduled micro-ops for comparing with the critical resource. | |
1697 unsigned ScaledMOps = | |
1698 RetiredMOps * SchedModel->getMicroOpFactor(); | |
1699 | |
1700 // If scaled micro-ops are now more than the previous critical resource by | |
1701 // a full cycle, then micro-ops issue becomes critical. | |
1702 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) | |
1703 >= (int)SchedModel->getLatencyFactor()) { | |
1704 ZoneCritResIdx = 0; | |
1705 DEBUG(dbgs() << " *** Critical resource NumMicroOps: " | |
1706 << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); | |
1707 } | |
1708 } | |
1709 for (TargetSchedModel::ProcResIter | |
1710 PI = SchedModel->getWriteProcResBegin(SC), | |
1711 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | |
1712 unsigned RCycle = | |
1713 countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); | |
1714 if (RCycle > NextCycle) | |
1715 NextCycle = RCycle; | |
1716 } | |
1717 if (SU->hasReservedResource) { | |
1718 // For reserved resources, record the highest cycle using the resource. | |
1719 // For top-down scheduling, this is the cycle in which we schedule this | |
1720 // instruction plus the number of cycles the operations reserves the | |
1721 // resource. For bottom-up is it simply the instruction's cycle. | |
1722 for (TargetSchedModel::ProcResIter | |
1723 PI = SchedModel->getWriteProcResBegin(SC), | |
1724 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | |
1725 unsigned PIdx = PI->ProcResourceIdx; | |
1726 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) | |
1727 ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle; | |
1728 } | |
1729 } | |
1730 } | |
1731 // Update ExpectedLatency and DependentLatency. | |
1732 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; | |
1733 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; | |
1734 if (SU->getDepth() > TopLatency) { | |
1735 TopLatency = SU->getDepth(); | |
1736 DEBUG(dbgs() << " " << Available.getName() | |
1737 << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); | |
1738 } | |
1739 if (SU->getHeight() > BotLatency) { | |
1740 BotLatency = SU->getHeight(); | |
1741 DEBUG(dbgs() << " " << Available.getName() | |
1742 << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); | |
1743 } | |
1744 // If we stall for any reason, bump the cycle. | |
1745 if (NextCycle > CurrCycle) { | |
1746 bumpCycle(NextCycle); | |
1747 } | |
1748 else { | |
1749 // After updating ZoneCritResIdx and ExpectedLatency, check if we're | |
1750 // resource limited. If a stall occured, bumpCycle does this. | |
1751 unsigned LFactor = SchedModel->getLatencyFactor(); | |
1752 IsResourceLimited = | |
1753 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) | |
1754 > (int)LFactor; | |
1755 } | |
1756 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle | |
1757 // resets CurrMOps. Loop to handle instructions with more MOps than issue in | |
1758 // one cycle. Since we commonly reach the max MOps here, opportunistically | |
1759 // bump the cycle to avoid uselessly checking everything in the readyQ. | |
1760 CurrMOps += IncMOps; | |
1761 while (CurrMOps >= SchedModel->getIssueWidth()) { | |
1762 bumpCycle(++NextCycle); | |
1763 DEBUG(dbgs() << " *** Max MOps " << CurrMOps | |
1764 << " at cycle " << CurrCycle << '\n'); | |
1765 } | |
1766 DEBUG(dumpScheduledState()); | |
1767 } | |
1768 | |
1769 /// Release pending ready nodes in to the available queue. This makes them | |
1770 /// visible to heuristics. | |
1771 void SchedBoundary::releasePending() { | |
1772 // If the available queue is empty, it is safe to reset MinReadyCycle. | |
1773 if (Available.empty()) | |
1774 MinReadyCycle = UINT_MAX; | |
1775 | |
1776 // Check to see if any of the pending instructions are ready to issue. If | |
1777 // so, add them to the available queue. | |
1778 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; | |
1779 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { | |
1780 SUnit *SU = *(Pending.begin()+i); | |
1781 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; | |
1782 | |
1783 if (ReadyCycle < MinReadyCycle) | |
1784 MinReadyCycle = ReadyCycle; | |
1785 | |
1786 if (!IsBuffered && ReadyCycle > CurrCycle) | |
1787 continue; | |
1788 | |
1789 if (checkHazard(SU)) | |
1790 continue; | |
1791 | |
1792 Available.push(SU); | |
1793 Pending.remove(Pending.begin()+i); | |
1794 --i; --e; | |
1795 } | |
1796 DEBUG(if (!Pending.empty()) Pending.dump()); | |
1797 CheckPending = false; | |
1798 } | |
1799 | |
1800 /// Remove SU from the ready set for this boundary. | |
1801 void SchedBoundary::removeReady(SUnit *SU) { | |
1802 if (Available.isInQueue(SU)) | |
1803 Available.remove(Available.find(SU)); | |
1804 else { | |
1805 assert(Pending.isInQueue(SU) && "bad ready count"); | |
1806 Pending.remove(Pending.find(SU)); | |
1807 } | |
1808 } | |
1809 | |
1810 /// If this queue only has one ready candidate, return it. As a side effect, | |
1811 /// defer any nodes that now hit a hazard, and advance the cycle until at least | |
1812 /// one node is ready. If multiple instructions are ready, return NULL. | |
1813 SUnit *SchedBoundary::pickOnlyChoice() { | |
1814 if (CheckPending) | |
1815 releasePending(); | |
1816 | |
1817 if (CurrMOps > 0) { | |
1818 // Defer any ready instrs that now have a hazard. | |
1819 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { | |
1820 if (checkHazard(*I)) { | |
1821 Pending.push(*I); | |
1822 I = Available.remove(I); | |
1823 continue; | |
1824 } | |
1825 ++I; | |
1826 } | |
1827 } | |
1828 for (unsigned i = 0; Available.empty(); ++i) { | |
1829 assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && | |
1830 "permanent hazard"); (void)i; | |
1831 bumpCycle(CurrCycle + 1); | |
1832 releasePending(); | |
1833 } | |
1834 if (Available.size() == 1) | |
1835 return *Available.begin(); | |
1836 return NULL; | |
1837 } | |
1838 | |
1839 #ifndef NDEBUG | |
1840 // This is useful information to dump after bumpNode. | |
1841 // Note that the Queue contents are more useful before pickNodeFromQueue. | |
1842 void SchedBoundary::dumpScheduledState() { | |
1843 unsigned ResFactor; | |
1844 unsigned ResCount; | |
1845 if (ZoneCritResIdx) { | |
1846 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); | |
1847 ResCount = getResourceCount(ZoneCritResIdx); | |
1848 } | |
1849 else { | |
1850 ResFactor = SchedModel->getMicroOpFactor(); | |
1851 ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); | |
1852 } | |
1853 unsigned LFactor = SchedModel->getLatencyFactor(); | |
1854 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" | |
1855 << " Retired: " << RetiredMOps; | |
1856 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; | |
1857 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " | |
1858 << ResCount / ResFactor << " " | |
1859 << SchedModel->getResourceName(ZoneCritResIdx) | |
1860 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" | |
1861 << (IsResourceLimited ? " - Resource" : " - Latency") | |
1862 << " limited.\n"; | |
1863 } | |
1864 #endif | |
1865 | |
1866 //===----------------------------------------------------------------------===// | |
1867 // GenericScheduler - Implementation of the generic MachineSchedStrategy. | |
1868 //===----------------------------------------------------------------------===// | |
1869 | |
1870 namespace { | |
1871 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance | |
1872 /// the schedule. | |
1873 class GenericScheduler : public MachineSchedStrategy { | |
1874 public: | |
1875 /// Represent the type of SchedCandidate found within a single queue. | |
1876 /// pickNodeBidirectional depends on these listed by decreasing priority. | |
1877 enum CandReason { | |
1878 NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, | |
1879 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, | |
1880 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; | |
1881 | |
1882 #ifndef NDEBUG | |
1883 static const char *getReasonStr(GenericScheduler::CandReason Reason); | |
1884 #endif | |
1885 | |
1886 /// Policy for scheduling the next instruction in the candidate's zone. | |
1887 struct CandPolicy { | |
1888 bool ReduceLatency; | |
1889 unsigned ReduceResIdx; | |
1890 unsigned DemandResIdx; | |
1891 | |
1892 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} | |
1893 }; | |
1894 | |
1895 /// Status of an instruction's critical resource consumption. | |
1896 struct SchedResourceDelta { | |
1897 // Count critical resources in the scheduled region required by SU. | |
1898 unsigned CritResources; | |
1899 | |
1900 // Count critical resources from another region consumed by SU. | |
1901 unsigned DemandedResources; | |
1902 | |
1903 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} | |
1904 | |
1905 bool operator==(const SchedResourceDelta &RHS) const { | |
1906 return CritResources == RHS.CritResources | |
1907 && DemandedResources == RHS.DemandedResources; | |
1908 } | |
1909 bool operator!=(const SchedResourceDelta &RHS) const { | |
1910 return !operator==(RHS); | |
1911 } | |
1912 }; | |
1913 | |
1914 /// Store the state used by GenericScheduler heuristics, required for the | |
1915 /// lifetime of one invocation of pickNode(). | |
1916 struct SchedCandidate { | |
1917 CandPolicy Policy; | |
1918 | |
1919 // The best SUnit candidate. | |
1920 SUnit *SU; | |
1921 | |
1922 // The reason for this candidate. | |
1923 CandReason Reason; | |
1924 | |
1925 // Set of reasons that apply to multiple candidates. | |
1926 uint32_t RepeatReasonSet; | |
1927 | |
1928 // Register pressure values for the best candidate. | |
1929 RegPressureDelta RPDelta; | |
1930 | |
1931 // Critical resource consumption of the best candidate. | |
1932 SchedResourceDelta ResDelta; | |
1933 | |
1934 SchedCandidate(const CandPolicy &policy) | |
1935 : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} | |
1936 | |
1937 bool isValid() const { return SU; } | |
1938 | |
1939 // Copy the status of another candidate without changing policy. | |
1940 void setBest(SchedCandidate &Best) { | |
1941 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); | |
1942 SU = Best.SU; | |
1943 Reason = Best.Reason; | |
1944 RPDelta = Best.RPDelta; | |
1945 ResDelta = Best.ResDelta; | |
1946 } | |
1947 | |
1948 bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } | |
1949 void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } | |
1950 | |
1951 void initResourceDelta(const ScheduleDAGMI *DAG, | |
1952 const TargetSchedModel *SchedModel); | |
1953 }; | |
1954 | |
1955 private: | |
1956 const MachineSchedContext *Context; | |
1957 ScheduleDAGMI *DAG; | |
1958 const TargetSchedModel *SchedModel; | |
1959 const TargetRegisterInfo *TRI; | |
1960 | |
1961 // State of the top and bottom scheduled instruction boundaries. | |
1962 SchedRemainder Rem; | |
1963 SchedBoundary Top; | |
1964 SchedBoundary Bot; | |
1965 | |
1966 MachineSchedPolicy RegionPolicy; | |
1967 public: | |
1968 GenericScheduler(const MachineSchedContext *C): | |
1969 Context(C), DAG(0), SchedModel(0), TRI(0), | |
1970 Top(SchedBoundary::TopQID, "TopQ"), Bot(SchedBoundary::BotQID, "BotQ") {} | |
1971 | |
1972 virtual void initPolicy(MachineBasicBlock::iterator Begin, | |
1973 MachineBasicBlock::iterator End, | |
1974 unsigned NumRegionInstrs); | |
1975 | |
1976 bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } | |
1977 | |
1978 virtual void initialize(ScheduleDAGMI *dag); | |
1979 | |
1980 virtual SUnit *pickNode(bool &IsTopNode); | |
1981 | |
1982 virtual void schedNode(SUnit *SU, bool IsTopNode); | |
1983 | |
1984 virtual void releaseTopNode(SUnit *SU) { Top.releaseTopNode(SU); } | |
1985 | |
1986 virtual void releaseBottomNode(SUnit *SU) { Bot.releaseBottomNode(SU); } | |
1987 | |
1988 virtual void registerRoots(); | |
1989 | |
1990 protected: | |
1991 void checkAcyclicLatency(); | |
1992 | |
1993 void setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone, | |
1994 SchedBoundary &OtherZone); | |
1995 | |
1996 void tryCandidate(SchedCandidate &Cand, | |
1997 SchedCandidate &TryCand, | |
1998 SchedBoundary &Zone, | |
1999 const RegPressureTracker &RPTracker, | |
2000 RegPressureTracker &TempTracker); | |
2001 | |
2002 SUnit *pickNodeBidirectional(bool &IsTopNode); | |
2003 | |
2004 void pickNodeFromQueue(SchedBoundary &Zone, | |
2005 const RegPressureTracker &RPTracker, | |
2006 SchedCandidate &Candidate); | |
2007 | |
2008 void reschedulePhysRegCopies(SUnit *SU, bool isTop); | |
2009 | |
2010 #ifndef NDEBUG | |
2011 void traceCandidate(const SchedCandidate &Cand); | |
2012 #endif | |
2013 }; | |
2014 } // namespace | |
2015 | |
2016 void GenericScheduler::initialize(ScheduleDAGMI *dag) { | |
2017 DAG = dag; | |
2018 SchedModel = DAG->getSchedModel(); | |
2019 TRI = DAG->TRI; | |
2020 | |
2021 Rem.init(DAG, SchedModel); | |
2022 Top.init(DAG, SchedModel, &Rem); | |
2023 Bot.init(DAG, SchedModel, &Rem); | |
2024 | |
2025 // Initialize resource counts. | |
2026 | |
2027 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or | |
2028 // are disabled, then these HazardRecs will be disabled. | |
2029 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); | |
2030 const TargetMachine &TM = DAG->MF.getTarget(); | |
2031 if (!Top.HazardRec) { | |
2032 Top.HazardRec = | |
2033 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); | |
2034 } | |
2035 if (!Bot.HazardRec) { | |
2036 Bot.HazardRec = | |
2037 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); | |
2038 } | |
2039 } | |
2040 | |
2041 /// Initialize the per-region scheduling policy. | |
2042 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, | |
2043 MachineBasicBlock::iterator End, | |
2044 unsigned NumRegionInstrs) { | |
2045 const TargetMachine &TM = Context->MF->getTarget(); | |
2046 | |
2047 // Avoid setting up the register pressure tracker for small regions to save | |
2048 // compile time. As a rough heuristic, only track pressure when the number of | |
2049 // schedulable instructions exceeds half the integer register file. | |
2050 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( | |
2051 TM.getTargetLowering()->getRegClassFor(MVT::i32)); | |
2052 | |
2053 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); | |
2054 | |
2055 // For generic targets, we default to bottom-up, because it's simpler and more | |
2056 // compile-time optimizations have been implemented in that direction. | |
2057 RegionPolicy.OnlyBottomUp = true; | |
2058 | |
2059 // Allow the subtarget to override default policy. | |
2060 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); | |
2061 ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); | |
2062 | |
2063 // After subtarget overrides, apply command line options. | |
2064 if (!EnableRegPressure) | |
2065 RegionPolicy.ShouldTrackPressure = false; | |
2066 | |
2067 // Check -misched-topdown/bottomup can force or unforce scheduling direction. | |
2068 // e.g. -misched-bottomup=false allows scheduling in both directions. | |
2069 assert((!ForceTopDown || !ForceBottomUp) && | |
2070 "-misched-topdown incompatible with -misched-bottomup"); | |
2071 if (ForceBottomUp.getNumOccurrences() > 0) { | |
2072 RegionPolicy.OnlyBottomUp = ForceBottomUp; | |
2073 if (RegionPolicy.OnlyBottomUp) | |
2074 RegionPolicy.OnlyTopDown = false; | |
2075 } | |
2076 if (ForceTopDown.getNumOccurrences() > 0) { | |
2077 RegionPolicy.OnlyTopDown = ForceTopDown; | |
2078 if (RegionPolicy.OnlyTopDown) | |
2079 RegionPolicy.OnlyBottomUp = false; | |
2080 } | |
2081 } | |
2082 | |
2083 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic | |
2084 /// critical path by more cycles than it takes to drain the instruction buffer. | |
2085 /// We estimate an upper bounds on in-flight instructions as: | |
2086 /// | |
2087 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) | |
2088 /// InFlightIterations = AcyclicPath / CyclesPerIteration | |
2089 /// InFlightResources = InFlightIterations * LoopResources | |
2090 /// | |
2091 /// TODO: Check execution resources in addition to IssueCount. | |
2092 void GenericScheduler::checkAcyclicLatency() { | |
2093 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) | |
2094 return; | |
2095 | |
2096 // Scaled number of cycles per loop iteration. | |
2097 unsigned IterCount = | |
2098 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), | |
2099 Rem.RemIssueCount); | |
2100 // Scaled acyclic critical path. | |
2101 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); | |
2102 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop | |
2103 unsigned InFlightCount = | |
2104 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; | |
2105 unsigned BufferLimit = | |
2106 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); | |
2107 | |
2108 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; | |
2109 | |
2110 DEBUG(dbgs() << "IssueCycles=" | |
2111 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " | |
2112 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() | |
2113 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount | |
2114 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() | |
2115 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; | |
2116 if (Rem.IsAcyclicLatencyLimited) | |
2117 dbgs() << " ACYCLIC LATENCY LIMIT\n"); | |
2118 } | |
2119 | |
2120 void GenericScheduler::registerRoots() { | |
2121 Rem.CriticalPath = DAG->ExitSU.getDepth(); | |
2122 | |
2123 // Some roots may not feed into ExitSU. Check all of them in case. | |
2124 for (std::vector<SUnit*>::const_iterator | |
2125 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { | |
2126 if ((*I)->getDepth() > Rem.CriticalPath) | |
2127 Rem.CriticalPath = (*I)->getDepth(); | |
2128 } | |
2129 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); | |
2130 | |
2131 if (EnableCyclicPath) { | |
2132 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); | |
2133 checkAcyclicLatency(); | |
2134 } | |
2135 } | |
2136 | |
2137 /// Set the CandPolicy given a scheduling zone given the current resources and | |
2138 /// latencies inside and outside the zone. | |
2139 void GenericScheduler::setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone, | |
2140 SchedBoundary &OtherZone) { | |
2141 // Apply preemptive heuristics based on the the total latency and resources | |
2142 // inside and outside this zone. Potential stalls should be considered before | |
2143 // following this policy. | |
1954 | 2144 |
1955 // Compute remaining latency. We need this both to determine whether the | 2145 // Compute remaining latency. We need this both to determine whether the |
1956 // overall schedule has become latency-limited and whether the instructions | 2146 // overall schedule has become latency-limited and whether the instructions |
1957 // outside this zone are resource or latency limited. | 2147 // outside this zone are resource or latency limited. |
1958 // | 2148 // |
1963 // | 2153 // |
1964 // The "independent" latency is the max ready queue depth: | 2154 // The "independent" latency is the max ready queue depth: |
1965 // ILat = max N.depth for N in Available|Pending | 2155 // ILat = max N.depth for N in Available|Pending |
1966 // | 2156 // |
1967 // RemainingLatency is the greater of independent and dependent latency. | 2157 // RemainingLatency is the greater of independent and dependent latency. |
1968 unsigned RemLatency = DependentLatency; | 2158 unsigned RemLatency = CurrZone.getDependentLatency(); |
1969 RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); | 2159 RemLatency = std::max(RemLatency, |
1970 RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); | 2160 CurrZone.findMaxLatency(CurrZone.Available.elements())); |
2161 RemLatency = std::max(RemLatency, | |
2162 CurrZone.findMaxLatency(CurrZone.Pending.elements())); | |
1971 | 2163 |
1972 // Compute the critical resource outside the zone. | 2164 // Compute the critical resource outside the zone. |
1973 unsigned OtherCritIdx; | 2165 unsigned OtherCritIdx; |
1974 unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); | 2166 unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); |
1975 | 2167 |
1976 bool OtherResLimited = false; | 2168 bool OtherResLimited = false; |
1977 if (SchedModel->hasInstrSchedModel()) { | 2169 if (SchedModel->hasInstrSchedModel()) { |
1978 unsigned LFactor = SchedModel->getLatencyFactor(); | 2170 unsigned LFactor = SchedModel->getLatencyFactor(); |
1979 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; | 2171 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; |
1980 } | 2172 } |
1981 if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { | 2173 if (!OtherResLimited |
2174 && (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { | |
1982 Policy.ReduceLatency |= true; | 2175 Policy.ReduceLatency |= true; |
1983 DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " | 2176 DEBUG(dbgs() << " " << CurrZone.Available.getName() << " RemainingLatency " |
1984 << RemLatency << " + " << CurrCycle << "c > CritPath " | 2177 << RemLatency << " + " << CurrZone.getCurrCycle() << "c > CritPath " |
1985 << Rem->CriticalPath << "\n"); | 2178 << Rem.CriticalPath << "\n"); |
1986 } | 2179 } |
1987 // If the same resource is limiting inside and outside the zone, do nothing. | 2180 // If the same resource is limiting inside and outside the zone, do nothing. |
1988 if (ZoneCritResIdx == OtherCritIdx) | 2181 if (CurrZone.getZoneCritResIdx() == OtherCritIdx) |
1989 return; | 2182 return; |
1990 | 2183 |
1991 DEBUG( | 2184 DEBUG( |
1992 if (IsResourceLimited) { | 2185 if (CurrZone.isResourceLimited()) { |
1993 dbgs() << " " << Available.getName() << " ResourceLimited: " | 2186 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " |
1994 << getResourceName(ZoneCritResIdx) << "\n"; | 2187 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) |
2188 << "\n"; | |
1995 } | 2189 } |
1996 if (OtherResLimited) | 2190 if (OtherResLimited) |
1997 dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; | 2191 dbgs() << " RemainingLimit: " |
1998 if (!IsResourceLimited && !OtherResLimited) | 2192 << SchedModel->getResourceName(OtherCritIdx) << "\n"; |
2193 if (!CurrZone.isResourceLimited() && !OtherResLimited) | |
1999 dbgs() << " Latency limited both directions.\n"); | 2194 dbgs() << " Latency limited both directions.\n"); |
2000 | 2195 |
2001 if (IsResourceLimited && !Policy.ReduceResIdx) | 2196 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) |
2002 Policy.ReduceResIdx = ZoneCritResIdx; | 2197 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); |
2003 | 2198 |
2004 if (OtherResLimited) | 2199 if (OtherResLimited) |
2005 Policy.DemandResIdx = OtherCritIdx; | 2200 Policy.DemandResIdx = OtherCritIdx; |
2006 } | 2201 } |
2007 | |
2008 void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, | |
2009 unsigned ReadyCycle) { | |
2010 if (ReadyCycle < MinReadyCycle) | |
2011 MinReadyCycle = ReadyCycle; | |
2012 | |
2013 // Check for interlocks first. For the purpose of other heuristics, an | |
2014 // instruction that cannot issue appears as if it's not in the ReadyQueue. | |
2015 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; | |
2016 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) | |
2017 Pending.push(SU); | |
2018 else | |
2019 Available.push(SU); | |
2020 | |
2021 // Record this node as an immediate dependent of the scheduled node. | |
2022 NextSUs.insert(SU); | |
2023 } | |
2024 | |
2025 /// Move the boundary of scheduled code by one cycle. | |
2026 void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { | |
2027 if (SchedModel->getMicroOpBufferSize() == 0) { | |
2028 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); | |
2029 if (MinReadyCycle > NextCycle) | |
2030 NextCycle = MinReadyCycle; | |
2031 } | |
2032 // Update the current micro-ops, which will issue in the next cycle. | |
2033 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); | |
2034 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; | |
2035 | |
2036 // Decrement DependentLatency based on the next cycle. | |
2037 if ((NextCycle - CurrCycle) > DependentLatency) | |
2038 DependentLatency = 0; | |
2039 else | |
2040 DependentLatency -= (NextCycle - CurrCycle); | |
2041 | |
2042 if (!HazardRec->isEnabled()) { | |
2043 // Bypass HazardRec virtual calls. | |
2044 CurrCycle = NextCycle; | |
2045 } | |
2046 else { | |
2047 // Bypass getHazardType calls in case of long latency. | |
2048 for (; CurrCycle != NextCycle; ++CurrCycle) { | |
2049 if (isTop()) | |
2050 HazardRec->AdvanceCycle(); | |
2051 else | |
2052 HazardRec->RecedeCycle(); | |
2053 } | |
2054 } | |
2055 CheckPending = true; | |
2056 unsigned LFactor = SchedModel->getLatencyFactor(); | |
2057 IsResourceLimited = | |
2058 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) | |
2059 > (int)LFactor; | |
2060 | |
2061 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); | |
2062 } | |
2063 | |
2064 void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, | |
2065 unsigned Count) { | |
2066 ExecutedResCounts[PIdx] += Count; | |
2067 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) | |
2068 MaxExecutedResCount = ExecutedResCounts[PIdx]; | |
2069 } | |
2070 | |
2071 /// Add the given processor resource to this scheduled zone. | |
2072 /// | |
2073 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles | |
2074 /// during which this resource is consumed. | |
2075 /// | |
2076 /// \return the next cycle at which the instruction may execute without | |
2077 /// oversubscribing resources. | |
2078 unsigned GenericScheduler::SchedBoundary:: | |
2079 countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { | |
2080 unsigned Factor = SchedModel->getResourceFactor(PIdx); | |
2081 unsigned Count = Factor * Cycles; | |
2082 DEBUG(dbgs() << " " << getResourceName(PIdx) | |
2083 << " +" << Cycles << "x" << Factor << "u\n"); | |
2084 | |
2085 // Update Executed resources counts. | |
2086 incExecutedResources(PIdx, Count); | |
2087 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); | |
2088 Rem->RemainingCounts[PIdx] -= Count; | |
2089 | |
2090 // Check if this resource exceeds the current critical resource. If so, it | |
2091 // becomes the critical resource. | |
2092 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { | |
2093 ZoneCritResIdx = PIdx; | |
2094 DEBUG(dbgs() << " *** Critical resource " | |
2095 << getResourceName(PIdx) << ": " | |
2096 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); | |
2097 } | |
2098 // TODO: We don't yet model reserved resources. It's not hard though. | |
2099 return CurrCycle; | |
2100 } | |
2101 | |
/// Move the boundary of scheduled code by one SUnit.
///
/// Accounts for SU's micro-ops and processor-resource usage, feeds SU to the
/// hazard recognizer, updates the zone's expected/dependent latency, and
/// advances CurrCycle if issuing SU forces a stall.
void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  CurrMOps += IncMOps;
  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  //
  // TODO: Also check if this SU must end a dispatch group.
  unsigned NextCycle = CurrCycle;
  if (CurrMOps >= SchedModel->getIssueWidth()) {
    ++NextCycle;
    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
          << " at cycle " << CurrCycle << '\n');
  }
  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");

  // How a not-yet-ready SU is handled depends on the micro-op buffer model:
  // 0 = strictly in-order (must already be ready), 1 = stall until ready,
  // >1 = buffered out-of-order issue (no stall modeled).
  switch (SchedModel->getMicroOpBufferSize()) {
  case 0:
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    break;
  case 1:
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    }
    break;
  default:
    // We don't currently model the OOO reorder buffer, so consider all
    // scheduled MOps to be "retired".
    break;
  }
  RetiredMOps += IncMOps;

  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    Rem->RemIssueCount -= DecRemIssue;
    if (ZoneCritResIdx) {
      // Scale scheduled micro-ops for comparing with the critical resource.
      unsigned ScaledMOps =
        RetiredMOps * SchedModel->getMicroOpFactor();

      // If scaled micro-ops are now more than the previous critical resource by
      // a full cycle, then micro-ops issue becomes critical.
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
          >= (int)SchedModel->getLatencyFactor()) {
        ZoneCritResIdx = 0; // 0 denotes "micro-op issue is the bottleneck".
        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
      }
    }
    // Count each processor resource SU writes; the largest "next free" cycle
    // any of them reports becomes a lower bound for NextCycle.
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned RCycle =
        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
      if (RCycle > NextCycle)
        NextCycle = RCycle;
    }
  }
  // Update ExpectedLatency and DependentLatency.
  // Which member is "expected" vs "dependent" flips with the zone direction,
  // so bind references once and update both heights uniformly below.
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  if (SU->getDepth() > TopLatency) {
    TopLatency = SU->getDepth();
    DEBUG(dbgs() << "  " << Available.getName()
          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
  }
  if (SU->getHeight() > BotLatency) {
    BotLatency = SU->getHeight();
    DEBUG(dbgs() << "  " << Available.getName()
          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
  }
  // If we stall for any reason, bump the cycle.
  if (NextCycle > CurrCycle) {
    bumpCycle(NextCycle);
  }
  else {
    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    // resource limited. If a stall occurred, bumpCycle does this.
    unsigned LFactor = SchedModel->getLatencyFactor();
    IsResourceLimited =
      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
      > (int)LFactor;
  }
  DEBUG(dumpScheduledState());
}
2204 | |
2205 /// Release pending ready nodes in to the available queue. This makes them | |
2206 /// visible to heuristics. | |
2207 void GenericScheduler::SchedBoundary::releasePending() { | |
2208 // If the available queue is empty, it is safe to reset MinReadyCycle. | |
2209 if (Available.empty()) | |
2210 MinReadyCycle = UINT_MAX; | |
2211 | |
2212 // Check to see if any of the pending instructions are ready to issue. If | |
2213 // so, add them to the available queue. | |
2214 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; | |
2215 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { | |
2216 SUnit *SU = *(Pending.begin()+i); | |
2217 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; | |
2218 | |
2219 if (ReadyCycle < MinReadyCycle) | |
2220 MinReadyCycle = ReadyCycle; | |
2221 | |
2222 if (!IsBuffered && ReadyCycle > CurrCycle) | |
2223 continue; | |
2224 | |
2225 if (checkHazard(SU)) | |
2226 continue; | |
2227 | |
2228 Available.push(SU); | |
2229 Pending.remove(Pending.begin()+i); | |
2230 --i; --e; | |
2231 } | |
2232 DEBUG(if (!Pending.empty()) Pending.dump()); | |
2233 CheckPending = false; | |
2234 } | |
2235 | |
2236 /// Remove SU from the ready set for this boundary. | |
2237 void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { | |
2238 if (Available.isInQueue(SU)) | |
2239 Available.remove(Available.find(SU)); | |
2240 else { | |
2241 assert(Pending.isInQueue(SU) && "bad ready count"); | |
2242 Pending.remove(Pending.find(SU)); | |
2243 } | |
2244 } | |
2245 | |
2246 /// If this queue only has one ready candidate, return it. As a side effect, | |
2247 /// defer any nodes that now hit a hazard, and advance the cycle until at least | |
2248 /// one node is ready. If multiple instructions are ready, return NULL. | |
2249 SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { | |
2250 if (CheckPending) | |
2251 releasePending(); | |
2252 | |
2253 if (CurrMOps > 0) { | |
2254 // Defer any ready instrs that now have a hazard. | |
2255 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { | |
2256 if (checkHazard(*I)) { | |
2257 Pending.push(*I); | |
2258 I = Available.remove(I); | |
2259 continue; | |
2260 } | |
2261 ++I; | |
2262 } | |
2263 } | |
2264 for (unsigned i = 0; Available.empty(); ++i) { | |
2265 assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && | |
2266 "permanent hazard"); (void)i; | |
2267 bumpCycle(CurrCycle + 1); | |
2268 releasePending(); | |
2269 } | |
2270 if (Available.size() == 1) | |
2271 return *Available.begin(); | |
2272 return NULL; | |
2273 } | |
2274 | |
2275 #ifndef NDEBUG | |
2276 // This is useful information to dump after bumpNode. | |
2277 // Note that the Queue contents are more useful before pickNodeFromQueue. | |
2278 void GenericScheduler::SchedBoundary::dumpScheduledState() { | |
2279 unsigned ResFactor; | |
2280 unsigned ResCount; | |
2281 if (ZoneCritResIdx) { | |
2282 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); | |
2283 ResCount = getResourceCount(ZoneCritResIdx); | |
2284 } | |
2285 else { | |
2286 ResFactor = SchedModel->getMicroOpFactor(); | |
2287 ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); | |
2288 } | |
2289 unsigned LFactor = SchedModel->getLatencyFactor(); | |
2290 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" | |
2291 << " Retired: " << RetiredMOps; | |
2292 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; | |
2293 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " | |
2294 << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) | |
2295 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" | |
2296 << (IsResourceLimited ? " - Resource" : " - Latency") | |
2297 << " limited.\n"; | |
2298 } | |
2299 #endif | |
2300 | 2202 |
2301 void GenericScheduler::SchedCandidate:: | 2203 void GenericScheduler::SchedCandidate:: |
2302 initResourceDelta(const ScheduleDAGMI *DAG, | 2204 initResourceDelta(const ScheduleDAGMI *DAG, |
2303 const TargetSchedModel *SchedModel) { | 2205 const TargetSchedModel *SchedModel) { |
2304 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) | 2206 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) |
2312 ResDelta.CritResources += PI->Cycles; | 2214 ResDelta.CritResources += PI->Cycles; |
2313 if (PI->ProcResourceIdx == Policy.DemandResIdx) | 2215 if (PI->ProcResourceIdx == Policy.DemandResIdx) |
2314 ResDelta.DemandedResources += PI->Cycles; | 2216 ResDelta.DemandedResources += PI->Cycles; |
2315 } | 2217 } |
2316 } | 2218 } |
2317 | |
2318 | 2219 |
2319 /// Return true if this heuristic determines order. | 2220 /// Return true if this heuristic determines order. |
2320 static bool tryLess(int TryVal, int CandVal, | 2221 static bool tryLess(int TryVal, int CandVal, |
2321 GenericScheduler::SchedCandidate &TryCand, | 2222 GenericScheduler::SchedCandidate &TryCand, |
2322 GenericScheduler::SchedCandidate &Cand, | 2223 GenericScheduler::SchedCandidate &Cand, |
2407 return 0; | 2308 return 0; |
2408 } | 2309 } |
2409 | 2310 |
2410 static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, | 2311 static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, |
2411 GenericScheduler::SchedCandidate &Cand, | 2312 GenericScheduler::SchedCandidate &Cand, |
2412 GenericScheduler::SchedBoundary &Zone) { | 2313 SchedBoundary &Zone) { |
2413 if (Zone.isTop()) { | 2314 if (Zone.isTop()) { |
2414 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { | 2315 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { |
2415 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), | 2316 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), |
2416 TryCand, Cand, GenericScheduler::TopDepthReduce)) | 2317 TryCand, Cand, GenericScheduler::TopDepthReduce)) |
2417 return true; | 2318 return true; |
2443 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. | 2344 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. |
2444 /// \param Zone describes the scheduled zone that we are extending. | 2345 /// \param Zone describes the scheduled zone that we are extending. |
2445 /// \param RPTracker describes reg pressure within the scheduled zone. | 2346 /// \param RPTracker describes reg pressure within the scheduled zone. |
2446 /// \param TempTracker is a scratch pressure tracker to reuse in queries. | 2347 /// \param TempTracker is a scratch pressure tracker to reuse in queries. |
2447 void GenericScheduler::tryCandidate(SchedCandidate &Cand, | 2348 void GenericScheduler::tryCandidate(SchedCandidate &Cand, |
2448 SchedCandidate &TryCand, | 2349 SchedCandidate &TryCand, |
2449 SchedBoundary &Zone, | 2350 SchedBoundary &Zone, |
2450 const RegPressureTracker &RPTracker, | 2351 const RegPressureTracker &RPTracker, |
2451 RegPressureTracker &TempTracker) { | 2352 RegPressureTracker &TempTracker) { |
2452 | 2353 |
2453 if (DAG->isTrackingPressure()) { | 2354 if (DAG->isTrackingPressure()) { |
2454 // Always initialize TryCand's RPDelta. | 2355 // Always initialize TryCand's RPDelta. |
2455 if (Zone.isTop()) { | 2356 if (Zone.isTop()) { |
2456 TempTracker.getMaxDownwardPressureDelta( | 2357 TempTracker.getMaxDownwardPressureDelta( |
2508 return; | 2409 return; |
2509 | 2410 |
2510 // For loops that are acyclic path limited, aggressively schedule for latency. | 2411 // For loops that are acyclic path limited, aggressively schedule for latency. |
2511 // This can result in very long dependence chains scheduled in sequence, so | 2412 // This can result in very long dependence chains scheduled in sequence, so |
2512 // once every cycle (when CurrMOps == 0), switch to normal heuristics. | 2413 // once every cycle (when CurrMOps == 0), switch to normal heuristics. |
2513 if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps | 2414 if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() |
2514 && tryLatency(TryCand, Cand, Zone)) | 2415 && tryLatency(TryCand, Cand, Zone)) |
2416 return; | |
2417 | |
2418 // Prioritize instructions that read unbuffered resources by stall cycles. | |
2419 if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), | |
2420 Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) | |
2515 return; | 2421 return; |
2516 | 2422 |
2517 // Keep clustered nodes together to encourage downstream peephole | 2423 // Keep clustered nodes together to encourage downstream peephole |
2518 // optimizations which may reduce resource requirements. | 2424 // optimizations which may reduce resource requirements. |
2519 // | 2425 // |
2556 } | 2462 } |
2557 | 2463 |
2558 // Prefer immediate defs/users of the last scheduled instruction. This is a | 2464 // Prefer immediate defs/users of the last scheduled instruction. This is a |
2559 // local pressure avoidance strategy that also makes the machine code | 2465 // local pressure avoidance strategy that also makes the machine code |
2560 // readable. | 2466 // readable. |
2561 if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), | 2467 if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), |
2562 TryCand, Cand, NextDefUse)) | 2468 TryCand, Cand, NextDefUse)) |
2563 return; | 2469 return; |
2564 | 2470 |
2565 // Fall through to original instruction order. | 2471 // Fall through to original instruction order. |
2566 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) | 2472 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) |
2575 switch (Reason) { | 2481 switch (Reason) { |
2576 case NoCand: return "NOCAND "; | 2482 case NoCand: return "NOCAND "; |
2577 case PhysRegCopy: return "PREG-COPY"; | 2483 case PhysRegCopy: return "PREG-COPY"; |
2578 case RegExcess: return "REG-EXCESS"; | 2484 case RegExcess: return "REG-EXCESS"; |
2579 case RegCritical: return "REG-CRIT "; | 2485 case RegCritical: return "REG-CRIT "; |
2486 case Stall: return "STALL "; | |
2580 case Cluster: return "CLUSTER "; | 2487 case Cluster: return "CLUSTER "; |
2581 case Weak: return "WEAK "; | 2488 case Weak: return "WEAK "; |
2582 case RegMax: return "REG-MAX "; | 2489 case RegMax: return "REG-MAX "; |
2583 case ResourceReduce: return "RES-REDUCE"; | 2490 case ResourceReduce: return "RES-REDUCE"; |
2584 case ResourceDemand: return "RES-DEMAND"; | 2491 case ResourceDemand: return "RES-DEMAND"; |
2649 /// | 2556 /// |
2650 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during | 2557 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
2651 /// DAG building. To adjust for the current scheduling location we need to | 2558 /// DAG building. To adjust for the current scheduling location we need to |
2652 /// maintain the number of vreg uses remaining to be top-scheduled. | 2559 /// maintain the number of vreg uses remaining to be top-scheduled. |
2653 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, | 2560 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, |
2654 const RegPressureTracker &RPTracker, | 2561 const RegPressureTracker &RPTracker, |
2655 SchedCandidate &Cand) { | 2562 SchedCandidate &Cand) { |
2656 ReadyQueue &Q = Zone.Available; | 2563 ReadyQueue &Q = Zone.Available; |
2657 | 2564 |
2658 DEBUG(Q.dump()); | 2565 DEBUG(Q.dump()); |
2659 | 2566 |
2660 // getMaxPressureDelta temporarily modifies the tracker. | 2567 // getMaxPressureDelta temporarily modifies the tracker. |
2696 return SU; | 2603 return SU; |
2697 } | 2604 } |
2698 CandPolicy NoPolicy; | 2605 CandPolicy NoPolicy; |
2699 SchedCandidate BotCand(NoPolicy); | 2606 SchedCandidate BotCand(NoPolicy); |
2700 SchedCandidate TopCand(NoPolicy); | 2607 SchedCandidate TopCand(NoPolicy); |
2701 Bot.setPolicy(BotCand.Policy, Top); | 2608 // Set the bottom-up policy based on the state of the current bottom zone and |
2702 Top.setPolicy(TopCand.Policy, Bot); | 2609 // the instructions outside the zone, including the top zone. |
2610 setPolicy(BotCand.Policy, Bot, Top); | |
2611 // Set the top-down policy based on the state of the current top zone and | |
2612 // the instructions outside the zone, including the bottom zone. | |
2613 setPolicy(TopCand.Policy, Top, Bot); | |
2703 | 2614 |
2704 // Prefer bottom scheduling when heuristics are silent. | 2615 // Prefer bottom scheduling when heuristics are silent. |
2705 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); | 2616 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); |
2706 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); | 2617 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); |
2707 | 2618 |
2814 /// | 2725 /// |
2815 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling | 2726 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling |
2816 /// them here. See comments in biasPhysRegCopy. | 2727 /// them here. See comments in biasPhysRegCopy. |
2817 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { | 2728 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
2818 if (IsTopNode) { | 2729 if (IsTopNode) { |
2819 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); | 2730 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); |
2820 Top.bumpNode(SU); | 2731 Top.bumpNode(SU); |
2821 if (SU->hasPhysRegUses) | 2732 if (SU->hasPhysRegUses) |
2822 reschedulePhysRegCopies(SU, true); | 2733 reschedulePhysRegCopies(SU, true); |
2823 } | 2734 } |
2824 else { | 2735 else { |
2825 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); | 2736 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); |
2826 Bot.bumpNode(SU); | 2737 Bot.bumpNode(SU); |
2827 if (SU->hasPhysRegDefs) | 2738 if (SU->hasPhysRegDefs) |
2828 reschedulePhysRegCopies(SU, false); | 2739 reschedulePhysRegCopies(SU, false); |
2829 } | 2740 } |
2830 } | 2741 } |