//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass aims to reduce the number of logical operations on bits in the CR
// register. These instructions have a fairly high latency and only a single
// pipeline at their disposal in modern PPC cores. Furthermore, they have a
// tendency to occur in fairly small blocks where there's little opportunity
// to hide the latency between the CR logical operation and its user.
//
//===---------------------------------------------------------------------===//

#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-reduce-cr-ops"
#include "PPCMachineBasicBlockUtils.h"

STATISTIC(NumContainedSingleUseBinOps,
          "Number of single-use binary CR logical ops contained in a block");
STATISTIC(NumToSplitBlocks,
          "Number of binary CR logical ops that can be used to split blocks");
STATISTIC(TotalCRLogicals, "Number of CR logical ops.");
STATISTIC(TotalNullaryCRLogicals,
          "Number of nullary CR logical ops (CRSET/CRUNSET).");
STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
STATISTIC(NumBlocksSplitOnBinaryCROp,
          "Number of blocks split on CR binary logical ops.");
STATISTIC(NumNotSplitIdenticalOperands,
          "Number of blocks not split due to operands being identical.");
STATISTIC(NumNotSplitChainCopies,
          "Number of blocks not split due to operands being chained copies.");
STATISTIC(NumNotSplitWrongOpcode,
          "Number of blocks not split due to the wrong opcode.");

namespace llvm {
  void initializePPCReduceCRLogicalsPass(PassRegistry&);
}

namespace {

static bool isBinary(MachineInstr &MI) {
  return MI.getNumOperands() == 3;
}

static bool isNullary(MachineInstr &MI) {
  return MI.getNumOperands() == 1;
}

/// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
/// a flag to indicate if the first operand of \p CROp is used as the
/// SplitBefore operand, determines whether either of the branches are to be
/// inverted as well as whether the new target should be the original
/// fall-through block.
static void
computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
                                bool &InvertNewBranch, bool &InvertOrigBranch,
                                bool &TargetIsFallThrough) {
  // The conditions under which each of the output operands should be [un]set
  // can certainly be written much more concisely with just 3 if statements or
  // ternary expressions. However, this provides a much clearer overview to the
  // reader as to what is set for each <CROp, BROp, OpUsed> combination.
  if (BROp == PPC::BC || BROp == PPC::BCLR) {
    // Regular branches.
    switch (CROp) {
    default:
      llvm_unreachable("Don't know how to handle this CR logical.");
    case PPC::CROR:
      InvertNewBranch = false;
      InvertOrigBranch = false;
      TargetIsFallThrough = false;
      return;
    case PPC::CRAND:
      InvertNewBranch = true;
      InvertOrigBranch = false;
      TargetIsFallThrough = true;
      return;
    case PPC::CRNAND:
      InvertNewBranch = true;
      InvertOrigBranch = true;
      TargetIsFallThrough = false;
      return;
    case PPC::CRNOR:
      InvertNewBranch = false;
      InvertOrigBranch = true;
      TargetIsFallThrough = true;
      return;
    case PPC::CRORC:
      InvertNewBranch = UsingDef1;
      InvertOrigBranch = !UsingDef1;
      TargetIsFallThrough = false;
      return;
    case PPC::CRANDC:
      InvertNewBranch = !UsingDef1;
      InvertOrigBranch = !UsingDef1;
      TargetIsFallThrough = true;
      return;
    }
  } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
    // Negated branches.
    switch (CROp) {
    default:
      llvm_unreachable("Don't know how to handle this CR logical.");
    case PPC::CROR:
      InvertNewBranch = true;
      InvertOrigBranch = false;
      TargetIsFallThrough = true;
      return;
    case PPC::CRAND:
      InvertNewBranch = false;
      InvertOrigBranch = false;
      TargetIsFallThrough = false;
      return;
    case PPC::CRNAND:
      InvertNewBranch = false;
      InvertOrigBranch = true;
      TargetIsFallThrough = true;
      return;
    case PPC::CRNOR:
      InvertNewBranch = true;
      InvertOrigBranch = true;
      TargetIsFallThrough = false;
      return;
    case PPC::CRORC:
      InvertNewBranch = !UsingDef1;
      InvertOrigBranch = !UsingDef1;
      TargetIsFallThrough = true;
      return;
    case PPC::CRANDC:
      InvertNewBranch = UsingDef1;
      InvertOrigBranch = !UsingDef1;
      TargetIsFallThrough = false;
      return;
    }
  } else
    llvm_unreachable("Don't know how to handle this branch.");
}

class PPCReduceCRLogicals : public MachineFunctionPass {

public:
  static char ID;
  struct CRLogicalOpInfo {
    MachineInstr *MI;
    // FIXME: If chains of copies are to be handled, this should be a vector.
    std::pair<MachineInstr*, MachineInstr*> CopyDefs;
    std::pair<MachineInstr*, MachineInstr*> TrueDefs;
    unsigned IsBinary : 1;
    unsigned IsNullary : 1;
    unsigned ContainedInBlock : 1;
    unsigned FeedsISEL : 1;
    unsigned FeedsBR : 1;
    unsigned FeedsLogical : 1;
    unsigned SingleUse : 1;
    unsigned DefsSingleUse : 1;
    unsigned SubregDef1;
    unsigned SubregDef2;
    CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
                        ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
                        FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
                        SubregDef1(0), SubregDef2(0) { }
    void dump();
  };

private:
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const MachineBranchProbabilityInfo *MBPI;

  // A vector to contain all the CR logical operations
  std::vector<CRLogicalOpInfo> AllCRLogicalOps;
  void initialize(MachineFunction &MFParm);
  void collectCRLogicals();
  bool handleCROp(CRLogicalOpInfo &CRI);
  bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
  static bool isCRLogical(MachineInstr &MI) {
    unsigned Opc = MI.getOpcode();
    return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
      Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CREQV ||
      Opc == PPC::CRANDC || Opc == PPC::CRORC || Opc == PPC::CRSET ||
      Opc == PPC::CRUNSET || Opc == PPC::CR6SET || Opc == PPC::CR6UNSET;
  }
  bool simplifyCode() {
    bool Changed = false;
    // Not using a range-based for loop here as the vector may grow while being
    // operated on.
    for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
      Changed |= handleCROp(AllCRLogicalOps[i]);
    return Changed;
  }

public:
  PPCReduceCRLogicals() : MachineFunctionPass(ID) {
    initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry());
  }

  MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
                                  MachineInstr *&CpDef);
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    // If the subtarget doesn't use CR bits, there's nothing to do.
    const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
    if (!STI.useCRBits())
      return false;

    initialize(MF);
    collectCRLogicals();
    return simplifyCode();
  }
  CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineBranchProbabilityInfo>();
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
  dbgs() << "CRLogicalOpMI: ";
  MI->dump();
  dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
  dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
  dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
  dbgs() << ", DefsSingleUse: " << DefsSingleUse;
  dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
  dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
  if (!IsNullary) {
    dbgs() << "\nDefs:\n";
    TrueDefs.first->dump();
  }
  if (IsBinary)
    TrueDefs.second->dump();
  dbgs() << "\n";
  if (CopyDefs.first) {
    dbgs() << "CopyDef1: ";
    CopyDefs.first->dump();
  }
  if (CopyDefs.second) {
    dbgs() << "CopyDef2: ";
    CopyDefs.second->dump();
  }
}
#endif

PPCReduceCRLogicals::CRLogicalOpInfo
PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
  CRLogicalOpInfo Ret;
  Ret.MI = &MIParam;
  // Get the defs
  if (isNullary(MIParam)) {
    Ret.IsNullary = 1;
    Ret.TrueDefs = std::make_pair(nullptr, nullptr);
    Ret.CopyDefs = std::make_pair(nullptr, nullptr);
  } else {
    MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
                                           Ret.SubregDef1, Ret.CopyDefs.first);
    Ret.DefsSingleUse &=
      MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
    Ret.DefsSingleUse &=
      MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
    assert(Def1 && "Must be able to find a definition of operand 1.");
    if (isBinary(MIParam)) {
      Ret.IsBinary = 1;
      MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
                                             Ret.SubregDef2,
                                             Ret.CopyDefs.second);
      Ret.DefsSingleUse &=
        MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
      Ret.DefsSingleUse &=
        MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
      assert(Def2 && "Must be able to find a definition of operand 2.");
      Ret.TrueDefs = std::make_pair(Def1, Def2);
    } else {
      Ret.TrueDefs = std::make_pair(Def1, nullptr);
      Ret.CopyDefs.second = nullptr;
    }
  }

  Ret.ContainedInBlock = 1;
  // Get the uses
  for (MachineInstr &UseMI :
       MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
    unsigned Opc = UseMI.getOpcode();
    if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
      Ret.FeedsISEL = 1;
    if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
        Opc == PPC::BCLRn)
      Ret.FeedsBR = 1;
    Ret.FeedsLogical = isCRLogical(UseMI);
    if (UseMI.getParent() != MIParam.getParent())
      Ret.ContainedInBlock = 0;
  }
  Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;

  // We now know whether all the uses of the CR logical are in the same block.
  if (!Ret.IsNullary) {
    Ret.ContainedInBlock &=
      (MIParam.getParent() == Ret.TrueDefs.first->getParent());
    if (Ret.IsBinary)
      Ret.ContainedInBlock &=
        (MIParam.getParent() == Ret.TrueDefs.second->getParent());
  }
  DEBUG(Ret.dump());
  if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
    NumContainedSingleUseBinOps++;
    if (Ret.FeedsBR && Ret.DefsSingleUse)
      NumToSplitBlocks++;
  }
  return Ret;
}

/// Looks trhough a COPY instruction to the actual definition of the CR-bit
/// register and returns the instruction that defines it.
/// FIXME: This currently handles what is by-far the most common case:
/// an instruction that defines a CR field followed by a single copy of a bit
/// from that field into a virtual register. If chains of copies need to be
/// handled, this should have a loop until a non-copy instruction is found.
MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
                                                     unsigned &Subreg,
                                                     MachineInstr *&CpDef) {
  Subreg = -1;
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;
  MachineInstr *Copy = MRI->getVRegDef(Reg);
  CpDef = Copy;
  if (!Copy->isCopy())
    return Copy;
  unsigned CopySrc = Copy->getOperand(1).getReg();
  Subreg = Copy->getOperand(1).getSubReg();
  if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) {
    const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
    // Set the Subreg
    if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
      Subreg = PPC::sub_eq;
    if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT)
      Subreg = PPC::sub_lt;
    if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT)
      Subreg = PPC::sub_gt;
    if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN)
      Subreg = PPC::sub_un;
    // Loop backwards and return the first MI that modifies the physical CR Reg.
    MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
    while (Me != B)
      if ((--Me)->modifiesRegister(CopySrc, TRI))
        return &*Me;
    return nullptr;
  }
  return MRI->getVRegDef(CopySrc);
}

void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
  MF = &MFParam;
  MRI = &MF->getRegInfo();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  AllCRLogicalOps.clear();
}

/// Contains all the implemented transformations on CR logical operations.
/// For example, a binary CR logical can be used to split a block on its inputs,
/// a unary CR logical might be used to change the condition code on a
/// comparison feeding it. A nullary CR logical might simply be removable
/// if the user of the bit it [un]sets can be transformed.
bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) {
  // We can definitely split a block on the inputs to a binary CR operation
  // whose defs and (single) use are within the same block.
  bool Changed = false;
  if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
      CRI.DefsSingleUse) {
    Changed = splitBlockOnBinaryCROp(CRI);
    if (Changed)
      NumBlocksSplitOnBinaryCROp++;
  }
  return Changed;
}

/// Splits a block that contains a CR-logical operation that feeds a branch
/// and whose operands are produced within the block.
/// Example:
///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
///    %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
/// Becomes:
///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
///    BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
///
///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
  if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
    DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
    NumNotSplitIdenticalOperands++;
    return false;
  }
  if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
      CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
    DEBUG(dbgs() << "Unable to split because one of the operands is a PHI or "
          "chain of copies.\n");
    NumNotSplitChainCopies++;
    return false;
  }
  // Note: keep in sync with computeBranchTargetAndInversion().
  if (CRI.MI->getOpcode() != PPC::CROR &&
      CRI.MI->getOpcode() != PPC::CRAND &&
      CRI.MI->getOpcode() != PPC::CRNOR &&
      CRI.MI->getOpcode() != PPC::CRNAND &&
      CRI.MI->getOpcode() != PPC::CRORC &&
      CRI.MI->getOpcode() != PPC::CRANDC) {
    DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
    NumNotSplitWrongOpcode++;
    return false;
  }
  DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
  MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
  MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;

  bool UsingDef1 = false;
  MachineInstr *SplitBefore = &*Def2It;
  for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
    if (Def1It == Def2It) { // Def2 comes before Def1.
      SplitBefore = &*Def1It;
      UsingDef1 = true;
      break;
    }
  }

  DEBUG(dbgs() << "We will split the following block:\n";);
  DEBUG(CRI.MI->getParent()->dump());
  DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());

  // Get the branch instruction.
  MachineInstr *Branch =
    MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();

  // We want the new block to have no code in it other than the definition
  // of the input to the CR logical and the CR logical itself. So we move
  // those to the bottom of the block (just before the branch). Then we
  // will split before the CR logical.
  MachineBasicBlock *MBB = SplitBefore->getParent();
  auto FirstTerminator = MBB->getFirstTerminator();
  MachineBasicBlock::iterator FirstInstrToMove =
    UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
  MachineBasicBlock::iterator SecondInstrToMove =
    UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;

  // The instructions that need to be moved are not guaranteed to be
  // contiguous. Move them individually.
  // FIXME: If one of the operands is a chain of (single use) copies, they
  // can all be moved and we can still split.
  MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
  if (FirstInstrToMove != SecondInstrToMove)
    MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
  MBB->splice(FirstTerminator, MBB, CRI.MI);

  unsigned Opc = CRI.MI->getOpcode();
  bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
  computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
                                  InvertNewBranch, InvertOrigBranch,
                                  TargetIsFallThrough);
  MachineInstr *SplitCond =
    UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first;
  DEBUG(dbgs() << "We will " <<  (InvertNewBranch ? "invert" : "copy"));
  DEBUG(dbgs() << " the original branch and the target is the " <<
        (TargetIsFallThrough ? "fallthrough block\n" : "orig. target block\n"));
  DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
  BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch,
    InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI,
    UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second };
  bool Changed = splitMBB(BSI);
  // If we've split on a CR logical that is fed by a CR logical,
  // recompute the source CR logical as it may be usable for splitting.
  if (Changed) {
    bool Input1CRlogical =
      CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
    bool Input2CRlogical =
      CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
    if (Input1CRlogical)
      AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
    if (Input2CRlogical)
      AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
  }
  return Changed;
}

void PPCReduceCRLogicals::collectCRLogicals() {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      if (isCRLogical(MI)) {
        AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
        TotalCRLogicals++;
        if (AllCRLogicalOps.back().IsNullary)
          TotalNullaryCRLogicals++;
        else if (AllCRLogicalOps.back().IsBinary)
          TotalBinaryCRLogicals++;
        else
          TotalUnaryCRLogicals++;
      }
    }
  }
}

} // end annonymous namespace

INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
                      "PowerPC Reduce CR logical Operation", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
                    "PowerPC Reduce CR logical Operation", false, false)

char PPCReduceCRLogicals::ID = 0;
FunctionPass*
llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }
