关于LiveIntervals pass中相关数据结构的含义,在寄存器分配前置分析(5.1) - LiveInterval这篇博客中已经做了清晰的讲解,此处不再赘述,本文主要讲解值定义信息VNInfo的使用方法和注意事项。
1. VNInfo含义
在LLVM的源码中,VNInfo定义在include/llvm/CodeGen/LiveInterval.h中,该结构主要表示machine级别的值定义信息,例如下面的
MachineBasicBlock bb.1中,%4是一个vreg,对于96B处的use点,其定义位于80B处的COPY指令(定义点对应的SlotIndex为80r,即register slot),后面通过%4的LiveInterval查询96B处使用的%4的定义信息时,返回的就是描述该定义的VNInfo对象。
值得注意的是,VNInfo还包含了isPHIDef的接口,可以查询一个值是否是Phi定义的(即使在PHI指令被消除后也可以查),实现原理主要是
看该值的def是不是Basic block boundary类型的SlotIndex,这是因为一般的值定义点SlotIndex类型都是Normal register def,只有PHI值定义点才是Basic block boundary类型。
64B bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
80B %4:gpr32all = COPY $wzr
96B %3:gpr32all = COPY %4:gpr32all
112B %12:gpr32all = COPY %3:gpr32all
128B B %bb.3
/// VNInfo - Value Number Information.
/// This class holds information about a machine-level value. The fields
/// visible here are the value number (id) and the SlotIndex at which the
/// value is defined (def); no use points are stored in this class itself.
///
class VNInfo {
public:
/// Allocator type alias associated with VNInfo (BumpPtrAllocator).
using Allocator = BumpPtrAllocator;
/// The ID number of this value.
unsigned id;
/// The index of the defining instruction.
SlotIndex def;
/// VNInfo constructor.
/// \param i the value number to assign to this value.
/// \param d the SlotIndex at which this value is defined.
VNInfo(unsigned i, SlotIndex d) : id(i), def(d) {}
/// VNInfo constructor, copies values from orig, except for the value number.
/// \param i    the value number to assign to this value.
/// \param orig the VNInfo whose def index is copied.
VNInfo(unsigned i, const VNInfo &orig) : id(i), def(orig.def) {}
/// Copy from the parameter into this VNInfo.
/// Only the def index is copied; this object's id is left unchanged.
void copyFrom(VNInfo &src) {
def = src.def;
}
/// Returns true if this value is defined by a PHI instruction (or was,
/// PHI instructions may have been eliminated).
/// PHI-defs begin at a block boundary, all other defs begin at register or
/// EC slots.
/// The check is purely on the def index: it returns def.isBlock().
bool isPHIDef() const { return def.isBlock(); }
/// Returns true if this value is unused, i.e. its def index is not valid.
bool isUnused() const { return !def.isValid(); }
/// Mark this value as unused by resetting def to a default-constructed
/// SlotIndex (presumably an invalid index, so isUnused() then returns true —
/// confirm against SlotIndex).
void markUnused() { def = SlotIndex(); }
};
2. 如何在llc后端pass中查询一个值在Use点对应的定义点
2.1 编写一个简单的示例
// learn_vni_info.cc
// Accumulates the integers 0, 1, ..., n - 1 in ascending order and returns
// their sum; the result is 0 when n is not positive.
int VNIInfoLearn(int n) {
  int total = 0;
  int next = 0;
  while (next < n) {
    total += next;
    ++next;
  }
  return total;
}
使用命令clang -O1 -S -emit-llvm learn_vni_info.cc -o learn_vni_info.ll
将learn_vni_info.cc编译为ll文件(这里我是用的NDK中自带的clang,
因为我编出来的代码一般在安卓设备上运行,ndk中已经配置好了交叉编译环境,对应LLVM-17)
gwz@DESKTOP-VNM3O2M:~/work/learn_llvm/vni_info$ cat learn_vni_info.ll
; ModuleID = 'learn_vni_info.cc'
source_filename = "learn_vni_info.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local noundef i32 @_Z12VNIInfoLearni(i32 noundef %0) local_unnamed_addr #0 {
%2 = icmp sgt i32 %0, 0
br i1 %2, label %3, label %13
3: ; preds = %1
%4 = add i32 %0, -1
%5 = zext i32 %4 to i33
%6 = add i32 %0, -2
%7 = zext i32 %6 to i33
%8 = mul i33 %5, %7
%9 = lshr i33 %8, 1
%10 = trunc i33 %9 to i32
%11 = add i32 %10, %0
%12 = add i32 %11, -1
br label %13
13: ; preds = %3, %1
%14 = phi i32 [ 0, %1 ], [ %12, %3 ]
ret i32 %14
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"Android (11349228, +pgo, +bolt, +lto, -mlgo, based on r487747e) clang version 17.0.2 (https://android.googlesource.com/toolchain/llvm-project d9f89f4d16663d5012e5c09495f3b30ece3d2362)"}
然后使用命令~/work/llvm-project/build/bin/opt --passes='view-cfg' learn_vni_info.ll
生成CFG图,然后使用
dotty /tmp/cfg._Z12VNIInfoLearni-9cd348.dot
观察结构。(ndk中没有prebuilt的opt和llc,这里的opt我是自己编译的LLVM-19版本,
SSH界面使用的mobaxterm)
可以看到这里有个PHI值%14,在Phi消除后,在该Phi值的2个source block中,都会被替换为同一个vreg,
这里的过程可以参考寄存器分配前置分析(1) — PHIElimination.
为了简单起见,直接对lib/CodeGen/RegisterCoalescer.cpp代码进行一点小改造。为啥改这个pass,是因为
LiveIntervals pass之后就是这个pass,对值定义点进行分析需要LiveIntervals分析的结果,从代码中
也可以看到register-coalescer的依赖pass。
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
"Register Coalescer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
"Register Coalescer", false, false)
对该pass进行小改动,在执行该pass之前,check一下COPY指令SrcReg的def信息,修改代码后重新编译llc
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4209,6 +4209,42 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
LIS = &getAnalysis<LiveIntervals>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Loops = &getAnalysis<MachineLoopInfo>();
+ LLVM_DEBUG(
+ dbgs() << "learn VNInfo ----------------------------\n";
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+ MF->print(dbgs(), &Indexes);
+ for (MachineBasicBlock& MBB : *MF) {
+ for (MachineInstr& MI : MBB) {
+ // 以COPY指令为例,分析COPY的SrcReg定义点
+ if (MI.isCopy()) {
+ MachineOperand SrcOp = MI.getOperand(1);
+ Register SrcReg = SrcOp.getReg();
+ if (SrcReg.isVirtual()) {
+ // 获取SrcReg的LiveInterval信息
+ LiveInterval& LI = LIS->getInterval(SrcReg);
+ // 获取当前MI对应的SlotIndex,也就是在MIR中的编号
+ SlotIndex SIdx = Indexes.getInstructionIndex(MI);
+ // 通过Query接口,查询SrcReg在当前使用点SIdx的定义信息
+ LiveQueryResult LRQ = LI.Query(SIdx);
+ dbgs() << "Cur SlotIndex = " << SIdx << ", MI = " << MI;
+ // valueIn返回当前MI处Use值(live-in)的def信息,如果没有则返回nullptr
+ if (VNInfo* VNI = LRQ.valueIn()) {
+ MachineInstr* DefMI = Indexes.getInstructionFromIndex(VNI->def);
+ dbgs() << SrcOp << " def SlotIndex = " << VNI->def << "\n";
+ // 需要注意,Phi值是Block boundary的SlotIndex定义,无法与MI直接对应
+ if (!VNI->isPHIDef()) {
+ dbgs() << SrcOp << " def MI = " << *DefMI;
+ }
+ } else {
+ dbgs() << "Dont find live in value!";
+ }
+ dbgs() << "\n";
+ }
+ }
+ }
+ }
+ );
+
if (EnableGlobalCopies == cl::BOU_UNSET)
JoinGlobalCopies = STI.enableJoinGlobalCopies();
else
2.2 分析结果
执行~/work/llvm-project/build/bin/llc -march=aarch64 -filetype=obj -debug-only=regalloc -stop-after=register-coalescer learn_vni_info.ll -o learn_vni_info.o > vni.log 2>&1
命令将debug信息输出到vni.log中,下面我将关键的部分截取出来(为了减少干扰,在执行完register-coalescer pass后就停止)。
为了便于阅读,我直接将分析写在log中。
********** REGISTER COALESCER **********
********** Function: _Z12VNIInfoLearni
learn VNInfo ----------------------------
# Machine code for function _Z12VNIInfoLearni: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $w0 in %2
0B bb.0 (%ir-block.1):
successors: %bb.2(0x50000000), %bb.1(0x30000000); %bb.2(62.50%), %bb.1(37.50%)
liveins: $w0
16B %2:gpr32common = COPY $w0
32B %5:gpr32 = SUBSWri %2:gpr32common, 1, 0, implicit-def $nzcv
48B Bcc 10, %bb.2, implicit killed $nzcv
64B bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
80B %4:gpr32all = COPY $wzr
96B %3:gpr32all = COPY %4:gpr32all
112B %12:gpr32all = COPY %3:gpr32all
128B B %bb.3
144B bb.2 (%ir-block.3):
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
160B %7:gpr32common = SUBWri %2:gpr32common, 2, 0
176B %8:gpr64 = UMADDLrrr %5:gpr32, %7:gpr32common, $xzr
192B %9:gpr64 = UBFMXri %8:gpr64, 1, 63
208B %10:gpr32 = COPY %9.sub_32:gpr64
224B %11:gpr32 = ADDWrr %5:gpr32, %10:gpr32
240B %0:gpr32all = COPY %11:gpr32
256B %12:gpr32all = COPY %0:gpr32all
272B bb.3 (%ir-block.13):
; predecessors: %bb.2, %bb.1
288B %1:gpr32all = COPY %12:gpr32all
304B $w0 = COPY %1:gpr32all
320B RET_ReallyLR implicit $w0
# End machine code for function _Z12VNIInfoLearni.
Cur SlotIndex = 96B, MI = %3:gpr32all = COPY %4:gpr32all
%4:gpr32all def SlotIndex = 80r
%4:gpr32all def MI = %4:gpr32all = COPY $wzr
// %3在112B处被使用,这里定义的%12是bb.3中原Phi值(Phi消除后对应vreg %12)的第一个Source值的定义点,是常量0值
Cur SlotIndex = 112B, MI = %12:gpr32all = COPY %3:gpr32all
%3:gpr32all def SlotIndex = 96r
%3:gpr32all def MI = %3:gpr32all = COPY %4:gpr32all
Cur SlotIndex = 208B, MI = %10:gpr32 = COPY %9.sub_32:gpr64
%9.sub_32:gpr64 def SlotIndex = 192r
%9.sub_32:gpr64 def MI = %9:gpr64 = UBFMXri %8:gpr64, 1, 63
Cur SlotIndex = 240B, MI = %0:gpr32all = COPY %11:gpr32
%11:gpr32 def SlotIndex = 224r
%11:gpr32 def MI = %11:gpr32 = ADDWrr %5:gpr32, %10:gpr32
// %0在256B处被使用,这里定义的%12是bb.3中原Phi值的第二个Source值的定义点
Cur SlotIndex = 256B, MI = %12:gpr32all = COPY %0:gpr32all
%0:gpr32all def SlotIndex = 240r
%0:gpr32all def MI = %0:gpr32all = COPY %11:gpr32
// %12在288B处被使用,这里定义的%1是bb.3中的Phi值,可以看到这里%12的定义点
// 对应的SlotIndex是272B,而不是272r。
Cur SlotIndex = 288B, MI = %1:gpr32all = COPY %12:gpr32all
%12:gpr32all def SlotIndex = 272B
Cur SlotIndex = 304B, MI = $w0 = COPY %1:gpr32all
%1:gpr32all def SlotIndex = 288r
%1:gpr32all def MI = %1:gpr32all = COPY %12:gpr32all
对于查询结果LiveQueryResult还有不少有用的接口,使用方法都是类似的,读者可以通过上述的简单学习验证方法快速掌握,
这里就不再赘述了。