赞
踩
相关
《Postgresql源码(128)深入分析JIT中的函数内联llvm_inline》
《LLVM的ThinLTO编译优化技术在Postgresql中的应用》
前置阅读:《Postgresql源码(128)深入分析JIT中的函数内联llvm_inline》
在JIT inline函数的过程中,会通过函数的bc代码,经过一系列规则、成本的判断来决定函数能否Inline,本篇重点分析这段逻辑:function_inlinable。
总结速查:
worklist中记录的是 {函数名,搜索路径},包括本次表达式计算的函数,以及在function_inlinable函数内部检查的过程中发现的、被该函数调用的其他函数。function_inlinable会做dfs搜索所有调用到的函数,关心函数的指令数、里面用到的全局变量的个数。
function_inlinable(...)
{
...
标记了 __attribute__((weak)) 的函数,不会Inline。
if (F.isInterposable())
return false;
if (F.hasAvailableExternallyLinkage())
return false;
if (F.materialize())
elog(FATAL, "failed to materialize metadata");
if (F.getAttributes().hasFnAttr(llvm::Attribute::NoInline))
{
ilog(DEBUG1, "ineligibile to import %s due to noinline",
F.getName().data());
return false;
}
function_references(F, running_instcount, referencedVars, referencedFunctions);
function_references会遍历函数中的每一条指令,例如形如 add i32 %a, %b 的指令。%1 = add i32 %a, %b 是Instruction,同时也是一个User,抽象理解就是拥有操作数的一切对象都是User。dexp的IR定义:
; Function Attrs: nounwind uwtable define dso_local i64 @dexp(ptr nocapture noundef readonly %0) local_unnamed_addr #6 { %2 = getelementptr inbounds %struct.FunctionCallInfoBaseData, ptr %0, i64 0, i32 6, i64 0, i32 0 %3 = bitcast ptr %2 to ptr %4 = load double, ptr %3, align 8 %5 = fcmp uno double %4, 0.000000e+00 br i1 %5, label %28, label %6 6: ; preds = %1 %7 = tail call double @llvm.fabs.f64(double %4) #22 %8 = fcmp oeq double %7, 0x7FF0000000000000 br i1 %8, label %9, label %12 9: ; preds = %6 %10 = fcmp ogt double %4, 0.000000e+00 %11 = select i1 %10, double %4, double 0.000000e+00 br label %28 12: ; preds = %6 %13 = tail call ptr @__errno_location() #23 store i32 0, ptr %13, align 4 %14 = tail call double @exp(double noundef %4) #20 %15 = load i32, ptr %13, align 4 %16 = icmp eq i32 %15, 34 br i1 %16, label %17, label %21, !prof !11 17: ; preds = %12 %18 = fcmp une double %14, 0.000000e+00 br i1 %18, label %19, label %20 19: ; preds = %17 tail call void @float_overflow_error() #24 unreachable 20: ; preds = %17 tail call void @float_underflow_error() #24 unreachable 21: ; preds = %12 %22 = tail call double @llvm.fabs.f64(double %14) #22 %23 = fcmp oeq double %22, 0x7FF0000000000000 br i1 %23, label %24, label %25, !prof !11 24: ; preds = %21 tail call void @float_overflow_error() #24 unreachable 25: ; preds = %21 %26 = fcmp oeq double %14, 0.000000e+00 br i1 %26, label %27, label %28, !prof !11 27: ; preds = %25 tail call void @float_underflow_error() #24 unreachable 28: ; preds = %25, %9, %1 %29 = phi double [ %11, %9 ], [ %14, %25 ], [ %4, %1 ] %30 = bitcast double %29 to i64 ret i64 %30 }
/*
 * Gather everything function F refers to.
 *
 * Every instruction of F that is not a debug-info intrinsic bumps
 * running_instcount by one and is then used as the root of an operand walk.
 * The walk follows operands transitively: global variables end up in
 * referencedVars (and their initializers, if any, are walked as well),
 * functions end up in referencedFunctions, block addresses are ignored, and
 * any other User operand is expanded further.
 *
 * A single "seen" set is shared by all instructions of F, so each User has
 * its operands expanded at most once per call.
 */
static void
function_references(llvm::Function &F,
                    int &running_instcount,
                    llvm::SmallPtrSet<llvm::GlobalVariable *, 8> &referencedVars,
                    llvm::SmallPtrSet<llvm::Function *, 8> &referencedFunctions)
{
    llvm::SmallPtrSet<const llvm::User *, 32> seen;

    for (llvm::BasicBlock &block : F)
    {
        for (llvm::Instruction &inst : block)
        {
            /* debug intrinsics are neither counted nor walked */
            if (llvm::isa<llvm::DbgInfoIntrinsic>(inst))
                continue;

            /* counted even if it was already reached as somebody's operand */
            running_instcount++;

            llvm::SmallVector<llvm::User *, 8> pending;
            pending.push_back(&inst);

            while (!pending.empty())
            {
                llvm::User *current = pending.pop_back_val();
                const bool firstVisit = seen.insert(current).second;

                /* operands of this User were expanded earlier */
                if (!firstVisit)
                    continue;

                for (auto &use : current->operands())
                {
                    llvm::User *operand = llvm::dyn_cast<llvm::User>(use);

                    /* non-User operands and block addresses are ignored */
                    if (operand == nullptr ||
                        llvm::isa<llvm::BlockAddress>(operand))
                        continue;

                    if (auto *globalVar = llvm::dyn_cast<llvm::GlobalVariable>(operand))
                    {
                        /* remember the variable and walk into its initializer */
                        referencedVars.insert(globalVar);
                        if (globalVar->hasInitializer())
                            pending.push_back(globalVar->getInitializer());
                    }
                    else if (auto *callee = llvm::dyn_cast<llvm::Function>(operand))
                    {
                        /* functions are recorded but not descended into */
                        referencedFunctions.insert(callee);
                    }
                    else
                    {
                        /* any other User: keep following its operands */
                        pending.push_back(operand);
                    }
                }
            }
        }
    }
}
执行结束后:
dexp函数的IR分两部分:函数摘要和函数定义(index文件就是收集了bc文件中的函数摘要)
摘要:
^62 = gv: (name: "dexp", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 35, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^302), (callee: ^157), (callee: ^277), (callee: ^54))))) ; guid = 3352526880228194314
定义
$ cat float.ll | grep -A 58 '@dexp' define dso_local i64 @dexp(ptr nocapture noundef readonly %0) local_unnamed_addr #6 { %2 = getelementptr inbounds %struct.FunctionCallInfoBaseData, ptr %0, i64 0, i32 6, i64 0, i32 0 %3 = bitcast ptr %2 to ptr %4 = load double, ptr %3, align 8 %5 = fcmp uno double %4, 0.000000e+00 br i1 %5, label %28, label %6 6: ; preds = %1 %7 = tail call double @llvm.fabs.f64(double %4) #22 %8 = fcmp oeq double %7, 0x7FF0000000000000 br i1 %8, label %9, label %12 9: ; preds = %6 %10 = fcmp ogt double %4, 0.000000e+00 %11 = select i1 %10, double %4, double 0.000000e+00 br label %28 12: ; preds = %6 %13 = tail call ptr @__errno_location() #23 store i32 0, ptr %13, align 4 %14 = tail call double @exp(double noundef %4) #20 %15 = load i32, ptr %13, align 4 %16 = icmp eq i32 %15, 34 br i1 %16, label %17, label %21, !prof !11 17: ; preds = %12 %18 = fcmp une double %14, 0.000000e+00 br i1 %18, label %19, label %20 19: ; preds = %17 tail call void @float_overflow_error() #24 unreachable 20: ; preds = %17 tail call void @float_underflow_error() #24 unreachable 21: ; preds = %12 %22 = tail call double @llvm.fabs.f64(double %14) #22 %23 = fcmp oeq double %22, 0x7FF0000000000000 br i1 %23, label %24, label %25, !prof !11 24: ; preds = %21 tail call void @float_overflow_error() #24 unreachable 25: ; preds = %21 %26 = fcmp oeq double %14, 0.000000e+00 br i1 %26, label %27, label %28, !prof !11 27: ; preds = %25 tail call void @float_underflow_error() #24 unreachable 28: ; preds = %25, %9, %1 %29 = phi double [ %11, %9 ], [ %14, %25 ], [ %4, %1 ] %30 = bitcast double %29 to i64 ret i64 %30 }
和function_references计算结果一致。
for (llvm::GlobalVariable* rv: referencedVars)
{
...
importVars.insert(rv->getName());
/* small cost attributed to each cloned global */
running_instcount += 5;
}
visitedFunctions.insert(&F);
llvm.fabs.f64
__errno_location
exp
float_overflow_error
float_underflow_error
for (llvm::Function* referencedFunction: referencedFunctions)
{
llvm::StringSet<> recImportVars;
if (referencedFunction->materialize())
elog(FATAL, "failed to materialize metadata");
if (referencedFunction->isIntrinsic())
continue;
if (!visitedFunctions.insert(referencedFunction).second)
continue;
具有外部链接(external linkage)的函数,例如 __errno_location 函数就在glibc中。
if (referencedFunction->hasExternalLinkage())
{
llvm::StringRef funcName = referencedFunction->getName();
/*
* Don't bother checking for inlining if remaining cost budget is
* very small.
*/
if (subThreshold < 5)
continue;
auto it = functionStates.find(funcName);
if (it == functionStates.end())
{
FunctionInlineState inlineState; inlineState.costLimit = subThreshold; inlineState.processed = false; inlineState.inlined = false; inlineState.allowReconsidering = false; functionStates[funcName] = inlineState; worklist.push_back({funcName, searchpath}); ilog(DEBUG1, "considering extern function %s at %d for inlining", funcName.data(), subThreshold); } ...
标记了 __attribute__((weak)) 的函数,排除。
if (referencedFunction->isInterposable())
return false;
if (!function_inlinable(*referencedFunction, subThreshold, functionStates, worklist, searchpath, visitedFunctions, running_instcount, recImportVars)) { return false; } /* import referenced function itself */ importVars.insert(referencedFunction->getName()); /* import referenced function and its dependents */ for (auto& recImportVar : recImportVars) importVars.insert(recImportVar.first()); }
经过function_inlinable的递归调用,dfs所有会调用到的函数,最终需要考虑inline的外部函数都以 {函数名字,搜索路径} 的形式记录在worklist中。返回true表示当前函数可以inline。
return true;
}
怎么拿到函数的guid:funcGUID = llvm::GlobalValue::getGUID(cfuncname);
(GUID是用函数名MD5 hash出来的)
funcGUID = 3352526880228194314
index文件中查看函数属性:
^12463 = gv: (guid: 3352526880228194314, summaries: (function: (module: ^604, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 79, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6190), (callee: ^59633), (callee: ^10786), (callee: ^32543)))))
这里函数被标记了noInline: 1,所以该函数不会被inline。
但是dexp为什么不能被inline呢?看起来函数不长,分支也不多,也没有标记__attribute__((noinline))。
/*
 * dexp - return exp() of the float8 argument.
 *
 * NaN input is handed back unchanged and infinite input is answered without
 * calling exp().  For every other input exp() is called, and a range failure
 * (errno == ERANGE, an infinite result, or a zero result) is routed to
 * float_overflow_error() / float_underflow_error().
 *
 * NOTE(review): PG_GETARG_FLOAT8(0) presumably fetches the first call
 * argument and PG_RETURN_FLOAT8() presumably wraps the result as a Datum;
 * both macros are defined outside this listing -- confirm.
 */
Datum
dexp(PG_FUNCTION_ARGS)
{
    float8 arg1 = PG_GETARG_FLOAT8(0);
    float8 result;

    /* NaN in, NaN out */
    if (isnan(arg1))
        result = arg1;
    else if (isinf(arg1))
    {
        /* Per POSIX, exp(-Inf) is 0 */
        result = (arg1 > 0.0) ? arg1 : 0;
    }
    else
    {
        /* clear errno first so a stale value is not blamed on exp() */
        errno = 0;
        result = exp(arg1);
        if (unlikely(errno == ERANGE))
        {
            /* range error reported: nonzero result -> overflow, zero -> underflow */
            if (result != 0.0)
                float_overflow_error();
            else
                float_underflow_error();
        }
        /* no ERANGE reported, but the result itself is out of range */
        else if (unlikely(isinf(result)))
            float_overflow_error();
        else if (unlikely(result == 0.0))
            float_underflow_error();
    }

    PG_RETURN_FLOAT8(result);
}
原因是这里的bitcode是按O0编译的(clang在-O0下默认会给函数加上noinline和optnone属性,未优化时指令数也更多:insts: 79),按O2编译后noInline: 0,insts: 35
^10363 = gv: (guid: 3352526880228194314, summaries: (function: (module: ^604, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 35, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^49065), (callee: ^8990)))))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。