Difference between revisions of "LDC LLVM profiling instrumentation"
JohanEngelen (talk | contribs) |
JohanEngelen (talk | contribs) (→Example with Clang 3.7) |
||
Line 16: | Line 16: | ||
= Example with Clang 3.7 = | = Example with Clang 3.7 = | ||
− | == | + | The sequence used to [http://clang.llvm.org/docs/UsersManual.html#profiling-with-instrumentation build a program with PGO using clang is]: |
+ | clang -fprofile-instr-generate pgo.c -o pgo | ||
+ | ./pgo | ||
+ | llvm-profdata merge -output=pgo.profdata default.profraw | ||
+ | clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo0.ll | ||
+ | ./pgo 1 | ||
+ | llvm-profdata merge -output=pgo.profdata default.profraw | ||
+ | clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo1.ll | ||
+ | |||
+ | Perhaps it'd be nice for LDC if the llvm-profdata tool is not needed. OTOH, when the profiling data is in a standard LLVM format, all of LLVM's tools can be used to interpret the data. | ||
+ | |||
+ | == pgo.c == | ||
int main(int argc, const char *argv[]) { | int main(int argc, const char *argv[]) { | ||
Line 47: | Line 58: | ||
store i64 %0, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0) | store i64 %0, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0) | ||
%1 = load i32, i32* %argc.addr, align 4 | %1 = load i32, i32* %argc.addr, align 4 | ||
− | % | + | %cmp = icmp sgt i32 %1, 1 |
− | br i1 % | + | br i1 %cmp, label %if.then, label %if.else |
if.then: ; preds = %entry | if.then: ; preds = %entry | ||
%pgocount1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1) | %pgocount1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1) | ||
Line 72: | Line 83: | ||
} | } | ||
− | + | == pgo0.ll, LLVM IR after clang -fprofile-instr-use == | |
− | + | ||
− | + | ; Function Attrs: inlinehint nounwind ssp uwtable | |
+ | define i32 @main(i32 %argc, i8** %argv) #0 !prof !2 { | ||
+ | entry: | ||
+ | %retval = alloca i32, align 4 | ||
+ | %argc.addr = alloca i32, align 4 | ||
+ | %argv.addr = alloca i8**, align 8 | ||
+ | store i32 0, i32* %retval | ||
+ | store i32 %argc, i32* %argc.addr, align 4 | ||
+ | store i8** %argv, i8*** %argv.addr, align 8 | ||
+ | %0 = load i32, i32* %argc.addr, align 4 | ||
+ | %cmp = icmp sgt i32 %0, 1 | ||
+ | br i1 %cmp, label %if.then, label %if.else, !prof !3 | ||
+ | if.then: ; preds = %entry | ||
+ | store i32 0, i32* %retval | ||
+ | br label %return | ||
+ | if.else: ; preds = %entry | ||
+ | store i32 1, i32* %retval | ||
+ | br label %return | ||
+ | return: ; preds = %if.else, %if.then | ||
+ | %1 = load i32, i32* %retval | ||
+ | ret i32 %1 | ||
+ | } | ||
+ | |||
+ | !2 = !{!"function_entry_count", i64 1} | ||
+ | !3 = !{!"branch_weights", i32 1, i32 2} | ||
+ | |||
+ | |||
+ | == pgo1.ll, LLVM IR after clang -fprofile-instr-use == | ||
+ | |||
+ | The file is identical to pgo0.ll except the last line: | ||
+ | !3 = !{!"branch_weights", i32 2, i32 1} |
Revision as of 21:04, 21 November 2015
This page is a place to collect information about LLVM profiling instrumentation and to document how it is implemented in LDC.
Contents
LLVM InstrProf
[describe what and how LLVM supports profile instrumentation]
LLVM intrinsics
These intrinsics were introduced with LLVM commit r223672, and Clang was modified to use them in Clang commit r223683.
LLVM features two intrinsics for instrumentation:
Example with Clang 3.7
The sequence used to build a program with PGO using clang is:
clang -fprofile-instr-generate pgo.c -o pgo
./pgo
llvm-profdata merge -output=pgo.profdata default.profraw
clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo0.ll
./pgo 1
llvm-profdata merge -output=pgo.profdata default.profraw
clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo1.ll
Perhaps it would be nice for LDC if the llvm-profdata tool were not needed. On the other hand, when the profiling data is in a standard LLVM format, all of LLVM's tools can be used to interpret the data.
pgo.c
int main(int argc, const char *argv[]) {
  if (argc > 1)
    return 0;
  else
    return 1;
}
LLVM IR with clang -fprofile-instr-generate
Unimportant IR has been stripped.
@__llvm_profile_name_main = private constant [4 x i8] c"main", section "__DATA,__llvm_prf_names", align 1 @__llvm_profile_counters_main = private global [2 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 @__llvm_profile_data_main = private constant { i32, i32, i64, i8*, i64* } { i32 4, i32 2, i64 10, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8 @__llvm_profile_runtime = external global i32
; Function Attrs: nounwind ssp uwtable define i32 @main(i32 %argc, i8** %argv) #0 { entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 0, i32* %retval store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %pgocount = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0) %0 = add i64 %pgocount, 1 store i64 %0, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0) %1 = load i32, i32* %argc.addr, align 4 %cmp = icmp sgt i32 %1, 1 br i1 %cmp, label %if.then, label %if.else if.then: ; preds = %entry %pgocount1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1) %2 = add i64 %pgocount1, 1 store i64 %2, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1) store i32 0, i32* %retval br label %return if.else: ; preds = %entry store i32 1, i32* %retval br label %return return: ; preds = %if.else, %if.then %3 = load i32, i32* %retval ret i32 %3 }
; Function Attrs: nounwind declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1
; Function Attrs: noinline define linkonce_odr hidden i32 @__llvm_profile_runtime_user() #2 { %1 = load i32, i32* @__llvm_profile_runtime ret i32 %1 }
pgo0.ll, LLVM IR after clang -fprofile-instr-use
; Function Attrs: inlinehint nounwind ssp uwtable define i32 @main(i32 %argc, i8** %argv) #0 !prof !2 { entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 0, i32* %retval store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %0 = load i32, i32* %argc.addr, align 4 %cmp = icmp sgt i32 %0, 1 br i1 %cmp, label %if.then, label %if.else, !prof !3 if.then: ; preds = %entry store i32 0, i32* %retval br label %return if.else: ; preds = %entry store i32 1, i32* %retval br label %return return: ; preds = %if.else, %if.then %1 = load i32, i32* %retval ret i32 %1 }
!2 = !{!"function_entry_count", i64 1}
!3 = !{!"branch_weights", i32 1, i32 2}
pgo1.ll, LLVM IR after clang -fprofile-instr-use
The file is identical to pgo0.ll except the last line:
!3 = !{!"branch_weights", i32 2, i32 1}