LDC LLVM profiling instrumentation

From D Wiki
Revision as of 21:04, 21 November 2015 by JohanEngelen (talk | contribs) (Example with Clang 3.7)
Jump to: navigation, search

This page collects information about LLVM's profile instrumentation and documents how it is implemented in LDC.

LLVM InstrProf

[describe what and how LLVM supports profile instrumentation]

LLVM intrinsics

These intrinsics were introduced with LLVM commit r223672, and Clang was modified to use them in Clang commit r223683.

LLVM features two intrinsics for instrumentation: llvm.instrprof.increment (declared in the IR listing below) and llvm.instrprof.value.profile.

Example with Clang 3.7

The sequence used to build a program with PGO using clang is:

 # 1. Build an instrumented binary.
 clang -fprofile-instr-generate pgo.c -o pgo
 # 2. Run it without arguments (argc == 1); the raw profile is picked up from default.profraw below.
 ./pgo
 # 3. Convert the raw profile into the .profdata file that -fprofile-instr-use reads.
 llvm-profdata merge -output=pgo.profdata default.profraw
 # 4. Recompile with the profile and emit the annotated IR as pgo0.ll.
 clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo0.ll
 # 5. Repeat with one argument (argc == 2) so the other branch is taken; emit pgo1.ll.
 ./pgo 1
 llvm-profdata merge -output=pgo.profdata default.profraw
 clang -fprofile-instr-use=pgo.profdata -S -emit-llvm pgo.c -o pgo1.ll

It might be nice if LDC did not need the llvm-profdata tool. On the other hand, when the profiling data is kept in a standard LLVM format, all of LLVM's tools can be used to interpret it.

pgo.c

 /* Returns 0 when at least one command-line argument is given, 1 otherwise. */
 int main(int argc, const char *argv[]) {
   /* argc includes the program name, so a plain "./pgo" has argc == 1.
      The comparison matches the "icmp sgt i32 ..., 1" in the IR listings. */
   if (argc > 1)
     return 0;
   else
     return 1;
 }

LLVM IR with clang -fprofile-instr-generate

Unimportant IR has been stripped.

 ; Name of the instrumented function: the 4 bytes "main", placed in the __llvm_prf_names section.
 @__llvm_profile_name_main = private constant [4 x i8] c"main", section "__DATA,__llvm_prf_names", align 1
 ; Two zero-initialized 64-bit counters for main, placed in the __llvm_prf_cnts section.
 @__llvm_profile_counters_main = private global [2 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
 ; Per-function record in __llvm_prf_data: i32 4 matches the name length, i32 2 matches the number of
 ; counters, followed by pointers to the name and to the counters.
 ; NOTE(review): i64 10 is presumably the function hash -- confirm.
 @__llvm_profile_data_main = private constant { i32, i32, i64, i8*, i64* } { i32 4, i32 2, i64 10, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
 ; Declared here, defined elsewhere (presumably by the profiling runtime library -- confirm).
 @__llvm_profile_runtime = external global i32
 ; Instrumented main: counter 0 is bumped on function entry, counter 1 when the "then" branch runs.
 ; Function Attrs: nounwind ssp uwtable
 define i32 @main(i32 %argc, i8** %argv) #0 {
 entry:
   %retval = alloca i32, align 4
   %argc.addr = alloca i32, align 4
   %argv.addr = alloca i8**, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
   store i8** %argv, i8*** %argv.addr, align 8
   ; counters[0] += 1 (load / add / store, not atomic)
   %pgocount = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0)
   %0 = add i64 %pgocount, 1
   store i64 %0, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 0)
   %1 = load i32, i32* %argc.addr, align 4
   ; branch condition: argc > 1 (signed compare)
   %cmp = icmp sgt i32 %1, 1
   br i1 %cmp, label %if.then, label %if.else
 if.then:                                          ; preds = %entry
   ; counters[1] += 1
   %pgocount1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1)
   %2 = add i64 %pgocount1, 1
   store i64 %2, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__llvm_profile_counters_main, i64 0, i64 1)
   store i32 0, i32* %retval
   br label %return
 if.else:                                          ; preds = %entry
   ; no counter of its own here; its count equals counters[0] - counters[1]
   store i32 1, i32* %retval
   br label %return
 return:                                           ; preds = %if.else, %if.then
   %3 = load i32, i32* %retval
   ret i32 %3
 }
 ; Declaration of the counter-increment intrinsic. No call to it appears in this listing;
 ; the counter updates in @main show up as plain load/add/store sequences instead.
 ; Function Attrs: nounwind
 declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1
 ; Loads the external @__llvm_profile_runtime variable and returns its value.
 ; NOTE(review): presumably exists only to reference that symbol so the profiling runtime gets linked in -- confirm.
 ; Function Attrs: noinline
 define linkonce_odr hidden i32 @__llvm_profile_runtime_user() #2 {
   %1 = load i32, i32* @__llvm_profile_runtime
   ret i32 %1
 }

pgo0.ll, LLVM IR after clang -fprofile-instr-use

 ; main compiled with the profile applied: the counter updates are gone, and the profile
 ; shows up as !prof metadata on the function (!2) and on the conditional branch (!3).
 ; Function Attrs: inlinehint nounwind ssp uwtable
 define i32 @main(i32 %argc, i8** %argv) #0 !prof !2 {
 entry:
   %retval = alloca i32, align 4
   %argc.addr = alloca i32, align 4
   %argv.addr = alloca i8**, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
   store i8** %argv, i8*** %argv.addr, align 8
   %0 = load i32, i32* %argc.addr, align 4
   ; branch condition: argc > 1 (signed compare)
   %cmp = icmp sgt i32 %0, 1
   ; !prof !3 carries the branch weights for the if.then / if.else successors
   br i1 %cmp, label %if.then, label %if.else, !prof !3
 if.then:                                          ; preds = %entry
   store i32 0, i32* %retval
   br label %return
 if.else:                                          ; preds = %entry
   store i32 1, i32* %retval
   br label %return
 return:                                           ; preds = %if.else, %if.then
   %1 = load i32, i32* %retval
   ret i32 %1
 }
 ; !2: main was entered once in the profiled run.
 !2 = !{!"function_entry_count", i64 1}
 ; !3: weights in successor order of the branch (if.then, if.else); if.else carries the larger weight here.
 !3 = !{!"branch_weights", i32 1, i32 2}


pgo1.ll, LLVM IR after clang -fprofile-instr-use

The file is identical to pgo0.ll except for the last line:

 ; Weights swapped relative to pgo0.ll: here the if.then successor carries the larger weight.
 !3 = !{!"branch_weights", i32 2, i32 1}