Screenshot

Screenshot

I’ve now got both line debugging (break, next, continue) and variable debugging (display and modification) working for my toy language and compiler.

Here’s an example program:

BOOL i1;
i1 = TRUE;
PRINT i1;

INT8 i8;
i8 = 10;
PRINT i8;

INT16 i16;
i16 = 1000;
PRINT i16;

INT32 i32;
i32 = 100000;
PRINT i32;

INT64 i64;
i64 = 100000000000;
PRINT i64;

FLOAT32 f32;
f32 = 1.1;
PRINT f32;

FLOAT64 f64;
f64 = 2.2E-1;
PRINT f64;

It doesn’t do anything interesting, other than demonstrate that I got the DILocalVariableAttr declarations right for each supported type. Here’s the MLIR for this program:

"builtin.module"() ({
  "toy.program"() ({
    "toy.declare"() <{name = "i1", type = i1}> : () -> () loc(#loc)
    %0 = "arith.constant"() <{value = true}> : () -> i1 loc(#loc1)
    "toy.assign"(%0) <{name = "i1"}> : (i1) -> () loc(#loc1)
    %1 = "toy.load"() <{name = "i1"}> : () -> i1 loc(#loc2)
    "toy.print"(%1) : (i1) -> () loc(#loc2)
    "toy.declare"() <{name = "i8", type = i8}> : () -> () loc(#loc3)
    %2 = "arith.constant"() <{value = 10 : i64}> : () -> i64 loc(#loc4)
    "toy.assign"(%2) <{name = "i8"}> : (i64) -> () loc(#loc4)
    %3 = "toy.load"() <{name = "i8"}> : () -> i8 loc(#loc5)
    "toy.print"(%3) : (i8) -> () loc(#loc5)
    "toy.declare"() <{name = "i16", type = i16}> : () -> () loc(#loc6)
    %4 = "arith.constant"() <{value = 1000 : i64}> : () -> i64 loc(#loc7)
    "toy.assign"(%4) <{name = "i16"}> : (i64) -> () loc(#loc7)
    %5 = "toy.load"() <{name = "i16"}> : () -> i16 loc(#loc8)
    "toy.print"(%5) : (i16) -> () loc(#loc8)
    "toy.declare"() <{name = "i32", type = i32}> : () -> () loc(#loc9)
    %6 = "arith.constant"() <{value = 100000 : i64}> : () -> i64 loc(#loc10)
    "toy.assign"(%6) <{name = "i32"}> : (i64) -> () loc(#loc10)
    %7 = "toy.load"() <{name = "i32"}> : () -> i32 loc(#loc11)
    "toy.print"(%7) : (i32) -> () loc(#loc11)
    "toy.declare"() <{name = "i64", type = i64}> : () -> () loc(#loc12)
    %8 = "arith.constant"() <{value = 100000000000 : i64}> : () -> i64 loc(#loc13)
    "toy.assign"(%8) <{name = "i64"}> : (i64) -> () loc(#loc13)
    %9 = "toy.load"() <{name = "i64"}> : () -> i64 loc(#loc14)
    "toy.print"(%9) : (i64) -> () loc(#loc14)
    "toy.declare"() <{name = "f32", type = f32}> : () -> () loc(#loc15)
    %10 = "arith.constant"() <{value = 1.100000e+00 : f64}> : () -> f64 loc(#loc16)
    "toy.assign"(%10) <{name = "f32"}> : (f64) -> () loc(#loc16)
    %11 = "toy.load"() <{name = "f32"}> : () -> f32 loc(#loc17)
    "toy.print"(%11) : (f32) -> () loc(#loc17)
    "toy.declare"() <{name = "f64", type = f64}> : () -> () loc(#loc18)
    %12 = "arith.constant"() <{value = 2.200000e-01 : f64}> : () -> f64 loc(#loc19)
    "toy.assign"(%12) <{name = "f64"}> : (f64) -> () loc(#loc19)
    %13 = "toy.load"() <{name = "f64"}> : () -> f64 loc(#loc20)
    "toy.print"(%13) : (f64) -> () loc(#loc20)
    "toy.exit"() : () -> () loc(#loc)
  }) : () -> () loc(#loc)
}) : () -> () loc(#loc)
#loc = loc("types.toy":1:1)
#loc1 = loc("types.toy":2:6)
#loc2 = loc("types.toy":3:1)
#loc3 = loc("types.toy":5:1)
#loc4 = loc("types.toy":6:6)
#loc5 = loc("types.toy":7:1)
#loc6 = loc("types.toy":9:1)
#loc7 = loc("types.toy":10:7)
#loc8 = loc("types.toy":11:1)
#loc9 = loc("types.toy":13:1)
#loc10 = loc("types.toy":14:7)
#loc11 = loc("types.toy":15:1)
#loc12 = loc("types.toy":17:1)
#loc13 = loc("types.toy":18:7)
#loc14 = loc("types.toy":19:1)
#loc15 = loc("types.toy":21:1)
#loc16 = loc("types.toy":22:7)
#loc17 = loc("types.toy":23:1)
#loc18 = loc("types.toy":25:1)
#loc19 = loc("types.toy":26:7)
#loc20 = loc("types.toy":27:1)

and the generated LLVM-IR

; ModuleID = 'types.toy'
source_filename = "types.toy"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare void @__toy_print_f64(double)

declare void @__toy_print_i64(i64)

define i32 @main() !dbg !4 {
  %1 = alloca i1, i64 1, align 1, !dbg !8
    #dbg_declare(ptr %1, !9, !DIExpression(), !8)
  store i1 true, ptr %1, align 1, !dbg !11
  %2 = load i1, ptr %1, align 1, !dbg !12
  %3 = zext i1 %2 to i64, !dbg !12
  call void @__toy_print_i64(i64 %3), !dbg !12
  %4 = alloca i8, i64 1, align 1, !dbg !13
    #dbg_declare(ptr %4, !14, !DIExpression(), !13)
  store i8 10, ptr %4, align 1, !dbg !16
  %5 = load i8, ptr %4, align 1, !dbg !17
  %6 = sext i8 %5 to i64, !dbg !17
  call void @__toy_print_i64(i64 %6), !dbg !17
  %7 = alloca i16, i64 1, align 2, !dbg !18
    #dbg_declare(ptr %7, !19, !DIExpression(), !18)
  store i16 1000, ptr %7, align 2, !dbg !21
  %8 = load i16, ptr %7, align 2, !dbg !22
  %9 = sext i16 %8 to i64, !dbg !22
  call void @__toy_print_i64(i64 %9), !dbg !22
  %10 = alloca i32, i64 1, align 4, !dbg !23
    #dbg_declare(ptr %10, !24, !DIExpression(), !23)
  store i32 100000, ptr %10, align 4, !dbg !26
  %11 = load i32, ptr %10, align 4, !dbg !27
  %12 = sext i32 %11 to i64, !dbg !27
  call void @__toy_print_i64(i64 %12), !dbg !27
  %13 = alloca i64, i64 1, align 8, !dbg !28
    #dbg_declare(ptr %13, !29, !DIExpression(), !28)
  store i64 100000000000, ptr %13, align 8, !dbg !31
  %14 = load i64, ptr %13, align 8, !dbg !32
  call void @__toy_print_i64(i64 %14), !dbg !32
  %15 = alloca float, i64 1, align 4, !dbg !33
    #dbg_declare(ptr %15, !34, !DIExpression(), !33)
  store float 0x3FF19999A0000000, ptr %15, align 4, !dbg !36
  %16 = load float, ptr %15, align 4, !dbg !37
  %17 = fpext float %16 to double, !dbg !37
  call void @__toy_print_f64(double %17), !dbg !37
  %18 = alloca double, i64 1, align 8, !dbg !38
    #dbg_declare(ptr %18, !39, !DIExpression(), !38)
  store double 2.200000e-01, ptr %18, align 8, !dbg !41
  %19 = load double, ptr %18, align 8, !dbg !42
  call void @__toy_print_f64(double %19), !dbg !42
  ret i32 0, !dbg !8
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!llvm.ident = !{!3}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "toycalculator", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "types.toy", directory: ".")
!3 = !{!"toycalculator V2"}
!4 = distinct !DISubprogram(name: "main", linkageName: "main", scope: !2, file: !2, line: 1, type: !5, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !1)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !DILocation(line: 1, column: 1, scope: !4)
!9 = !DILocalVariable(name: "i1", scope: !4, file: !2, line: 1, type: !10, align: 8)
!10 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
!11 = !DILocation(line: 2, column: 6, scope: !4)
!12 = !DILocation(line: 3, column: 1, scope: !4)
!13 = !DILocation(line: 5, column: 1, scope: !4)
!14 = !DILocalVariable(name: "i8", scope: !4, file: !2, line: 5, type: !15, align: 8)
!15 = !DIBasicType(name: "int8_t", size: 8, encoding: DW_ATE_signed)
!16 = !DILocation(line: 6, column: 6, scope: !4)
!17 = !DILocation(line: 7, column: 1, scope: !4)
!18 = !DILocation(line: 9, column: 1, scope: !4)
!19 = !DILocalVariable(name: "i16", scope: !4, file: !2, line: 9, type: !20, align: 16)
!20 = !DIBasicType(name: "int16_t", size: 16, encoding: DW_ATE_signed)
!21 = !DILocation(line: 10, column: 7, scope: !4)
!22 = !DILocation(line: 11, column: 1, scope: !4)
!23 = !DILocation(line: 13, column: 1, scope: !4)
!24 = !DILocalVariable(name: "i32", scope: !4, file: !2, line: 13, type: !25, align: 32)
!25 = !DIBasicType(name: "int32_t", size: 32, encoding: DW_ATE_signed)
!26 = !DILocation(line: 14, column: 7, scope: !4)
!27 = !DILocation(line: 15, column: 1, scope: !4)
!28 = !DILocation(line: 17, column: 1, scope: !4)
!29 = !DILocalVariable(name: "i64", scope: !4, file: !2, line: 17, type: !30, align: 64)
!30 = !DIBasicType(name: "int64_t", size: 64, encoding: DW_ATE_signed)
!31 = !DILocation(line: 18, column: 7, scope: !4)
!32 = !DILocation(line: 19, column: 1, scope: !4)
!33 = !DILocation(line: 21, column: 1, scope: !4)
!34 = !DILocalVariable(name: "f32", scope: !4, file: !2, line: 21, type: !35, align: 32)
!35 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
!36 = !DILocation(line: 22, column: 7, scope: !4)
!37 = !DILocation(line: 23, column: 1, scope: !4)
!38 = !DILocation(line: 25, column: 1, scope: !4)
!39 = !DILocalVariable(name: "f64", scope: !4, file: !2, line: 25, type: !40, align: 64)
!40 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!41 = !DILocation(line: 26, column: 7, scope: !4)
!42 = !DILocation(line: 27, column: 1, scope: !4)

Interesting bits include:

source_filename = "types.toy"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!llvm.ident = !{!3}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "toycalculator", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "types.toy", directory: ".")
!3 = !{!"toycalculator V2"}
!4 = distinct !DISubprogram(name: "main", linkageName: "main", scope: !2, file: !2, line: 1, type: !5, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !1)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}

Unlike flang’s AddDebugInfoPass DI instrumentation pass, I didn’t try to do anything fancy, and instead just implemented a few helper functions.

One for the target triple:

void setModuleAttrs()
{
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();
    llvm::Triple triple( targetTriple );
    assert( triple.isArch64Bit() &amp;amp;amp;amp;&amp;amp;amp;amp; triple.isOSLinux() );

    std::string error;
    const llvm::Target* target = llvm::TargetRegistry::lookupTarget( targetTriple, error );
    assert( target );
    llvm::TargetOptions options;
    auto targetMachine = std::unique_ptr&amp;amp;amp;lt;llvm::TargetMachine&amp;amp;amp;gt;( target-&amp;amp;amp;gt;createTargetMachine(
        targetTriple, "generic", "", options, std::optional&amp;amp;amp;lt;llvm::Reloc::Model&amp;amp;amp;gt;( llvm::Reloc::PIC_ ) ) );
    assert( targetMachine );
    std::string dataLayoutStr = targetMachine-&amp;amp;amp;gt;createDataLayout().getStringRepresentation();

    module-&amp;amp;amp;gt;setAttr( "llvm.ident", builder.getStringAttr( COMPILER_NAME COMPILER_VERSION ) );
    module-&amp;amp;amp;gt;setAttr( "llvm.data_layout", builder.getStringAttr( dataLayoutStr ) );
    module-&amp;amp;amp;gt;setAttr( "llvm.target_triple", builder.getStringAttr( targetTriple ) );
}

one for the DICompileUnitAttr, and DISubprogramAttr:

void createMain()
{
    auto ctx = builder.getContext();
    auto mainFuncType = LLVM::LLVMFunctionType::get( builder.getI32Type(), {}, false );
    mainFunc =
        builder.create&amp;amp;amp;lt;LLVM::LLVMFuncOp&amp;amp;amp;gt;( module.getLoc(), ENTRY_SYMBOL_NAME, mainFuncType, LLVM::Linkage::External );

    // Construct module level DI state:
    fileAttr = mlir::LLVM::DIFileAttr::get( ctx, driverState.filename, "." );
    auto distinctAttr = mlir::DistinctAttr::create( builder.getUnitAttr() );
    auto compileUnitAttr = mlir::LLVM::DICompileUnitAttr::get(
        ctx, distinctAttr, llvm::dwarf::DW_LANG_C, fileAttr, builder.getStringAttr( COMPILER_NAME ), false,
        mlir::LLVM::DIEmissionKind::Full, mlir::LLVM::DINameTableKind::Default );
    auto ta =
        mlir::LLVM::DIBasicTypeAttr::get( ctx, (unsigned)llvm::dwarf::DW_TAG_base_type, builder.getStringAttr( "int" ),
                                          32, (unsigned)llvm::dwarf::DW_ATE_signed );
    llvm::SmallVector&amp;amp;amp;lt;mlir::LLVM::DITypeAttr, 1&amp;amp;amp;gt; typeArray;
    typeArray.push_back( ta );
    auto subprogramType = mlir::LLVM::DISubroutineTypeAttr::get( ctx, 0, typeArray );
    subprogramAttr = mlir::LLVM::DISubprogramAttr::get(
        ctx, mlir::DistinctAttr::create( builder.getUnitAttr() ), compileUnitAttr, fileAttr,
        builder.getStringAttr( ENTRY_SYMBOL_NAME ), builder.getStringAttr( ENTRY_SYMBOL_NAME ), fileAttr, 1, 1,
        mlir::LLVM::DISubprogramFlags::Definition, subprogramType, llvm::ArrayRef&amp;amp;amp;lt;mlir::LLVM::DINodeAttr&amp;amp;amp;gt;{},
        llvm::ArrayRef&amp;amp;amp;lt;mlir::LLVM::DINodeAttr&amp;amp;amp;gt;{} );
    mainFunc-&amp;amp;amp;gt;setAttr( "llvm.debug.subprogram", subprogramAttr );

    // This is the key to ensure that translateModuleToLLVMIR does not strip the location info (instead converts
    // loc's into !dbg's)
    mainFunc-&amp;amp;amp;gt;setLoc( builder.getFusedLoc( { module.getLoc() }, subprogramAttr ) );
}

The ‘setLoc’ call above, right near the end, is critical. Without that, the call to mlir::translateModuleToLLVMIR strips out all the loc() references, instead of replacing them with !DILocation.

Finally, one for the variable DI creation:

void constructVariableDI( llvm::StringRef varName, mlir::Type&amp;amp;amp;amp; elemType, mlir::FileLineColLoc loc,
                          unsigned elemSizeInBits, mlir::LLVM::AllocaOp&amp;amp;amp;amp; allocaOp )
{
    auto ctx = builder.getContext();
    allocaOp-&amp;amp;amp;gt;setAttr( "bindc_name", builder.getStringAttr( varName ) );

    mlir::LLVM::DILocalVariableAttr diVar;

    if ( elemType.isa&amp;amp;amp;lt;mlir::IntegerType&amp;amp;amp;gt;() )
    {
        const char* typeName{};
        unsigned dwType = llvm::dwarf::DW_ATE_signed;
        unsigned sz = elemSizeInBits;

        switch ( elemSizeInBits )
        {
            case 1:
            {
                typeName = "bool";
                dwType = llvm::dwarf::DW_ATE_boolean;
                sz = 8;
                break;
            }
            case 8:
            {
                typeName = "int8_t";
                break;
            }
            case 16:
            {
                typeName = "int16_t";
                break;
            }
            case 32:
            {
                typeName = "int32_t";
                break;
            }
            case 64:
            {
                typeName = "int64_t";
                break;
            }
            default:
            {
                llvm_unreachable( "Unsupported float type size" );
            }
        }

        auto diType = mlir::LLVM::DIBasicTypeAttr::get( ctx, llvm::dwarf::DW_TAG_base_type,
                                                        builder.getStringAttr( typeName ), sz, dwType );

        diVar = mlir::LLVM::DILocalVariableAttr::get( ctx, subprogramAttr, builder.getStringAttr( varName ), fileAttr,
                                                      loc.getLine(), 0, sz, diType, mlir::LLVM::DIFlags::Zero );
    }
    else
    {
        const char* typeName{};

        switch ( elemSizeInBits )
        {
            case 32:
            {
                typeName = "float";
                break;
            }
            case 64:
            {
                typeName = "double";
                break;
            }
            default:
            {
                llvm_unreachable( "Unsupported float type size" );
            }
        }

        auto diType =
            mlir::LLVM::DIBasicTypeAttr::get( ctx, llvm::dwarf::DW_TAG_base_type, builder.getStringAttr( typeName ),
                                              elemSizeInBits, llvm::dwarf::DW_ATE_float );

        diVar =
            mlir::LLVM::DILocalVariableAttr::get( ctx, subprogramAttr, builder.getStringAttr( varName ), fileAttr,
                                                  loc.getLine(), 0, elemSizeInBits, diType, mlir::LLVM::DIFlags::Zero );
    }
            
    builder.setInsertionPointAfter( allocaOp );
    builder.create&amp;amp;amp;lt;mlir::LLVM::DbgDeclareOp&amp;amp;amp;gt;( loc, allocaOp, diVar );
        
    symbolToAlloca[varName] = allocaOp;
}

In this code, the call to builder.setInsertionPointAfter is critical.  When the lowering eraseOp takes out the DeclareOp, we need the replacement instructions to all end up in the same place.  Without that, the subsequent AssignOp lowering results in an error like this:

//===-------------------------------------------===//
Legalizing operation : 'toy.assign'(0x2745ab50) {
  "toy.assign"(%3) <{name = "x"}> : (i64) -> ()Fold {
  } -> FAILURE : unable to fold
Pattern : 'toy.assign -> ()' {
Trying to match "toy::AssignOpLowering"
Lowering AssignOp: toy.assign "x", %c5_i64 : i64
name: x
value: ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableOpInterface::Trait<mlir::TypeID::get()::Empty>)
...
operand #0 does not dominate this use
mlir-asm-printer: 'builtin.module' failed to verify and will be printed in generic form
%3 = "arith.constant"() <{value = 5 : i64}> : () -> i64
valType: i64
elemType: f64
** Insert  : 'llvm.sitofp'(0x274a6ed0)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::detail::StoreOpGenericAdaptorBase::Properties)
** Insert  : 'llvm.store'(0x27437f30)
** Erase   : 'toy.assign'(0x2745ab50)
"toy::AssignOpLowering" result 1

My DI insertion isn’t fancy like flang’s, but I have only simple types to deal with, and don’t even support functions yet, so my simple way seemed like a reasonable choice. Regardless, getting working debugger support is a nice milestone.