diff --git a/compiler/asserts.go b/compiler/asserts.go index f07b73bc26..c890a3317e 100644 --- a/compiler/asserts.go +++ b/compiler/asserts.go @@ -245,7 +245,7 @@ func (b *builder) createRuntimeAssert(assert llvm.Value, blockPrefix, assertFunc // current insert position. faultBlock := b.ctx.AddBasicBlock(b.llvmFn, blockPrefix+".throw") nextBlock := b.insertBasicBlock(blockPrefix + ".next") - b.blockExits[b.currentBlock] = nextBlock // adjust outgoing block for phi nodes + b.currentBlockInfo.exit = nextBlock // adjust outgoing block for phi nodes // Now branch to the out-of-bounds or the regular block. b.CreateCondBr(assert, faultBlock, nextBlock) diff --git a/compiler/compiler.go b/compiler/compiler.go index 61d7e89335..d0c518e5bc 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -152,10 +152,12 @@ type builder struct { llvmFnType llvm.Type llvmFn llvm.Value info functionInfo - locals map[ssa.Value]llvm.Value // local variables - blockEntries map[*ssa.BasicBlock]llvm.BasicBlock // a *ssa.BasicBlock may be split up - blockExits map[*ssa.BasicBlock]llvm.BasicBlock // these are the exit blocks + locals map[ssa.Value]llvm.Value // local variables + blockInfo []blockInfo currentBlock *ssa.BasicBlock + currentBlockInfo *blockInfo + tarjanStack []uint + tarjanIndex uint phis []phiNode deferPtr llvm.Value deferFrame llvm.Value @@ -187,11 +189,22 @@ func newBuilder(c *compilerContext, irbuilder llvm.Builder, f *ssa.Function) *bu info: c.getFunctionInfo(f), locals: make(map[ssa.Value]llvm.Value), dilocals: make(map[*types.Var]llvm.Metadata), - blockEntries: make(map[*ssa.BasicBlock]llvm.BasicBlock), - blockExits: make(map[*ssa.BasicBlock]llvm.BasicBlock), } } +type blockInfo struct { + // entry is the LLVM basic block corresponding to the start of this *ssa.Block. + entry llvm.BasicBlock + + // exit is the LLVM basic block corresponding to the end of this *ssa.Block. 
+ // It will be different than entry if any of the block's instructions contain internal branches. + exit llvm.BasicBlock + + // tarjan holds state for applying Tarjan's strongly connected components algorithm to the CFG. + // This is used by defer.go to determine whether to stack- or heap-allocate defer data. + tarjan tarjanNode +} + type deferBuiltin struct { callName string pos token.Pos @@ -1220,14 +1233,29 @@ func (b *builder) createFunctionStart(intrinsic bool) { // intrinsic (like an atomic operation). Create the entry block // manually. entryBlock = b.ctx.AddBasicBlock(b.llvmFn, "entry") + // Intrinsics may create internal branches (e.g. nil checks). + // They will attempt to access b.currentBlockInfo to update the exit block. + // Create some fake block info for them to access. + blockInfo := []blockInfo{ + { + entry: entryBlock, + exit: entryBlock, + }, + } + b.blockInfo = blockInfo + b.currentBlockInfo = &blockInfo[0] } else { + blocks := b.fn.Blocks + blockInfo := make([]blockInfo, len(blocks)) for _, block := range b.fn.DomPreorder() { + info := &blockInfo[block.Index] llvmBlock := b.ctx.AddBasicBlock(b.llvmFn, block.Comment) - b.blockEntries[block] = llvmBlock - b.blockExits[block] = llvmBlock + info.entry = llvmBlock + info.exit = llvmBlock } + b.blockInfo = blockInfo // Normal functions have an entry block. 
- entryBlock = b.blockEntries[b.fn.Blocks[0]] + entryBlock = blockInfo[0].entry } b.SetInsertPointAtEnd(entryBlock) @@ -1323,8 +1351,9 @@ func (b *builder) createFunction() { if b.DumpSSA { fmt.Printf("%d: %s:\n", block.Index, block.Comment) } - b.SetInsertPointAtEnd(b.blockEntries[block]) b.currentBlock = block + b.currentBlockInfo = &b.blockInfo[block.Index] + b.SetInsertPointAtEnd(b.currentBlockInfo.entry) for _, instr := range block.Instrs { if instr, ok := instr.(*ssa.DebugRef); ok { if !b.Debug { @@ -1384,7 +1413,7 @@ func (b *builder) createFunction() { block := phi.ssa.Block() for i, edge := range phi.ssa.Edges { llvmVal := b.getValue(edge, getPos(phi.ssa)) - llvmBlock := b.blockExits[block.Preds[i]] + llvmBlock := b.blockInfo[block.Preds[i].Index].exit phi.llvm.AddIncoming([]llvm.Value{llvmVal}, []llvm.BasicBlock{llvmBlock}) } } @@ -1498,11 +1527,11 @@ func (b *builder) createInstruction(instr ssa.Instruction) { case *ssa.If: cond := b.getValue(instr.Cond, getPos(instr)) block := instr.Block() - blockThen := b.blockEntries[block.Succs[0]] - blockElse := b.blockEntries[block.Succs[1]] + blockThen := b.blockInfo[block.Succs[0].Index].entry + blockElse := b.blockInfo[block.Succs[1].Index].entry b.CreateCondBr(cond, blockThen, blockElse) case *ssa.Jump: - blockJump := b.blockEntries[instr.Block().Succs[0]] + blockJump := b.blockInfo[instr.Block().Succs[0].Index].entry b.CreateBr(blockJump) case *ssa.MapUpdate: m := b.getValue(instr.Map, getPos(instr)) diff --git a/compiler/defer.go b/compiler/defer.go index e2944456df..9f3bfe68ae 100644 --- a/compiler/defer.go +++ b/compiler/defer.go @@ -100,7 +100,7 @@ func (b *builder) createLandingPad() { // Continue at the 'recover' block, which returns to the parent in an // appropriate way. - b.CreateBr(b.blockEntries[b.fn.Recover]) + b.CreateBr(b.blockInfo[b.fn.Recover.Index].entry) } // Create a checkpoint (similar to setjmp). 
This emits inline assembly that @@ -234,41 +234,88 @@ func (b *builder) createInvokeCheckpoint() { continueBB := b.insertBasicBlock("") b.CreateCondBr(isZero, continueBB, b.landingpad) b.SetInsertPointAtEnd(continueBB) - b.blockExits[b.currentBlock] = continueBB + b.currentBlockInfo.exit = continueBB } -// isInLoop checks if there is a path from a basic block to itself. -func isInLoop(start *ssa.BasicBlock) bool { - // Use a breadth-first search to scan backwards through the block graph. - queue := []*ssa.BasicBlock{start} - checked := map[*ssa.BasicBlock]struct{}{} - - for len(queue) > 0 { - // pop a block off of the queue - block := queue[len(queue)-1] - queue = queue[:len(queue)-1] - - // Search through predecessors. - // Searching backwards means that this is pretty fast when the block is close to the start of the function. - // Defers are often placed near the start of the function. - for _, pred := range block.Preds { - if pred == start { - // cycle found - return true - } +// isInLoop checks if there is a path from the current block to itself. +// Use Tarjan's strongly connected components algorithm to search for cycles. +// A one-node SCC is a cycle iff there is an edge from the node to itself. +// A multi-node SCC is always a cycle. +func (b *builder) isInLoop() bool { + if b.currentBlockInfo.tarjan.index == 0 { + b.strongConnect(b.currentBlock) + } + return b.currentBlockInfo.tarjan.cyclic +} - if _, ok := checked[pred]; ok { - // block already checked - continue - } +func (b *builder) strongConnect(block *ssa.BasicBlock) { + // Assign a new index. + assignedIndex := b.tarjanIndex + 1 + b.tarjanIndex = assignedIndex + + // Apply the new index. + blockIndex := block.Index + node := &b.blockInfo[blockIndex].tarjan + node.index = assignedIndex + node.lowLink = assignedIndex + + // Push the node onto the stack. + node.onStack = true + b.tarjanStack = append(b.tarjanStack, uint(blockIndex)) + + // Process the successors. 
+ for _, successor := range block.Succs { + // Look up the successor's state. + successorIndex := successor.Index + if successorIndex == blockIndex { + // Handle a self-cycle specially. + node.cyclic = true + continue + } + successorNode := &b.blockInfo[successorIndex].tarjan + + if successorNode.index == 0 { + // This node has not yet been visited. + b.strongConnect(successor) + } else if !successorNode.onStack { + // This node has been visited, but is in a different SCC. + // Ignore it. + continue + } + if successorNode.lowLink < node.lowLink { + node.lowLink = successorNode.lowLink + } + } - // add to queue and checked map - queue = append(queue, pred) - checked[pred] = struct{}{} + if node.lowLink == node.index { + // This is a root node. + // Pop the SCC off the stack. + stack := b.tarjanStack + top := stack[len(stack)-1] + stack = stack[:len(stack)-1] + blocks := b.blockInfo + topNode := &blocks[top].tarjan + topNode.onStack = false + if top != uint(blockIndex) { + // Mark all nodes in this SCC as cyclic. + topNode.cyclic = true + for top != uint(blockIndex) { + top = stack[len(stack)-1] + stack = stack[:len(stack)-1] + topNode := &blocks[top].tarjan + topNode.onStack = false + topNode.cyclic = true + } } + b.tarjanStack = stack + } +} - return false +type tarjanNode struct { + index uint + lowLink uint + onStack bool + cyclic bool } // createDefer emits a single defer instruction, to be run when this function @@ -410,7 +457,10 @@ func (b *builder) createDefer(instr *ssa.Defer) { // Put this struct in an allocation. var alloca llvm.Value - if !isInLoop(instr.Block()) { + if instr.Block() != b.currentBlock { + panic("block mismatch") + } + if !b.isInLoop() { // This can safely use a stack allocation. 
alloca = llvmutil.CreateEntryBlockAlloca(b.Builder, deferredCallType, "defer.alloca") } else { diff --git a/compiler/interface.go b/compiler/interface.go index 17c4c399bf..e63666834f 100644 --- a/compiler/interface.go +++ b/compiler/interface.go @@ -737,7 +737,7 @@ func (b *builder) createTypeAssert(expr *ssa.TypeAssert) llvm.Value { prevBlock := b.GetInsertBlock() okBlock := b.insertBasicBlock("typeassert.ok") nextBlock := b.insertBasicBlock("typeassert.next") - b.blockExits[b.currentBlock] = nextBlock // adjust outgoing block for phi nodes + b.currentBlockInfo.exit = nextBlock // adjust outgoing block for phi nodes b.CreateCondBr(commaOk, okBlock, nextBlock) // Retrieve the value from the interface if the type assert was diff --git a/compiler/testdata/defer-cortex-m-qemu.ll b/compiler/testdata/defer-cortex-m-qemu.ll index b0a65ab4cd..53acb20b90 100644 --- a/compiler/testdata/defer-cortex-m-qemu.ll +++ b/compiler/testdata/defer-cortex-m-qemu.ll @@ -270,6 +270,241 @@ entry: ret void } +; Function Attrs: nounwind +define hidden void @main.deferInfiniteLoop(ptr %context) unnamed_addr #1 { +entry: + %deferPtr = alloca ptr, align 4 + store ptr null, ptr %deferPtr, align 4 + %deferframe.buf = alloca %runtime.deferFrame, align 4 + %0 = call ptr @llvm.stacksave.p0() + call void @runtime.setupDeferFrame(ptr nonnull %deferframe.buf, ptr %0, ptr undef) #4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %defer.next = load ptr, ptr %deferPtr, align 4 + %defer.alloc.call = call dereferenceable(12) ptr @runtime.alloc(i32 12, ptr null, ptr undef) #4 + store i32 0, ptr %defer.alloc.call, align 4 + %defer.alloc.call.repack1 = getelementptr inbounds nuw i8, ptr %defer.alloc.call, i32 4 + store ptr %defer.next, ptr %defer.alloc.call.repack1, align 4 + %defer.alloc.call.repack3 = getelementptr inbounds nuw i8, ptr %defer.alloc.call, i32 8 + store i32 8, ptr %defer.alloc.call.repack3, align 4 + store ptr %defer.alloc.call, ptr %deferPtr, align 4 + br label %for.body + 
+recover: ; preds = %rundefers.end + ret void + +lpad: ; No predecessors! + br label %rundefers.loophead + +rundefers.loophead: ; preds = %rundefers.callback0, %lpad + br i1 poison, label %rundefers.end, label %rundefers.loop + +rundefers.loop: ; preds = %rundefers.loophead + switch i32 poison, label %rundefers.default [ + i32 0, label %rundefers.callback0 + ] + +rundefers.callback0: ; preds = %rundefers.loop + br label %rundefers.loophead + +rundefers.default: ; preds = %rundefers.loop + unreachable + +rundefers.end: ; preds = %rundefers.loophead + br label %recover +} + +; Function Attrs: nounwind +define hidden void @main.deferLoop(ptr %context) unnamed_addr #1 { +entry: + %deferPtr = alloca ptr, align 4 + store ptr null, ptr %deferPtr, align 4 + %deferframe.buf = alloca %runtime.deferFrame, align 4 + %0 = call ptr @llvm.stacksave.p0() + call void @runtime.setupDeferFrame(ptr nonnull %deferframe.buf, ptr %0, ptr undef) #4 + br label %for.loop + +for.loop: ; preds = %for.body, %entry + %1 = phi i32 [ 0, %entry ], [ %3, %for.body ] + %2 = icmp slt i32 %1, 10 + br i1 %2, label %for.body, label %for.done + +for.body: ; preds = %for.loop + %defer.next = load ptr, ptr %deferPtr, align 4 + %defer.alloc.call = call dereferenceable(12) ptr @runtime.alloc(i32 12, ptr null, ptr undef) #4 + store i32 0, ptr %defer.alloc.call, align 4 + %defer.alloc.call.repack13 = getelementptr inbounds nuw i8, ptr %defer.alloc.call, i32 4 + store ptr %defer.next, ptr %defer.alloc.call.repack13, align 4 + %defer.alloc.call.repack15 = getelementptr inbounds nuw i8, ptr %defer.alloc.call, i32 8 + store i32 %1, ptr %defer.alloc.call.repack15, align 4 + store ptr %defer.alloc.call, ptr %deferPtr, align 4 + %3 = add i32 %1, 1 + br label %for.loop + +for.done: ; preds = %for.loop + br label %rundefers.block + +rundefers.after: ; preds = %rundefers.end + call void @runtime.destroyDeferFrame(ptr nonnull %deferframe.buf, ptr undef) #4 + ret void + +rundefers.block: ; preds = %for.done + br label 
%rundefers.loophead + +rundefers.loophead: ; preds = %rundefers.callback0, %rundefers.block + %4 = load ptr, ptr %deferPtr, align 4 + %stackIsNil = icmp eq ptr %4, null + br i1 %stackIsNil, label %rundefers.end, label %rundefers.loop + +rundefers.loop: ; preds = %rundefers.loophead + %stack.next.gep = getelementptr inbounds nuw i8, ptr %4, i32 4 + %stack.next = load ptr, ptr %stack.next.gep, align 4 + store ptr %stack.next, ptr %deferPtr, align 4 + %callback = load i32, ptr %4, align 4 + switch i32 %callback, label %rundefers.default [ + i32 0, label %rundefers.callback0 + ] + +rundefers.callback0: ; preds = %rundefers.loop + %gep = getelementptr inbounds nuw i8, ptr %4, i32 8 + %param = load i32, ptr %gep, align 4 + call void @runtime.printlock(ptr undef) #4 + call void @runtime.printint32(i32 %param, ptr undef) #4 + call void @runtime.printunlock(ptr undef) #4 + br label %rundefers.loophead + +rundefers.default: ; preds = %rundefers.loop + unreachable + +rundefers.end: ; preds = %rundefers.loophead + br label %rundefers.after + +recover: ; preds = %rundefers.end1 + ret void + +lpad: ; No predecessors! 
+ br label %rundefers.loophead4 + +rundefers.loophead4: ; preds = %rundefers.callback010, %lpad + br i1 poison, label %rundefers.end1, label %rundefers.loop3 + +rundefers.loop3: ; preds = %rundefers.loophead4 + switch i32 poison, label %rundefers.default2 [ + i32 0, label %rundefers.callback010 + ] + +rundefers.callback010: ; preds = %rundefers.loop3 + br label %rundefers.loophead4 + +rundefers.default2: ; preds = %rundefers.loop3 + unreachable + +rundefers.end1: ; preds = %rundefers.loophead4 + br label %recover +} + +; Function Attrs: nounwind +define hidden void @main.deferBetweenLoops(ptr %context) unnamed_addr #1 { +entry: + %defer.alloca = alloca { i32, ptr, i32 }, align 4 + %deferPtr = alloca ptr, align 4 + store ptr null, ptr %deferPtr, align 4 + %deferframe.buf = alloca %runtime.deferFrame, align 4 + %0 = call ptr @llvm.stacksave.p0() + call void @runtime.setupDeferFrame(ptr nonnull %deferframe.buf, ptr %0, ptr undef) #4 + br label %for.loop + +for.loop: ; preds = %for.body, %entry + %1 = phi i32 [ 0, %entry ], [ %3, %for.body ] + %2 = icmp slt i32 %1, 10 + br i1 %2, label %for.body, label %for.done + +for.body: ; preds = %for.loop + %3 = add i32 %1, 1 + br label %for.loop + +for.done: ; preds = %for.loop + %defer.next = load ptr, ptr %deferPtr, align 4 + store i32 0, ptr %defer.alloca, align 4 + %defer.alloca.repack16 = getelementptr inbounds nuw i8, ptr %defer.alloca, i32 4 + store ptr %defer.next, ptr %defer.alloca.repack16, align 4 + %defer.alloca.repack18 = getelementptr inbounds nuw i8, ptr %defer.alloca, i32 8 + store i32 1, ptr %defer.alloca.repack18, align 4 + store ptr %defer.alloca, ptr %deferPtr, align 4 + br label %for.loop1 + +for.loop1: ; preds = %for.body2, %for.done + %4 = phi i32 [ 0, %for.done ], [ %6, %for.body2 ] + %5 = icmp slt i32 %4, 10 + br i1 %5, label %for.body2, label %for.done3 + +for.body2: ; preds = %for.loop1 + %6 = add i32 %4, 1 + br label %for.loop1 + +for.done3: ; preds = %for.loop1 + br label %rundefers.block + 
+rundefers.after: ; preds = %rundefers.end + call void @runtime.destroyDeferFrame(ptr nonnull %deferframe.buf, ptr undef) #4 + ret void + +rundefers.block: ; preds = %for.done3 + br label %rundefers.loophead + +rundefers.loophead: ; preds = %rundefers.callback0, %rundefers.block + %7 = load ptr, ptr %deferPtr, align 4 + %stackIsNil = icmp eq ptr %7, null + br i1 %stackIsNil, label %rundefers.end, label %rundefers.loop + +rundefers.loop: ; preds = %rundefers.loophead + %stack.next.gep = getelementptr inbounds nuw i8, ptr %7, i32 4 + %stack.next = load ptr, ptr %stack.next.gep, align 4 + store ptr %stack.next, ptr %deferPtr, align 4 + %callback = load i32, ptr %7, align 4 + switch i32 %callback, label %rundefers.default [ + i32 0, label %rundefers.callback0 + ] + +rundefers.callback0: ; preds = %rundefers.loop + %gep = getelementptr inbounds nuw i8, ptr %7, i32 8 + %param = load i32, ptr %gep, align 4 + call void @runtime.printlock(ptr undef) #4 + call void @runtime.printint32(i32 %param, ptr undef) #4 + call void @runtime.printunlock(ptr undef) #4 + br label %rundefers.loophead + +rundefers.default: ; preds = %rundefers.loop + unreachable + +rundefers.end: ; preds = %rundefers.loophead + br label %rundefers.after + +recover: ; preds = %rundefers.end4 + ret void + +lpad: ; No predecessors! 
+ br label %rundefers.loophead7 + +rundefers.loophead7: ; preds = %rundefers.callback013, %lpad + br i1 poison, label %rundefers.end4, label %rundefers.loop6 + +rundefers.loop6: ; preds = %rundefers.loophead7 + switch i32 poison, label %rundefers.default5 [ + i32 0, label %rundefers.callback013 + ] + +rundefers.callback013: ; preds = %rundefers.loop6 + br label %rundefers.loophead7 + +rundefers.default5: ; preds = %rundefers.loop6 + unreachable + +rundefers.end4: ; preds = %rundefers.loophead7 + br label %recover +} + attributes #0 = { allockind("alloc,zeroed") allocsize(0) "alloc-family"="runtime.alloc" "target-features"="+armv7-m,+hwdiv,+soft-float,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" } attributes #1 = { nounwind "target-features"="+armv7-m,+hwdiv,+soft-float,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" } attributes #2 = { "target-features"="+armv7-m,+hwdiv,+soft-float,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" } diff --git a/compiler/testdata/defer.go b/compiler/testdata/defer.go index ae334a6568..b93d304991 100644 --- 
a/compiler/testdata/defer.go +++ b/compiler/testdata/defer.go @@ -18,3 +18,23 @@ func deferMultiple() { }() external() } + +func deferInfiniteLoop() { + for { + defer print(8) + } +} + +func deferLoop() { + for i := 0; i < 10; i++ { + defer print(i) + } +} + +func deferBetweenLoops() { + for i := 0; i < 10; i++ { + } + defer print(1) + for i := 0; i < 10; i++ { + } +}