@@ -63,21 +63,33 @@ import SIL
6363///
let redundantLoadElimination = FunctionPass(name: "redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in

  // Runs the `.regular` variant. The pass has no use for the returned "changed" flag.
  _ = eliminateRedundantLoads(in: function, variant: .regular, context)
}
6868
// The early variant of RLE leaves loads from Arrays alone. This matters because later array
// optimizations, like ABCOpt, get confused if an array load in a loop is converted to a pattern
// with a phi argument.
let earlyRedundantLoadElimination = FunctionPass(name: "early-redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in

  // The returned "changed" flag is not needed by the pass.
  _ = eliminateRedundantLoads(in: function, variant: .early, context)
}
7575
76- private func eliminateRedundantLoads( in function: Function , ignoreArrays: Bool , _ context: FunctionPassContext ) {
// Runs RLE in its `.mandatory` variant. In that variant `isEligibleForElimination` rejects
// `load [take]` and only accepts loads whose access base is a box or a stack location.
let mandatoryRedundantLoadElimination = FunctionPass(name: "mandatory-redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in

  // The returned "changed" flag is not needed by the pass.
  _ = eliminateRedundantLoads(in: function, variant: .mandatory, context)
}
80+
/// Selects which loads `eliminateRedundantLoads` is allowed to touch.
/// The per-variant filtering is implemented in `LoadInst.isEligibleForElimination`.
enum RedundantLoadEliminationVariant {
  /// Skips `load [take]` and only handles loads from box and stack access bases.
  case mandatory

  /// Like `mandatory`, but additionally bypasses the `shouldExpand` type check so that
  /// loads of large structs are handled, too.
  case mandatoryInGlobalInit

  /// Skips loads of the Swift `Array` type.
  case early

  /// No variant-specific restrictions.
  case regular
}
7784
85+ func eliminateRedundantLoads( in function: Function ,
86+ variant: RedundantLoadEliminationVariant ,
87+ _ context: FunctionPassContext ) -> Bool
88+ {
7889 // Avoid quadratic complexity by limiting the number of visited instructions.
7990 // This limit is sufficient for most "real-world" functions, by far.
8091 var complexityBudget = 50_000
92+ var changed = false
8193
8294 for block in function. blocks. reversed ( ) {
8395
@@ -89,50 +101,76 @@ private func eliminateRedundantLoads(in function: Function, ignoreArrays: Bool,
89101
90102 if let load = inst as? LoadInst {
91103 if !context. continueWithNextSubpassRun ( for: load) {
92- return
104+ return changed
93105 }
94- if ignoreArrays,
95- let nominal = load. type. nominal,
96- nominal == context. swiftArrayDecl
97- {
98- continue
106+ if complexityBudget < 20 {
107+ complexityBudget = 20
99108 }
100- // Check if the type can be expanded without a significant increase to
101- // code size.
102- // We block redundant load elimination because it might increase
103- // register pressure for large values. Furthermore, this pass also
104- // splits values into its projections (e.g
105- // shrinkMemoryLifetimeAndSplit).
106- if !load. type. shouldExpand ( context) {
107- continue
109+ if !load. isEligibleForElimination ( in: variant, context) {
110+ continue ;
108111 }
109- tryEliminate ( load: load, complexityBudget: & complexityBudget, context)
112+ changed = tryEliminate ( load: load, complexityBudget: & complexityBudget, context) || changed
110113 }
111114 }
112115 }
116+ return changed
113117}
114118
/// Tries to eliminate `load`, either by replacing it with available values or by splitting it
/// into smaller loads.
///
/// - Parameters:
///   - load: the load instruction to eliminate.
///   - complexityBudget: the remaining budget, passed on to the redundancy queries.
///   - context: the pass context.
/// - Returns: `true` if the load was replaced by its available values, the result of `trySplit`
///   if a sub-path of the load is redundant, and `false` otherwise.
private func tryEliminate(load: LoadInst, complexityBudget: inout Int, _ context: FunctionPassContext) -> Bool {
  switch load.isRedundant(complexityBudget: &complexityBudget, context) {
  case .notRedundant:
    return false

  case .redundant(let availableValues):
    replace(load: load, with: availableValues, context)
    return true

  case .maybePartiallyRedundant(let subPath):
    // Check if a partial load would really be redundant to avoid unnecessary splitting.
    guard case .redundant = load.isRedundant(at: subPath, complexityBudget: &complexityBudget, context) else {
      return false
    }
    // The new individual loads are inserted right before the current load and
    // will be optimized in the following loop iterations.
    return load.trySplit(context)
  }
}
133138
134139private extension LoadInst {
135140
/// Returns true if this load may be considered for elimination in the given pass `variant`.
///
/// Applies the variant-specific restrictions first and then - except for
/// `.mandatoryInGlobalInit` - the `shouldExpand` check on the loaded type.
func isEligibleForElimination(in variant: RedundantLoadEliminationVariant, _ context: FunctionPassContext) -> Bool {
  switch variant {
  case .mandatory, .mandatoryInGlobalInit:
    // load [take] would require to shrinkMemoryLifetime. But we don't want to do this in the mandatory
    // pipeline to not shrink or remove an alloc_stack which is relevant for debug info.
    guard loadOwnership != .take else {
      return false
    }
    // Only loads from box and stack locations are handled in the mandatory variants.
    switch address.accessBase {
    case .box, .stack:
      break
    default:
      return false
    }

  case .early:
    // See the comment of `earlyRedundantLoadElimination`.
    if let nominal = self.type.nominal, nominal == context.swiftArrayDecl {
      return false
    }

  case .regular:
    break
  }

  // It is required to remove loads, even of large structs, in global init functions to ensure
  // that globals (containing large structs) can be statically initialized.
  if variant == .mandatoryInGlobalInit {
    return true
  }

  // Check if the type can be expanded without a significant increase to code size.
  // We block redundant load elimination because it might increase register pressure for large values.
  // Furthermore, this pass also splits values into its projections (e.g shrinkMemoryLifetimeAndSplit).
  return self.type.shouldExpand(context)
}
173+
136174 enum DataflowResult {
137175 case notRedundant
138176 case redundant( [ AvailableValue ] )
@@ -251,7 +289,7 @@ private func replace(load: LoadInst, with availableValues: [AvailableValue], _ c
251289 var ssaUpdater = SSAUpdater ( function: load. parentFunction,
252290 type: load. type, ownership: load. ownership, context)
253291
254- for availableValue in availableValues {
292+ for availableValue in availableValues. replaceCopyAddrsWithLoadsAndStores ( context ) {
255293 let block = availableValue. instruction. parentBlock
256294 let availableValue = provideValue ( for: load, from: availableValue, context)
257295 ssaUpdater. addAvailableValue ( availableValue, in: block)
@@ -279,6 +317,10 @@ private func replace(load: LoadInst, with availableValues: [AvailableValue], _ c
279317 //
280318 newValue = ssaUpdater. getValue ( inMiddleOf: load. parentBlock)
281319 }
320+
321+ // Make sure to keep dependencies valid after replacing the load
322+ insertMarkDependencies ( for: load, context)
323+
282324 load. replace ( with: newValue, context)
283325}
284326
@@ -306,6 +348,39 @@ private func provideValue(
306348 }
307349}
308350
/// Re-creates on the loaded value every `mark_dependence` found while walking up the address
/// of `load`, and redirects the uses of the load to the new `mark_dependence`:
///
///   %2 = mark_dependence %1 on %0
///   %3 = load %2
/// ->
///   %2 = mark_dependence %1 on %0 // not needed anymore, can be removed eventually
///   %3 = load %2
///   %4 = mark_dependence %3 on %0
///   // replace %3 with %4
///
private func insertMarkDependencies(for load: LoadInst, _ context: FunctionPassContext) {
  var walker = MarkDependenceInserter(load: load, context: context)
  // The walk is only done for its side effects; its result carries no information here.
  _ = walker.walkUp(address: load.address, path: UnusedWalkingPath())
}
365+
/// Walks up the def-chain of an address and, for each `mark_dependence` it visits, inserts an
/// equivalent `mark_dependence` on the value of `load`.
private struct MarkDependenceInserter: AddressUseDefWalker {
  let load: LoadInst
  let context: FunctionPassContext

  mutating func walkUp(address: Value, path: UnusedWalkingPath) -> WalkResult {
    if let addressDependence = address as? MarkDependenceInst {
      // Create the new dependence right after the load, with the same base and kind.
      let valueDependence = Builder(after: load, context)
        .createMarkDependence(value: load, base: addressDependence.base, kind: addressDependence.dependenceKind)

      // All uses of the load - except the new mark_dependence itself - now go through the
      // new mark_dependence.
      load.uses.ignore(user: valueDependence).replaceAll(with: valueDependence, context)
    }
    return walkUpDefault(address: address, path: path)
  }

  mutating func rootDef(address: Value, path: UnusedWalkingPath) -> WalkResult {
    // Nothing to do at the root of the address.
    return .continueWalk
  }
}
383+
309384/// In case of a `load [take]` shrink lifetime of the value in memory back to the `availableValue`
310385/// and return the (possibly projected) available value. For example:
311386///
@@ -342,6 +417,8 @@ private func shrinkMemoryLifetime(from load: LoadInst, to availableValue: Availa
342417 fatalError ( " unqualified store in ossa function? " )
343418 }
344419 return valueToAdd
420+ case . viaCopyAddr:
421+ fatalError ( " copy_addr must be lowered before shrinking lifetime " )
345422 }
346423}
347424
@@ -380,39 +457,58 @@ private func shrinkMemoryLifetimeAndSplit(from load: LoadInst, to availableValue
380457 let valueToAdd = builder. createLoad ( fromAddress: addr, ownership: . take)
381458 availableStore. trySplit ( context)
382459 return valueToAdd
460+ case . viaCopyAddr:
461+ fatalError ( " copy_addr must be lowered before shrinking lifetime " )
383462 }
384463}
385464
/// An instruction which is preceding the original load and provides the loaded value:
/// either a `load`, a `store` or a `copy_addr`.
///
/// A `copy_addr` cannot provide a value or a builder directly: `value` and
/// `getBuilderForProjections` trap for `.viaCopyAddr`. Such entries are turned into
/// `.viaStore` by `replaceCopyAddrsWithLoadsAndStores` before they are used.
private enum AvailableValue {
  case viaLoad(LoadInst)
  case viaStore(StoreInst)
  case viaCopyAddr(CopyAddrInst)

  /// The value which is available: the load itself or the stored source value.
  var value: Value {
    switch self {
    case let .viaLoad(load):
      return load
    case let .viaStore(store):
      return store.source
    case .viaCopyAddr:
      fatalError("copy_addr must be lowered")
    }
  }

  /// The address from which or to which the available value is loaded, stored or copied.
  var address: Value {
    switch self {
    case let .viaLoad(load):
      return load.address
    case let .viaStore(store):
      return store.destination
    case let .viaCopyAddr(copyAddr):
      return copyAddr.destination
    }
  }

  /// The instruction which makes the value available.
  var instruction: Instruction {
    switch self {
    case let .viaLoad(load):
      return load
    case let .viaStore(store):
      return store
    case let .viaCopyAddr(copyAddr):
      return copyAddr
    }
  }

  /// Returns a builder positioned after an available load or before an available store.
  func getBuilderForProjections(_ context: FunctionPassContext) -> Builder {
    switch self {
    case let .viaLoad(load):
      return Builder(after: load, context)
    case let .viaStore(store):
      return Builder(before: store, context)
    case .viaCopyAddr:
      fatalError("copy_addr must be lowered")
    }
  }
}
503+
private extension Array where Element == AvailableValue {
  /// Returns the available values with every `.viaCopyAddr` entry replaced by a `.viaStore`
  /// entry. The store is obtained by calling `replaceWithLoadAndStore` on the `copy_addr`.
  /// All other entries are passed through unchanged, in their original order.
  func replaceCopyAddrsWithLoadsAndStores(_ context: FunctionPassContext) -> [AvailableValue] {
    return map { availableValue in
      guard case .viaCopyAddr(let copyAddr) = availableValue else {
        return availableValue
      }
      return .viaStore(copyAddr.replaceWithLoadAndStore(context))
    }
  }
}
@@ -479,7 +575,7 @@ private struct InstructionScanner {
479575
480576 private mutating func visit( instruction: Instruction ) -> ScanResult {
481577 switch instruction {
482- case is FixLifetimeInst , is EndAccessInst , is EndBorrowInst :
578+ case is FixLifetimeInst , is BeginAccessInst , is EndAccessInst , is EndBorrowInst :
483579 // Those scope-ending instructions are only irrelevant if the preceding load is not changed.
484580 // If it is changed from `load [copy]` -> `load [take]` the memory effects of those scope-ending
485581 // instructions prevent that the `load [take]` will illegally mutate memory which is protected
@@ -520,6 +616,16 @@ private struct InstructionScanner {
520616 potentiallyRedundantSubpath = precedingStorePath
521617 }
522618
619+ case let preceedingCopy as CopyAddrInst where preceedingCopy. canProvideValue:
620+ let copyPath = preceedingCopy. destination. constantAccessPath
621+ if copyPath. getMaterializableProjection ( to: accessPath) != nil {
622+ availableValues. append ( . viaCopyAddr( preceedingCopy) )
623+ return . available
624+ }
625+ if accessPath. getMaterializableProjection ( to: copyPath) != nil , potentiallyRedundantSubpath == nil {
626+ potentiallyRedundantSubpath = copyPath
627+ }
628+
523629 default :
524630 break
525631 }
@@ -606,3 +712,20 @@ private struct Liverange {
606712 return false
607713 }
608714}
715+
private extension CopyAddrInst {
  /// True if this `copy_addr` can be used as an available value for a load.
  var canProvideValue: Bool {
    // Although the original load's type is loadable (obviously), it can be projected-out
    // from the copy_addr's type which might be not loadable.
    guard source.type.isLoadable(in: parentFunction) else {
      return false
    }
    if parentFunction.hasOwnership {
      return true
    }
    // Without ownership, only a take-initialization is accepted.
    // For simplicity, bail if we would have to insert compensating retains and releases.
    return isTakeOfSrc && isInitializationOfDest
  }
}
0 commit comments