@@ -39,6 +39,7 @@ namespace {
3939// / * Replaces x.append(y) with x = x + y if x and y are constant strings.
4040// / * Replaces _typeName(T.self) with a constant string if T is statically
4141// / known.
42+ // / * Replaces String(literal).utf8CString with the string literal itself.
4243// /
4344// / This pass must run on high-level SIL, where semantic calls are still in
4445// / place.
@@ -92,6 +93,7 @@ class StringOptimization {
9293 llvm::DenseMap<SILValue, SILValue> &storedStrings);
9394 bool optimizeStringConcat (ApplyInst *concatCall);
9495 bool optimizeTypeName (ApplyInst *typeNameCall);
96+ bool optimizeGetCString (ApplyInst *getCStringCall);
9597
9698 static ApplyInst *isSemanticCall (SILInstruction *inst, StringRef attr,
9799 unsigned numArgs);
@@ -156,6 +158,13 @@ bool StringOptimization::optimizeBlock(SILBasicBlock &block) {
156158 continue ;
157159 }
158160 }
161+ if (ApplyInst *getCString = isSemanticCall (inst,
162+ semantics::STRING_GET_UTF8_CSTRING, 1 )) {
163+ if (optimizeGetCString (getCString)) {
164+ changed = true ;
165+ continue ;
166+ }
167+ }
159168 // Remove items from storedStrings if inst overwrites (or potentially
160169 // overwrites) a stored String in an identifiable object.
161170 invalidateModifiedObjects (inst, storedStrings);
@@ -328,6 +337,50 @@ bool StringOptimization::optimizeTypeName(ApplyInst *typeNameCall) {
328337 return true ;
329338}
330339
340+ // / Replaces a String initializer followed by String.utf8CString with a
341+ // / (UTF8 encoded) string literal.
342+ // /
343+ // / Note that string literals are always generated with a trailing 0-byte.
344+ bool StringOptimization::optimizeGetCString (ApplyInst *getCStringCall) {
345+ // Is this a String.utf8CString of a literal String?
346+ StringInfo stringInfo = getStringInfo (getCStringCall->getArgument (0 ));
347+ if (!stringInfo.isConstant ())
348+ return false ;
349+
350+ StringLiteralInst *literal = nullptr ;
351+ bool changed = false ;
352+ SmallVector<SILInstruction *, 16 > workList;
353+ workList.push_back (getCStringCall);
354+
355+ // / String.utf8CString returns an array of Int8. Search for ref_tail_addr of
356+ // / the array buffer.
357+ while (!workList.empty ()) {
358+ SILInstruction *inst = workList.pop_back_val ();
359+ // Look through string_extract which extract the buffer from the array.
360+ if (isa<StructExtractInst>(inst) || inst == getCStringCall) {
361+ for (Operand *use : cast<SingleValueInstruction>(inst)->getUses ()) {
362+ workList.push_back (use->getUser ());
363+ }
364+ continue ;
365+ }
366+ if (auto *rta = dyn_cast<RefTailAddrInst>(inst)) {
367+ // Replace the ref_tail_addr with a pointer_to_address of the string
368+ // literal.
369+ if (!literal) {
370+ // Build the literal if we don't have one, yet.
371+ SILBuilder builder (getCStringCall);
372+ literal = builder.createStringLiteral (getCStringCall->getLoc (),
373+ stringInfo.str , StringLiteralInst::Encoding::UTF8);
374+ }
375+ SILBuilder builder (rta);
376+ auto *strAddr = builder.createPointerToAddress (rta->getLoc (), literal,
377+ rta->getType (), /* isStrict*/ false );
378+ rta->replaceAllUsesWith (strAddr);
379+ changed = true ;
380+ }
381+ }
382+ return changed;
383+ }
331384
332385// / Returns the apply instruction if \p inst is a call of a function which has
333386// / a semantic attribute \p attr and exactly \p numArgs arguments.
0 commit comments