Skip to content

Commit 3673cc7

Browse files
authored
[llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915)
It turns out that rc.exe doesn't interpret integer literals as octal numbers - but GNU windres does. Previously, llvm-rc did interpret them as octal. Fix the issue by stripping away the leading zeros during tokenization. One alternative (which would be somewhat cleaner, as visible in tokenizer.test) would be to retain them in the RCToken object, but strip them out before calling StringRef::getAsInteger. Another alternative would be to handle the radix detection locally in llvm-rc code and not rely on getAsInteger to autodetect it. Both of those solutions require propagating the IsWindres flag so that it is available within RCToken, or at least when calling RCToken::intValue(). Fixes: #144723
1 parent afc8368 commit 3673cc7

File tree

9 files changed

+90
-14
lines changed

9 files changed

+90
-14
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1 VERSIONINFO
2+
FILEVERSION 0010,0010,0010,0010
3+
BEGIN
4+
END
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1 VERSIONINFO
2+
FILEVERSION 9,08,09,1
3+
BEGIN
4+
END

llvm/test/tools/llvm-rc/Inputs/tokens.rc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1 + 2 - 3214L & 0x120894 032173 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End
1+
1 + 2 - 3214L & 0x120894 032173 -0042 009 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End
22
1*3/4
33
He11o LLVM
44
identifier-with-dashes

llvm/test/tools/llvm-rc/octal.test

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: llvm-rc -no-preprocess /FO %t.in-range-rc.res -- %p/Inputs/octal-in-range.rc
2+
; RUN: llvm-readobj %t.in-range-rc.res | FileCheck %s --check-prefix=IN-RANGE-RC
3+
; RUN: llvm-windres --no-preprocess %p/Inputs/octal-in-range.rc %t.in-range-windres.res
4+
; RUN: llvm-readobj %t.in-range-windres.res | FileCheck %s --check-prefix=IN-RANGE-WINDRES
5+
6+
; IN-RANGE-RC: Data: (
7+
; IN-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
8+
; IN-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
9+
; IN-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
10+
; IN-RANGE-RC-NEXT: 0030: 0A000A00 0A000A00 00000000 00000000 |................|
11+
; IN-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
12+
; IN-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............|
13+
; IN-RANGE-RC-NEXT: )
14+
15+
; IN-RANGE-WINDRES: Data: (
16+
; IN-RANGE-WINDRES-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
17+
; IN-RANGE-WINDRES-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
18+
; IN-RANGE-WINDRES-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
19+
; IN-RANGE-WINDRES-NEXT: 0030: 08000800 08000800 00000000 00000000 |................|
20+
; IN-RANGE-WINDRES-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
21+
; IN-RANGE-WINDRES-NEXT: 0050: 00000000 00000000 00000000 |............|
22+
; IN-RANGE-WINDRES-NEXT: )
23+
24+
; RUN: llvm-rc -no-preprocess /FO %t.out-of-range-rc.res -- %p/Inputs/octal-out-of-range.rc
25+
; RUN: llvm-readobj %t.out-of-range-rc.res | FileCheck %s --check-prefix=OUT-OF-RANGE-RC
26+
; RUN: not llvm-windres --no-preprocess %p/Inputs/octal-out-of-range.rc %t.out-of-range-windres.res 2>&1 | FileCheck %s --check-prefix OUT-OF-RANGE-WINDRES
27+
28+
; OUT-OF-RANGE-RC: Data: (
29+
; OUT-OF-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.|
30+
; OUT-OF-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.|
31+
; OUT-OF-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............|
32+
; OUT-OF-RANGE-RC-NEXT: 0030: 08000900 01000900 00000000 00000000 |................|
33+
; OUT-OF-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................|
34+
; OUT-OF-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............|
35+
; OUT-OF-RANGE-RC-NEXT: )
36+
37+
38+
; OUT-OF-RANGE-WINDRES: llvm-rc: Error parsing file: Integer invalid or too large: 08

llvm/test/tools/llvm-rc/tokenizer.test

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
; CHECK-NEXT: Int: 3214L; int value = 3214
1010
; CHECK-NEXT: Amp: &
1111
; CHECK-NEXT: Int: 0x120894; int value = 1181844
12-
; CHECK-NEXT: Int: 032173; int value = 13435
12+
; CHECK-NEXT: Int: 32173; int value = 32173
13+
; CHECK-NEXT: Minus: -
14+
; CHECK-NEXT: Int: 42; int value = 42
15+
; CHECK-NEXT: Int: 9; int value = 9
1316
; CHECK-NEXT: Int: 2; int value = 2
1417
; CHECK-NEXT: Pipe: |
1518
; CHECK-NEXT: Amp: &

llvm/tools/llvm-rc/ResourceScriptToken.cpp

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ using namespace llvm;
2626
using Kind = RCToken::Kind;
2727

2828
// Checks if Representation is a correct description of an RC integer.
29-
// It should be a 32-bit unsigned integer, either decimal, octal (0[0-7]+),
30-
// or hexadecimal (0x[0-9a-f]+). It might be followed by a single 'L'
31-
// character (that is the difference between our representation and
32-
// StringRef's one). If Representation is correct, 'true' is returned and
33-
// the return value is put back in Num.
29+
// It should be a 32-bit unsigned integer, either decimal or hexadecimal
30+
// (0x[0-9a-f]+). For Windres mode, it can also be octal (0[0-7]+).
31+
// It might be followed by a single 'L' character (that is the difference
32+
// between our representation and StringRef's one). If Representation is
33+
// correct, 'true' is returned and the parsed value is stored in Num.
3434
static bool rcGetAsInteger(StringRef Representation, uint32_t &Num) {
3535
size_t Length = Representation.size();
3636
if (Length == 0)
@@ -95,7 +95,8 @@ namespace {
9595

9696
class Tokenizer {
9797
public:
98-
Tokenizer(StringRef Input) : Data(Input), DataLength(Input.size()), Pos(0) {}
98+
Tokenizer(StringRef Input, bool IsWindres)
99+
: Data(Input), DataLength(Input.size()), Pos(0), IsWindres(IsWindres) {}
99100

100101
Expected<std::vector<RCToken>> run();
101102

@@ -128,6 +129,7 @@ class Tokenizer {
128129
// character.
129130
bool canStartInt() const;
130131
bool canContinueInt() const;
132+
void trimIntString(StringRef &Str) const;
131133

132134
bool canStartString() const;
133135

@@ -153,6 +155,7 @@ class Tokenizer {
153155

154156
StringRef Data;
155157
size_t DataLength, Pos;
158+
bool IsWindres;
156159
};
157160

158161
void Tokenizer::skipCurrentLine() {
@@ -187,7 +190,12 @@ Expected<std::vector<RCToken>> Tokenizer::run() {
187190
if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
188191
continue;
189192

190-
RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
193+
StringRef Contents = Data.take_front(Pos).drop_front(TokenStart);
194+
195+
if (TokenKind == Kind::Int)
196+
trimIntString(Contents);
197+
198+
RCToken Token(TokenKind, Contents);
191199
if (TokenKind == Kind::Identifier) {
192200
processIdentifier(Token);
193201
} else if (TokenKind == Kind::Int) {
@@ -366,12 +374,30 @@ void Tokenizer::processIdentifier(RCToken &Token) const {
366374
Token = RCToken(Kind::BlockEnd, Name);
367375
}
368376

377+
// Strips the leading zeros from the integer literal Str when tokenizing in
// rc.exe mode (i.e. when IsWindres is false). In windres mode Str is left
// untouched.
//
// For compatibility with rc.exe, a leading zero must not make the literal
// be interpreted as octal. We do rely on StringRef::getAsInteger for
// autodetecting between decimal and hexadecimal literals, but we want to
// avoid interpreting literals as octal, so the zeros are removed here.
//
// A zero is dropped only while it is followed by another digit, so a lone
// "0" and the "0x" hexadecimal prefix are preserved.
//
// This omits the leading zeros from the RCToken's value string entirely,
// which also has a visible effect when dumping the tokenizer output.
// Alternatively, we could store the IsWindres flag in RCToken and defer
// the trimming to RCToken::intValue.
void Tokenizer::trimIntString(StringRef &Str) const {
  if (IsWindres)
    return;

  // Cast to unsigned char: passing a negative char value to std::isdigit is
  // undefined behavior.
  while (Str.size() >= 2 && Str[0] == '0' &&
         std::isdigit(static_cast<unsigned char>(Str[1])))
    Str = Str.drop_front(1);
}
394+
369395
} // anonymous namespace
370396

371397
namespace llvm {
372398

373-
Expected<std::vector<RCToken>> tokenizeRC(StringRef Input) {
374-
return Tokenizer(Input).run();
399+
// Tokenizes Input by constructing a Tokenizer over it and running it.
// Returns the resulting vector of tokens, or an error. IsWindres is passed
// through to the Tokenizer unchanged.
Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres) {
400+
return Tokenizer(Input, IsWindres).run();
375401
}
376402

377403
} // namespace llvm

llvm/tools/llvm-rc/ResourceScriptToken.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class RCToken {
7676
// Tokens returned by this function hold only references to the parts
7777
// of the Input. Memory buffer containing Input cannot be freed,
7878
// modified or reallocated.
79-
Expected<std::vector<RCToken>> tokenizeRC(StringRef Input);
79+
Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres);
8080

8181
} // namespace llvm
8282

llvm/tools/llvm-rc/ResourceScriptTokenList.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
// Long tokens. They might consist of more than one character.
1616
TOKEN(Invalid) // Invalid token. Should not occur in a valid script.
17-
TOKEN(Int) // Integer (decimal, octal or hexadecimal).
17+
TOKEN(Int) // Integer (decimal or hexadecimal, and possibly octal for windres).
1818
TOKEN(String) // String value.
1919
TOKEN(Identifier) // Script identifier (resource name or type).
2020
TOKEN(LineComment) // Beginning of single-line comment.

llvm/tools/llvm-rc/llvm-rc.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,8 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts,
619619
StringRef Contents = FileContents->getBuffer();
620620

621621
std::string FilteredContents = filterCppOutput(Contents);
622-
std::vector<RCToken> Tokens = ExitOnErr(tokenizeRC(FilteredContents));
622+
std::vector<RCToken> Tokens =
623+
ExitOnErr(tokenizeRC(FilteredContents, Opts.IsWindres));
623624

624625
if (Opts.BeVerbose) {
625626
const Twine TokenNames[] = {

0 commit comments

Comments
 (0)