Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 102 additions & 119 deletions src/java.base/share/classes/java/lang/String.java
Original file line number Diff line number Diff line change
Expand Up @@ -553,135 +553,125 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
* disambiguate it against other similar methods of this class.
*/
private String(Charset charset, byte[] bytes, int offset, int length) {
String str;
if (length == 0) {
this.value = "".value;
this.coder = "".coder;
str = "";
} else if (charset == UTF_8.INSTANCE) {
if (COMPACT_STRINGS) {
int dp = StringCoding.countPositives(bytes, offset, length);
if (dp == length) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
return;
str = utf8(bytes, offset, length);
} else if (charset == ISO_8859_1.INSTANCE) {
str = iso88591(bytes, offset, length);
} else if (charset == US_ASCII.INSTANCE) {
str = ascii(bytes, offset, length);
} else {
str = decode(charset, bytes, offset, length);
}
this(str);
}

private static String utf8(byte[] bytes, int offset, int length) {
if (COMPACT_STRINGS) {
int dp = StringCoding.countPositives(bytes, offset, length);
if (dp == length) {
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
}
// Decode with a stable copy, to be the result if the decoded length is the same
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
int sp = dp; // first dp bytes are already in the copy
while (sp < length) {
int b1 = latin1[sp++];
if (b1 >= 0) {
latin1[dp++] = (byte) b1;
continue;
}
// Decode with a stable copy, to be the result if the decoded length is the same
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
int sp = dp; // first dp bytes are already in the copy
while (sp < length) {
int b1 = latin1[sp++];
if (b1 >= 0) {
latin1[dp++] = (byte)b1;
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
int b2 = latin1[sp];
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
latin1[dp++] = (byte) decode2(b1, b2);
sp++;
continue;
}
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
int b2 = latin1[sp];
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
latin1[dp++] = (byte)decode2(b1, b2);
sp++;
continue;
}
}
// anything not a latin1, including the REPL
// we have to go with the utf16
sp--;
break;
}
if (sp == length) {
if (dp != latin1.length) {
latin1 = Arrays.copyOf(latin1, dp);
}
this.value = latin1;
this.coder = LATIN1;
return;
}
byte[] utf16 = StringUTF16.newBytesFor(length);
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
if (dp != length) {
utf16 = Arrays.copyOf(utf16, dp << 1);
}
this.value = utf16;
this.coder = UTF16;
} else { // !COMPACT_STRINGS
byte[] dst = StringUTF16.newBytesFor(length);
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
// anything not a latin1, including the REPL
// we have to go with the utf16
sp--;
break;
}
if (sp == length) {
if (dp != latin1.length) {
latin1 = Arrays.copyOf(latin1, dp);
}
this.value = dst;
this.coder = UTF16;
return new String(latin1, LATIN1);
}
} else if (charset == ISO_8859_1.INSTANCE) {
if (COMPACT_STRINGS) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
} else {
this.value = StringLatin1.inflate(bytes, offset, length);
this.coder = UTF16;
byte[] utf16 = StringUTF16.newBytesFor(length);
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
if (dp != length) {
utf16 = Arrays.copyOf(utf16, dp << 1);
}
} else if (charset == US_ASCII.INSTANCE) {
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
} else {
byte[] dst = StringUTF16.newBytesFor(length);
int dp = 0;
while (dp < length) {
int b = bytes[offset++];
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
}
this.value = dst;
this.coder = UTF16;
return new String(utf16, UTF16);
} else { // !COMPACT_STRINGS
byte[] dst = StringUTF16.newBytesFor(length);
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
}
return new String(dst, UTF16);
}
}

private static String iso88591(byte[] bytes, int offset, int length) {
if (COMPACT_STRINGS) {
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
} else {
// (1)We never cache the "external" cs, the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2)The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
CharsetDecoder cd = charset.newDecoder();
// ArrayDecoder fastpaths
if (cd instanceof ArrayDecoder ad) {
// ascii
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
if (COMPACT_STRINGS) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
return;
}
this.value = StringLatin1.inflate(bytes, offset, length);
this.coder = UTF16;
return;
}
return new String(StringLatin1.inflate(bytes, offset, length), UTF16);
}
}

/**
 * Builds a String from {@code length} bytes of {@code bytes}, starting at
 * {@code offset}, treating each byte as a US-ASCII character.
 *
 * <p>When compact strings are enabled and the range holds no negative bytes,
 * the range is copied and used directly as LATIN1 content. Otherwise every
 * byte is written into a UTF16 array, with each negative byte replaced by
 * {@code REPL}.
 *
 * @param bytes  the source bytes
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded String
 */
private static String ascii(byte[] bytes, int offset, int length) {
    // Fast path: nothing in the range needs replacing, so a plain copy is the result.
    if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
        byte[] copy = Arrays.copyOfRange(bytes, offset, offset + length);
        return new String(copy, LATIN1);
    }

    // Slow path: one UTF16 char per input byte; negative bytes become REPL.
    byte[] utf16 = StringUTF16.newBytesFor(length);
    for (int i = 0; i < length; i++) {
        int b = bytes[offset + i];
        StringUTF16.putChar(utf16, i, (b < 0) ? REPL : (char) b);
    }
    return new String(utf16, UTF16);
}

private static String decode(Charset charset, byte[] bytes, int offset, int length) {
// (1) We never cache the "external" cs, the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// GC should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2) The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
CharsetDecoder cd = charset.newDecoder();
// ArrayDecoder fastpaths
if (cd instanceof ArrayDecoder ad) {
// ascii
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
return iso88591(bytes, offset, length);
} else {
// fastpath for always Latin1 decodable single byte
if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
byte[] dst = new byte[length];
ad.decodeToLatin1(bytes, offset, length, dst);
this.value = dst;
this.coder = LATIN1;
return;
}

int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
char[] ca = new char[en];
int clen = ad.decode(bytes, offset, length, ca);
if (COMPACT_STRINGS) {
byte[] val = StringUTF16.compress(ca, 0, clen);;
this.coder = StringUTF16.coderFromArrayLen(val, clen);
this.value = val;
return;
return new String(dst, LATIN1);
} else {
int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
char[] ca = new char[en];
int clen = ad.decode(bytes, offset, length, ca);
return new String(ca, 0, clen, null);
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, clen);
return;
}

} else {
// decode using CharsetDecoder
int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
Expand All @@ -694,14 +684,7 @@ private String(Charset charset, byte[] bytes, int offset, int length) {
// Substitution is enabled, so this shouldn't happen
throw new Error(x);
}
if (COMPACT_STRINGS) {
byte[] val = StringUTF16.compress(ca, 0, caLen);
this.coder = StringUTF16.coderFromArrayLen(val, caLen);
this.value = val;
return;
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, caLen);
return new String(ca, 0, caLen, null);
}
}

Expand Down