Skip to content

Commit dcecc4c

Browse files
committed
Refactor PString native representation
1 parent a1322e5 commit dcecc4c

File tree

9 files changed

+184
-245
lines changed

9 files changed

+184
-245
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextSlotBuiltins.java

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@
7777
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.setter;
7878
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.vectorcallfunc;
7979
import static com.oracle.graal.python.nodes.HiddenAttr.METHOD_DEF_PTR;
80-
import static com.oracle.graal.python.nodes.HiddenAttr.NATIVE_STORAGE;
8180
import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_START;
8281
import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_STEP;
8382
import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_STOP;
@@ -111,7 +110,7 @@
111110
import com.oracle.graal.python.builtins.objects.object.PythonObject;
112111
import com.oracle.graal.python.builtins.objects.set.PBaseSet;
113112
import com.oracle.graal.python.builtins.objects.slice.PSlice;
114-
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
113+
import com.oracle.graal.python.builtins.objects.str.NativeStringData;
115114
import com.oracle.graal.python.builtins.objects.str.PString;
116115
import com.oracle.graal.python.builtins.objects.str.StringNodes;
117116
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringLenNode;
@@ -167,10 +166,12 @@ abstract static class GraalPyPrivate_Get_PyASCIIObject_state_ascii extends CApiU
167166
int get(PString object,
168167
@Bind Node inliningTarget,
169168
@Cached InlinedConditionProfile storageProfile,
169+
@Cached HiddenAttr.ReadNode readAttrNode,
170170
@Cached TruffleString.GetCodeRangeNode getCodeRangeNode) {
171171
// important: avoid materialization of native sequences
172-
if (storageProfile.profile(inliningTarget, object.isNativeCharSequence())) {
173-
return object.getNativeCharSequence().isAsciiOnly() ? 1 : 0;
172+
NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode);
173+
if (storageProfile.profile(inliningTarget, nativeData != null)) {
174+
return nativeData.isAscii() ? 1 : 0;
174175
}
175176

176177
TruffleString string = object.getMaterialized();
@@ -205,10 +206,12 @@ abstract static class GraalPyPrivate_Get_PyASCIIObject_state_kind extends CApiUn
205206
static int get(PString object,
206207
@Bind Node inliningTarget,
207208
@Cached InlinedConditionProfile storageProfile,
209+
@Cached HiddenAttr.ReadNode readAttrNode,
208210
@Cached TruffleString.GetCodeRangeNode getCodeRangeNode) {
209211
// important: avoid materialization of native sequences
210-
if (storageProfile.profile(inliningTarget, object.isNativeCharSequence())) {
211-
return object.getNativeCharSequence().getElementSize() & 0b111;
212+
NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode);
213+
if (storageProfile.profile(inliningTarget, nativeData != null)) {
214+
return nativeData.getCharSize();
212215
}
213216
TruffleString string = object.getMaterialized();
214217
TruffleString.CodeRange range = getCodeRangeNode.execute(string, TS_ENCODING);
@@ -677,10 +680,12 @@ static Object get(PString object,
677680
@Cached TruffleString.SwitchEncodingNode switchEncodingNode,
678681
@Cached CStructAccess.AllocateNode allocateNode,
679682
@Cached CStructAccess.WriteTruffleStringNode writeTruffleStringNode,
680-
@Cached HiddenAttr.WriteNode writeAttribute) {
681-
if (object.isNativeCharSequence()) {
683+
@Cached HiddenAttr.ReadNode readAttrNode,
684+
@Cached HiddenAttr.WriteNode writeAttrNode) {
685+
NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode);
686+
if (nativeData != null) {
682687
// in this case, we can just return the pointer
683-
return object.getNativeCharSequence().getPtr();
688+
return nativeData.getPtr();
684689
}
685690
TruffleString string = object.getMaterialized();
686691
TruffleString.CodeRange range = getCodeRangeNode.execute(string, TS_ENCODING);
@@ -702,20 +707,14 @@ static Object get(PString object,
702707
encoding = TruffleString.Encoding.UTF_32;
703708
}
704709
string = switchEncodingNode.execute(string, encoding);
705-
int byteLength = string.byteLength(encoding) + /* null terminator */ charSize;
706-
Object ptr = allocateNode.alloc(byteLength);
710+
int byteLength = string.byteLength(encoding);
711+
Object ptr = allocateNode.alloc(byteLength + /* null terminator */ charSize);
707712
writeTruffleStringNode.write(ptr, string, encoding);
708713
/*
709-
* Set native char sequence, so we can just return the pointer the next time.
714+
* Set native data, so we can just return the pointer the next time.
710715
*/
711-
NativeCharSequence nativeSequence = new NativeCharSequence(ptr, string.byteLength(encoding) / charSize, charSize, isAscii);
712-
object.setNativeCharSequence(nativeSequence);
713-
/*
714-
* Create a native sequence storage to manage the lifetime of the native memory.
715-
*
716-
* TODO it would be nicer if the native char sequence could manage its own memory
717-
*/
718-
writeAttribute.execute(inliningTarget, object, NATIVE_STORAGE, NativeByteSequenceStorage.create(ptr, byteLength, byteLength, true));
716+
NativeStringData data = NativeStringData.create(charSize, isAscii, ptr, byteLength);
717+
object.setNativeStringData(inliningTarget, writeAttrNode, data);
719718
return ptr;
720719
}
721720
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@
124124
import com.oracle.graal.python.builtins.objects.dict.PDict;
125125
import com.oracle.graal.python.builtins.objects.ints.PInt;
126126
import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView;
127-
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
127+
import com.oracle.graal.python.builtins.objects.str.NativeStringData;
128128
import com.oracle.graal.python.builtins.objects.str.PString;
129129
import com.oracle.graal.python.builtins.objects.str.StringBuiltins;
130130
import com.oracle.graal.python.builtins.objects.str.StringBuiltins.EncodeNode;
@@ -812,12 +812,22 @@ static int doGeneric(Object type, long lindex,
812812
}
813813
}
814814

815-
@CApiBuiltin(ret = PyObjectTransfer, args = {Pointer, Py_ssize_t, Py_ssize_t, PY_UCS4}, call = Ignored)
815+
@CApiBuiltin(ret = PyObjectTransfer, args = {Pointer, Py_ssize_t, Int, Int}, call = Ignored)
816816
abstract static class GraalPyPrivate_Unicode_New extends CApiQuaternaryBuiltinNode {
817817
@Specialization
818-
static Object doGeneric(Object ptr, long elements, long elementSize, int isAscii,
819-
@Bind PythonLanguage language) {
820-
return PFactory.createString(language, new NativeCharSequence(ptr, (int) elements, (int) elementSize, isAscii != 0));
818+
static Object doGeneric(Object ptr, long elements, int charSize, int isAscii,
819+
@Bind Node inliningTarget,
820+
@Bind PythonLanguage language,
821+
@Cached HiddenAttr.WriteNode writeNode,
822+
@Cached PRaiseNode raiseNode) {
823+
long size = elements * charSize;
824+
if (!PInt.isIntRange(size)) {
825+
throw raiseNode.raise(inliningTarget, MemoryError);
826+
}
827+
PString s = PFactory.createString(language, null);
828+
NativeStringData data = NativeStringData.create(charSize, isAscii != 0, ptr, (int) size);
829+
s.setNativeStringData(inliningTarget, writeNode, data);
830+
return s;
821831
}
822832
}
823833

@@ -1246,9 +1256,6 @@ abstract static class GraalPyPrivate_Unicode_IsMaterialized extends CApiUnaryBui
12461256

12471257
@Specialization
12481258
static int pstring(PString s) {
1249-
if (s.isNativeCharSequence()) {
1250-
return s.isNativeMaterialized() ? 1 : 0;
1251-
}
12521259
return s.isMaterialized() ? 1 : 0;
12531260
}
12541261

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeCharSequence.java

Lines changed: 0 additions & 146 deletions
This file was deleted.
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.graal.python.builtins.objects.str;
42+
43+
import com.oracle.graal.python.runtime.sequence.storage.NativeByteSequenceStorage;
44+
import com.oracle.truffle.api.CompilerDirectives;
45+
import com.oracle.truffle.api.strings.TruffleString;
46+
47+
public final class NativeStringData {
48+
private static final byte KIND_ASCII = 0;
49+
private static final byte KIND_1BYTE = 1;
50+
private static final byte KIND_2BYTE = 2;
51+
private static final byte KIND_4BYTE = 4;
52+
private final byte kind;
53+
// We need the storage object for memory management, don't inline its fields here
54+
private final NativeByteSequenceStorage storage;
55+
56+
private NativeStringData(int charSize, boolean isAscii, NativeByteSequenceStorage storage) {
57+
assert charSize == 1 || charSize == 2 || charSize == 4;
58+
assert !isAscii || charSize == 1;
59+
this.kind = isAscii ? KIND_ASCII : (byte) charSize;
60+
this.storage = storage;
61+
}
62+
63+
public static NativeStringData create(int charSize, boolean isAscii, Object ptr, int length) {
64+
return new NativeStringData(charSize, isAscii, NativeByteSequenceStorage.create(ptr, length, length, true));
65+
}
66+
67+
public boolean isAscii() {
68+
return kind == KIND_ASCII;
69+
}
70+
71+
public int getCharSize() {
72+
return kind != 0 ? kind : KIND_1BYTE;
73+
}
74+
75+
public Object getPtr() {
76+
return storage.getPtr();
77+
}
78+
79+
public int length() {
80+
return storage.length();
81+
}
82+
83+
public TruffleString toTruffleString(TruffleString.FromNativePointerNode fromNativePointerNode) {
84+
TruffleString.Encoding encoding = switch (kind) {
85+
case KIND_ASCII -> TruffleString.Encoding.US_ASCII;
86+
case KIND_1BYTE -> TruffleString.Encoding.ISO_8859_1;
87+
case KIND_2BYTE -> TruffleString.Encoding.UTF_16;
88+
case KIND_4BYTE -> TruffleString.Encoding.UTF_32;
89+
default -> throw CompilerDirectives.shouldNotReachHere();
90+
};
91+
// NativeByteSequenceStorage implements asPointer
92+
return fromNativePointerNode.execute(storage, 0, storage.length(), encoding, false);
93+
}
94+
}

0 commit comments

Comments
 (0)