4040 */
4141package com .oracle .graal .python .builtins .modules ;
4242
43- import static com .oracle .graal .python .runtime .exception .PythonErrorType .TypeError ;
4443import static com .oracle .graal .python .nodes .BuiltinNames .J_UNICODEDATA ;
4544import static com .oracle .graal .python .nodes .BuiltinNames .T_UNICODEDATA ;
4645import static com .oracle .graal .python .runtime .exception .PythonErrorType .KeyError ;
5049
5150import java .util .List ;
5251
53- import com .oracle .graal .python .builtins .objects .module .PythonModule ;
54- import com .oracle .graal .python .nodes .function .builtins .PythonUnaryBuiltinNode ;
55- import com .oracle .graal .python .nodes .util .CannotCastException ;
56- import com .oracle .graal .python .nodes .util .CastToTruffleStringNode ;
57- import com .oracle .truffle .api .strings .TruffleString .CodePointAtByteIndexNode ;
58- import com .oracle .truffle .api .strings .TruffleString .CodePointLengthNode ;
59- import com .oracle .truffle .api .strings .TruffleString .FromJavaStringNode ;
60- import com .oracle .truffle .api .strings .TruffleString .ToJavaStringNode ;
6152import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter ;
53+ import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter .DecompositionType ;
6254import org .graalvm .shadowed .com .ibm .icu .lang .UProperty ;
6355import org .graalvm .shadowed .com .ibm .icu .text .Normalizer2 ;
6456import org .graalvm .shadowed .com .ibm .icu .util .VersionInfo ;
6961import com .oracle .graal .python .builtins .Python3Core ;
7062import com .oracle .graal .python .builtins .PythonBuiltins ;
7163import com .oracle .graal .python .builtins .objects .PNone ;
64+ import com .oracle .graal .python .builtins .objects .module .PythonModule ;
7265import com .oracle .graal .python .nodes .ErrorMessages ;
7366import com .oracle .graal .python .nodes .PRaiseNode ;
7467import com .oracle .graal .python .nodes .function .PythonBuiltinBaseNode ;
8578import com .oracle .truffle .api .dsl .Specialization ;
8679import com .oracle .truffle .api .nodes .Node ;
8780import com .oracle .truffle .api .strings .TruffleString ;
81+ import com .oracle .truffle .api .strings .TruffleString .FromJavaStringNode ;
82+ import com .oracle .truffle .api .strings .TruffleString .ToJavaStringNode ;
8883
8984@ CoreFunctions (defineModule = J_UNICODEDATA , isEager = true )
9085public final class UnicodeDataModuleBuiltins extends PythonBuiltins {
@@ -143,7 +138,7 @@ static Normalizer2 getNormalizer(TruffleString form) {
143138 @ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
144139 @ GenerateNodeFactory
145140 @ ImportStatic (UnicodeDataModuleBuiltins .class )
146- public abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
141+ abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
147142 @ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
148143 static TruffleString normalize (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
149144 @ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
@@ -155,8 +150,9 @@ static TruffleString normalize(@SuppressWarnings("unused") TruffleString form, T
155150 }
156151
157152 @ Specialization (guards = "getNormalizer(form) == null" )
158- TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
159- throw PRaiseNode .raiseStatic (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
153+ static TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ,
154+ @ Bind Node inliningTarget ) {
155+ throw PRaiseNode .raiseStatic (inliningTarget , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
160156 }
161157
162158 @ TruffleBoundary
@@ -176,19 +172,20 @@ protected ArgumentClinicProvider getArgumentClinic() {
176172 @ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
177173 @ GenerateNodeFactory
178174 @ ImportStatic (UnicodeDataModuleBuiltins .class )
179- public abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
175+ abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
180176 @ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
181177 @ TruffleBoundary
182- boolean isNormalized (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
178+ static boolean isNormalized (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
183179 @ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
184180 @ Cached ("getNormalizer(cachedForm)" ) Normalizer2 cachedNormalizer ,
185181 @ SuppressWarnings ("unused" ) @ Cached TruffleString .EqualNode equalNode ) {
186182 return cachedNormalizer .isNormalized (unistr .toJavaStringUncached ());
187183 }
188184
189185 @ Specialization (guards = "getNormalizer(form) == null" )
190- TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
191- throw PRaiseNode .raiseStatic (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
186+ static TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ,
187+ @ Bind Node inliningTarget ) {
188+ throw PRaiseNode .raiseStatic (inliningTarget , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
192189 }
193190
194191 @ Override
@@ -201,7 +198,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
201198 @ Builtin (name = "lookup" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"name" })
202199 @ ArgumentClinic (name = "name" , conversion = ArgumentClinic .ClinicConversion .TString )
203200 @ GenerateNodeFactory
204- public abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
201+ abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
205202
206203 private static final int NAME_MAX_LENGTH = 256 ;
207204
@@ -267,7 +264,7 @@ private static String getCharacterByUnicodeNameAlias(String unicodeName) {
267264 @ Builtin (name = "name" , minNumOfPositionalArgs = 1 , parameterNames = {"chr" , "default" })
268265 @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
269266 @ GenerateNodeFactory
270- public abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
267+ abstract static class NameNode extends PythonBinaryClinicBuiltinNode {
271268
272269 @ Specialization
273270 static Object name (int cp , Object defaultValue ,
@@ -294,7 +291,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
294291 @ Builtin (name = "bidirectional" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
295292 @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
296293 @ GenerateNodeFactory
297- public abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
294+ abstract static class BidirectionalNode extends PythonUnaryClinicBuiltinNode {
298295 @ Specialization
299296 static TruffleString bidirectional (int chr ,
300297 @ Cached FromJavaStringNode fromJavaStringNode ) {
@@ -316,7 +313,7 @@ protected ArgumentClinicProvider getArgumentClinic() {
316313 @ Builtin (name = "category" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
317314 @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
318315 @ GenerateNodeFactory
319- public abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
316+ abstract static class CategoryNode extends PythonUnaryClinicBuiltinNode {
320317 @ Specialization
321318 static TruffleString category (int chr ,
322319 @ Cached FromJavaStringNode fromJavaStringNode ) {
@@ -336,57 +333,90 @@ protected ArgumentClinicProvider getArgumentClinic() {
336333
337334 // unicodedata.combining(chr)
338335 @ Builtin (name = "combining" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
336+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
339337 @ GenerateNodeFactory
340- public abstract static class CombiningNode extends PythonUnaryBuiltinNode {
338+ abstract static class CombiningNode extends PythonUnaryClinicBuiltinNode {
341339
342340 @ Specialization
343341 @ TruffleBoundary
344- static Object combining (Object object ,
345- @ Bind Node inliningTarget ) {
346- final TruffleString chr ;
342+ static Object combining (int codepoint ) {
343+ return UCharacter . getCombiningClass ( codepoint );
344+ }
347345
348- try {
349- chr = CastToTruffleStringNode . getUncached (). execute ( inliningTarget , object );
350- } catch ( CannotCastException e ) {
351- throw PRaiseNode . raiseStatic ( inliningTarget , TypeError , ErrorMessages . S_ARG_MUST_BE_S_NOT_P , "combining()" , "a unicode character" , object );
352- }
346+ @ Override
347+ protected ArgumentClinicProvider getArgumentClinic () {
348+ return UnicodeDataModuleBuiltinsClinicProviders . CombiningNodeClinicProviderGen . INSTANCE ;
349+ }
350+ }
353351
354- if (CodePointLengthNode .getUncached ().execute (chr , TS_ENCODING ) != 1 ) {
355- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "combining()" , "a unicode character" , object );
352+ // unicodedata.decomposition(chr)
353+ @ Builtin (name = "decomposition" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
354+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
355+ @ GenerateNodeFactory
356+ abstract static class DecompositionNode extends PythonUnaryClinicBuiltinNode {
357+ @ Specialization
358+ @ TruffleBoundary
359+ static TruffleString decomposition (int codepoint ) {
360+ int type = UCharacter .getIntPropertyValue (codepoint , UProperty .DECOMPOSITION_TYPE );
361+ String prefix = getDecompositionPrefix (type );
362+ String decomposition = Normalizer2 .getNFKDInstance ().getDecomposition (codepoint );
363+
364+ StringBuilder sb = new StringBuilder ();
365+ if (prefix != null ) {
366+ sb .append (prefix );
367+ }
368+ if (decomposition != null ) {
369+ int cp ;
370+ for (int i = 0 ; i < decomposition .length (); i += Character .charCount (cp )) {
371+ if (!sb .isEmpty ()) {
372+ sb .append (' ' );
373+ }
374+ cp = decomposition .codePointAt (i );
375+ sb .append (String .format ("%04x" , cp ));
376+ }
356377 }
357378
358- int codepoint = CodePointAtByteIndexNode .getUncached ().execute (chr , 0 , TS_ENCODING );
359- return UCharacter .getCombiningClass (codepoint );
379+ return FromJavaStringNode .getUncached ().execute (sb .toString (), TS_ENCODING );
380+ }
381+
382+ private static String getDecompositionPrefix (int type ) {
383+ return switch (type ) {
384+ case DecompositionType .NOBREAK -> "<noBreak>" ;
385+ case DecompositionType .COMPAT -> "<compat>" ;
386+ case DecompositionType .SUPER -> "<super>" ;
387+ case DecompositionType .FRACTION -> "<fraction>" ;
388+ case DecompositionType .SUB -> "<sub>" ;
389+ case DecompositionType .FONT -> "<font>" ;
390+ case DecompositionType .CIRCLE -> "<circle>" ;
391+ case DecompositionType .WIDE -> "<wide>" ;
392+ case DecompositionType .VERTICAL -> "<vertical>" ;
393+ case DecompositionType .SQUARE -> "<square>" ;
394+ case DecompositionType .ISOLATED -> "<isolated>" ;
395+ case DecompositionType .FINAL -> "<final>" ;
396+ case DecompositionType .INITIAL -> "<initial>" ;
397+ case DecompositionType .MEDIAL -> "<medial>" ;
398+ case DecompositionType .SMALL -> "<small>" ;
399+ case DecompositionType .NARROW -> "<narrow>" ;
400+ default -> null ;
401+ };
402+ }
403+
404+ @ Override
405+ protected ArgumentClinicProvider getArgumentClinic () {
406+ return UnicodeDataModuleBuiltinsClinicProviders .DecompositionNodeClinicProviderGen .INSTANCE ;
360407 }
361408 }
362409
363410 // unicode.east_asia_width(chr)
364411 @ Builtin (name = "east_asian_width" , minNumOfPositionalArgs = 1 , numOfPositionalOnlyArgs = 1 , parameterNames = {"chr" })
412+ @ ArgumentClinic (name = "chr" , conversion = ArgumentClinic .ClinicConversion .CodePoint )
365413 @ GenerateNodeFactory
366- public abstract static class EastAsianWidthNode extends PythonUnaryBuiltinNode {
414+ abstract static class EastAsianWidthNode extends PythonUnaryClinicBuiltinNode {
367415 @ Specialization
368416 @ TruffleBoundary
369- static TruffleString eastAsianWidth (Object object ,
370- @ Bind Node inliningTarget ,
371- @ Cached CastToTruffleStringNode castToTruffleStringNode ,
372- @ Cached CodePointLengthNode codePointLengthNode ,
373- @ Cached CodePointAtByteIndexNode codePointAtByteIndexNode ,
374- @ Cached FromJavaStringNode fromJavaStringNode ) {
375- final TruffleString chr ;
376-
377- try {
378- chr = CastToTruffleStringNode .getUncached ().execute (inliningTarget , object );
379- } catch (CannotCastException e ) {
380- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "east_asian_width()" , "a unicode character" , object );
381- }
382-
383- if (CodePointLengthNode .getUncached ().execute (chr , TS_ENCODING ) != 1 ) {
384- throw PRaiseNode .raiseStatic (inliningTarget , TypeError , ErrorMessages .S_ARG_MUST_BE_S_NOT_P , "east_asian_width()" , "a unicode character" , object );
385- }
386-
387- int codepoint = CodePointAtByteIndexNode .getUncached ().execute (chr , 0 , TS_ENCODING );
417+ static TruffleString eastAsianWidth (int codepoint ) {
388418 String widthName = getWidthName (codepoint );
389- return fromJavaStringNode .execute (widthName , TS_ENCODING );
419+ return FromJavaStringNode . getUncached () .execute (widthName , TS_ENCODING );
390420 }
391421
392422 @ TruffleBoundary
@@ -406,5 +436,10 @@ private static String getWidthName(int codepoint) {
406436
407437 return widthName ;
408438 }
439+
440+ @ Override
441+ protected ArgumentClinicProvider getArgumentClinic () {
442+ return UnicodeDataModuleBuiltinsClinicProviders .EastAsianWidthNodeClinicProviderGen .INSTANCE ;
443+ }
409444 }
410445}
0 commit comments