@@ -3169,22 +3169,31 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
31693169 PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF ;
31703170 assert (s < send );
31713171
3172- if (! ckWARN_d ( WARN_UTF8 )) {
3172+ UV cp ;
31733173
3174- /* EMPTY is not really allowed, and asserts on debugging builds. But
3175- * on non-debugging we have to deal with it, and this causes it to
3176- * return the REPLACEMENT CHARACTER, as the documentation indicates */
3177- return utf8n_to_uvchr (s , send - s , retlen ,
3178- (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY ));
3174+ /* When everything is legal, just return that; but when not:
3175+ * 1) if warnings are enabled, return 0 and set retlen to -1
3176+ * 2) if warnings are disabled, set 'flags' to accept any malformation,
3177+ * but that will just cause the REPLACEMENT CHARACTER to be returned,
3178+ * as the documentation indicates. EMPTY is not really allowed, and
3179+ * asserts on debugging builds. But on non-debugging we have to deal
3180+ * with it.
3181+ * This API means 0 can mean a legal NUL, or the input is malformed; and
3182+ * the caller has to know if warnings are disabled to know if it can rely on
3183+ * 'retlen'. Best to use utf8_to_uv() instead */
3184+ U32 flags = (ckWARN_d (WARN_UTF8 )) ? 0 : (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY );
3185+
3186+ if ( LIKELY (utf8_to_uv_flags (s , send , & cp , retlen , flags ))
3187+ || flags )
3188+ {
3189+ return cp ;
31793190 }
3180- else {
3181- UV ret = utf8n_to_uvchr (s , send - s , retlen , 0 );
3182- if (retlen && ret == 0 && (send <= s || * s != '\0' )) {
3183- * retlen = (STRLEN ) - 1 ;
3184- }
31853191
3186- return ret ;
3192+ if (retlen ) {
3193+ * retlen = (STRLEN ) - 1 ;
31873194 }
3195+
3196+ return 0 ;
31883197}
31893198
31903199/* ------------------------------- perl.h ----------------------------- */
0 commit comments