@@ -1673,6 +1673,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
16731673 * msgs = NULL ;
16741674 }
16751675
1676+
1677+ /* Returns 0 if no message needs to be generated for this problem even
1678+ * if everything else says to. Otherwise returns the warning category
1679+ * to use for the message.
1680+ *
1681+ * No message need be generated if the UTF8_CHECK_ONLY flag has been
1682+ * set by the caller. Otherwise, a message should be generated if
1683+ * either:
1684+ * 1) the caller has furnished a structure into which messages should
1685+ * be returned to it (so it itself can decide what to do); or
1686+ * 2) warnings are enabled for either of the category parameters to the
1687+ * macro.
1688+ *
1689+ * The 'warning' parameter is the higher priority warning category to
1690+ * check. The macro calls ckWARN_d(warning), so warnings for it are
1691+ * considered to be on by default.
1692+ *
1693+ * The second, lower priority category is optional. To specify not to
1694+ * use one, call the macro
1695+ * like: NEED_MESSAGE(WARN_FOO,,)
1696+ * Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR)
1697+ *
1698+ * The second parameter could also have been ckWARN to specify that the
1699+ * second category isn't on by default.
1700+ *
1701+ * When called without a second category, the macro outputs a bunch of
1702+ * zeroes that the compiler should fold to nothing */
1703+ #define NEED_MESSAGE (warning , extra_ckWARN , extra_category ) \
1704+ ((flags & UTF8_CHECK_ONLY) ? 0 : \
1705+ ((ckWARN_d(warning)) ? warning : \
1706+ ((extra_ckWARN(extra_category +0)) ? extra_category +0 : \
1707+ ((msgs) ? warning : 0))))
1708+
16761709 while (possible_problems ) { /* Handle each possible problem */
16771710 U32 pack_warn = 0 ;
16781711 char * message = NULL ;
@@ -1722,29 +1755,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17221755 || (flags & (UTF8_WARN_SUPER |UTF8_WARN_PERL_EXTENDED )))
17231756 {
17241757
1725- /* The warnings code explicitly says it doesn't handle the
1726- * case of packWARN2 and two categories which have
1727- * parent-child relationship. Even if it works now to
1728- * raise the warning if either is enabled, it wouldn't
1729- * necessarily do so in the future. We output (only) the
1730- * most dire warning */
1731- if (! (flags & UTF8_CHECK_ONLY )) {
1732- if (ckWARN_d (WARN_UTF8 )) {
1733- pack_warn = packWARN (WARN_UTF8 );
1734- }
1735- else if (ckWARN_d (WARN_NON_UNICODE )) {
1736- pack_warn = packWARN (WARN_NON_UNICODE );
1737- }
1738- else if (msgs ) {
1739- pack_warn = packWARN (WARN_UTF8 );
1740- }
1741-
1742- if (pack_warn ) {
1743- message = Perl_form (aTHX_ "%s: %s (overflows)" ,
1758+ /* Overflow is a hybrid. If the word size on this platform
1759+ * were wide enough for this to not overflow, a non-Unicode
1760+ * code point would have been generated. If the caller
1761+ * wanted warnings for such code points, the warning
1762+ * category would be WARN_NON_UNICODE, On the other hand,
1763+ * overflow is considered a malformation, which is serious,
1764+ * and the category would be just WARN_UTF8. We clearly
1765+ * should warn if either category is enabled, but which
1766+ * category to use? Historically, we've used 'utf8' if it
1767+ * is enabled; and that seems like the more severe
1768+ * category, more befitting a malformation. */
1769+ pack_warn = NEED_MESSAGE (WARN_UTF8 ,
1770+ ckWARN_d , WARN_NON_UNICODE );
1771+ if (pack_warn ) {
1772+ message = Perl_form (aTHX_ "%s: %s (overflows)" ,
17441773 malformed_text ,
17451774 _byte_dump_string (s0 , curlen , 0 ));
1746- this_flag_bit = UTF8_GOT_OVERFLOW ;
1747- }
1775+ this_flag_bit = UTF8_GOT_OVERFLOW ;
17481776 }
17491777 }
17501778
@@ -1761,9 +1789,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17611789 assert (0 );
17621790
17631791 disallowed = TRUE;
1764- if ( (msgs
1765- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1766- {
1792+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
17671793 pack_warn = packWARN (WARN_UTF8 );
17681794 message = Perl_form (aTHX_ "%s (empty string)" ,
17691795 malformed_text );
@@ -1778,9 +1804,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17781804
17791805 if (! (flags & UTF8_ALLOW_CONTINUATION )) {
17801806 disallowed = TRUE;
1781- if (( msgs
1782- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1783- {
1807+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
17841808 pack_warn = packWARN (WARN_UTF8 );
17851809 message = Perl_form (aTHX_
17861810 "%s: %s (unexpected continuation byte 0x%02x,"
@@ -1799,9 +1823,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17991823
18001824 if (! (flags & UTF8_ALLOW_SHORT )) {
18011825 disallowed = TRUE;
1802- if (( msgs
1803- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1804- {
1826+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
18051827 pack_warn = packWARN (WARN_UTF8 );
18061828 message = Perl_form (aTHX_
18071829 "%s: %s (too short; %d byte%s available, need %d)" ,
@@ -1822,9 +1844,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18221844
18231845 if (! (flags & UTF8_ALLOW_NON_CONTINUATION )) {
18241846 disallowed = TRUE;
1825- if (( msgs
1826- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1827- {
1847+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
18281848
18291849 /* If we don't know for sure that the input length is
18301850 * valid, avoid as much as possible reading past the
@@ -1849,9 +1869,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18491869 if (flags & UTF8_WARN_SURROGATE ) {
18501870 * errors |= UTF8_GOT_SURROGATE ;
18511871
1852- if ( ! (flags & UTF8_CHECK_ONLY )
1853- && (msgs || ckWARN_d (WARN_SURROGATE )))
1854- {
1872+ if (NEED_MESSAGE (WARN_SURROGATE ,,)) {
18551873 pack_warn = packWARN (WARN_SURROGATE );
18561874
18571875 /* These are the only errors that can occur with a
@@ -1881,9 +1899,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18811899 if (flags & UTF8_WARN_SUPER ) {
18821900 * errors |= UTF8_GOT_SUPER ;
18831901
1884- if ( ! (flags & UTF8_CHECK_ONLY )
1885- && (msgs || ckWARN_d (WARN_NON_UNICODE )))
1886- {
1902+ if (NEED_MESSAGE (WARN_NON_UNICODE ,,)) {
18871903 pack_warn = packWARN (WARN_NON_UNICODE );
18881904
18891905 if (orig_problems & UTF8_GOT_TOO_SHORT ) {
@@ -1904,10 +1920,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19041920 * and before possibly bailing out, so that the more dire
19051921 * warning will override the regular one. */
19061922 if (UNLIKELY (UTF8_IS_PERL_EXTENDED (s0 ))) {
1907- if ( ! (flags & UTF8_CHECK_ONLY )
1908- && (flags & (UTF8_WARN_PERL_EXTENDED |UTF8_WARN_SUPER ))
1909- && (msgs || ( ckWARN_d (WARN_NON_UNICODE )
1910- || ckWARN (WARN_PORTABLE ))))
1923+ if ( (flags & (UTF8_WARN_PERL_EXTENDED |UTF8_WARN_SUPER ))
1924+ && NEED_MESSAGE (WARN_NON_UNICODE , ckWARN , WARN_PORTABLE ))
19111925 {
19121926 pack_warn = packWARN2 (WARN_NON_UNICODE , WARN_PORTABLE );
19131927
@@ -1957,9 +1971,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19571971 if (flags & UTF8_WARN_NONCHAR ) {
19581972 * errors |= UTF8_GOT_NONCHAR ;
19591973
1960- if ( ! (flags & UTF8_CHECK_ONLY )
1961- && (msgs || ckWARN_d (WARN_NONCHAR )))
1962- {
1974+ if (NEED_MESSAGE (WARN_NONCHAR ,,)) {
19631975 /* The code above should have guaranteed that we don't
19641976 * get here with errors other than overlong */
19651977 assert (! (orig_problems
@@ -1994,9 +2006,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19942006 else {
19952007 disallowed = TRUE;
19962008
1997- if (( msgs
1998- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1999- {
2009+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
20002010 pack_warn = packWARN (WARN_UTF8 );
20012011
20022012 /* These error types cause 'uv' to be something that
0 commit comments