@@ -1693,9 +1693,10 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
16931693 continue ;
16941694
16951695 fb_assert (accessor.current ()->first .hasData ());
1696- USHORT ch = accessor.current ()->first [0 ];
1696+ USHORT firstCh = accessor.current ()->first [0 ];
1697+ USHORT lastCh = accessor.current ()->first .back ();
16971698
1698- if (ch >= 0xFDD0 && ch <= 0xFDEF )
1699+ if ((firstCh >= 0xFDD0 && firstCh <= 0xFDEF ) || UTF_IS_SURROGATE (lastCh) )
16991700 {
17001701 keySet.clear ();
17011702 keySet.add (Array<UCHAR>());
@@ -1879,6 +1880,9 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18791880 srcLenLong = p - src + 1 ;
18801881 }
18811882
1883+ auto originalDst = dst;
1884+ auto originalDstLen = dstLen;
1885+
18821886 if (!trailingNumbersRemoved)
18831887 {
18841888 for (int i = MIN (maxContractionsPrefixLength, srcLenLong); i > 0 ; --i)
@@ -1887,8 +1891,8 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18871891
18881892 if (keys)
18891893 {
1890- const UCHAR* dstStart = dst ;
1891- ULONG prefixLen;
1894+ UCHAR lastCharKey[ 100 ] ;
1895+ ULONG prefixLen, lastCharKeyLen ;
18921896
18931897 srcLenLong -= i;
18941898
@@ -1897,50 +1901,72 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18971901 prefixLen = icu->ucolGetSortKey (coll,
18981902 reinterpret_cast <const UChar*>(src), srcLenLong, dst + 2 , dstLen - 2 );
18991903
1900- if (prefixLen == 0 || prefixLen > dstLen - 2 || prefixLen > MAX_USHORT)
1904+ lastCharKeyLen = icu->ucolGetSortKey (coll,
1905+ reinterpret_cast <const UChar*>(src + srcLenLong), i, lastCharKey, sizeof (lastCharKey));
1906+
1907+ if (prefixLen == 0 || prefixLen > dstLen - 2 || prefixLen > MAX_USHORT ||
1908+ lastCharKeyLen == 0 )
1909+ {
19011910 return INTL_BAD_KEY_LENGTH;
1911+ }
19021912
19031913 fb_assert (dst[2 + prefixLen - 1 ] == ' \0 ' );
19041914 --prefixLen;
1905- dstLen -= 2 + prefixLen;
1915+
1916+ fb_assert (lastCharKey[lastCharKeyLen - 1 ] == ' \0 ' );
1917+ --lastCharKeyLen;
19061918 }
19071919 else
19081920 prefixLen = 0 ;
19091921
1922+ bool fallbackToPrefixKey = false ;
1923+
19101924 for (const auto & keyIt : *keys)
19111925 {
1912- const ULONG keyLen = prefixLen + keyIt.getCount ();
1926+ const UCHAR advance = prefixLen && lastCharKeyLen > 1 &&
1927+ keyIt.hasData () && lastCharKey[0 ] == keyIt.front () ? 1 : 0 ;
1928+
1929+ if (keyIt.getCount () - advance == 0 )
1930+ {
1931+ fallbackToPrefixKey = true ;
1932+ break ;
1933+ }
1934+
1935+ const ULONG keyLen = prefixLen + keyIt.getCount () - advance;
19131936
19141937 if (keyLen > dstLen - 2 || keyLen > MAX_USHORT)
19151938 return INTL_BAD_KEY_LENGTH;
19161939
19171940 dst[0 ] = UCHAR (keyLen & 0xFF );
19181941 dst[1 ] = UCHAR (keyLen >> 8 );
19191942
1920- if (dst != dstStart )
1921- memcpy (dst + 2 , dstStart + 2 , prefixLen);
1943+ if (dst != originalDst )
1944+ memcpy (dst + 2 , originalDst + 2 , prefixLen);
19221945
1923- memcpy (dst + 2 + prefixLen, keyIt.begin (), keyIt.getCount ());
1946+ memcpy (dst + 2 + prefixLen, keyIt.begin () + advance , keyIt.getCount () - advance );
19241947 dst += 2 + keyLen;
19251948 dstLen -= 2 + keyLen;
19261949 }
19271950
1928- return dst - dstStart;
1951+ if (fallbackToPrefixKey)
1952+ break ;
1953+
1954+ return dst - originalDst;
19291955 }
19301956 }
19311957 }
19321958
19331959 ULONG keyLen = icu->ucolGetSortKey (coll,
1934- reinterpret_cast <const UChar*>(src), srcLenLong, dst + 2 , dstLen - 3 );
1960+ reinterpret_cast <const UChar*>(src), srcLenLong, originalDst + 2 , originalDstLen - 3 );
19351961
1936- if (keyLen == 0 || keyLen > dstLen - 3 || keyLen > MAX_USHORT)
1962+ if (keyLen == 0 || keyLen > originalDstLen - 3 || keyLen > MAX_USHORT)
19371963 return INTL_BAD_KEY_LENGTH;
19381964
1939- fb_assert (dst [2 + keyLen - 1 ] == ' \0 ' );
1965+ fb_assert (originalDst [2 + keyLen - 1 ] == ' \0 ' );
19401966 --keyLen;
19411967
1942- dst [0 ] = UCHAR (keyLen & 0xFF );
1943- dst [1 ] = UCHAR (keyLen >> 8 );
1968+ originalDst [0 ] = UCHAR (keyLen & 0xFF );
1969+ originalDst [1 ] = UCHAR (keyLen >> 8 );
19441970
19451971 return keyLen + 2 ;
19461972 }
0 commit comments