Skip to content

Commit 918b2c1

Browse files
committed
Fix #7237 - Starting operator becomes unstable on indexed varchar fields.
1 parent 8460c48 commit 918b2c1

File tree

1 file changed

+42
-16
lines changed

1 file changed

+42
-16
lines changed

src/common/unicode_util.cpp

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,9 +1693,10 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
16931693
continue;
16941694

16951695
fb_assert(accessor.current()->first.hasData());
1696-
USHORT ch = accessor.current()->first[0];
1696+
USHORT firstCh = accessor.current()->first[0];
1697+
USHORT lastCh = accessor.current()->first.back();
16971698

1698-
if (ch >= 0xFDD0 && ch <= 0xFDEF)
1699+
if ((firstCh >= 0xFDD0 && firstCh <= 0xFDEF) || UTF_IS_SURROGATE(lastCh))
16991700
{
17001701
keySet.clear();
17011702
keySet.add(Array<UCHAR>());
@@ -1879,6 +1880,9 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18791880
srcLenLong = p - src + 1;
18801881
}
18811882

1883+
auto originalDst = dst;
1884+
auto originalDstLen = dstLen;
1885+
18821886
if (!trailingNumbersRemoved)
18831887
{
18841888
for (int i = MIN(maxContractionsPrefixLength, srcLenLong); i > 0; --i)
@@ -1887,8 +1891,8 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18871891

18881892
if (keys)
18891893
{
1890-
const UCHAR* dstStart = dst;
1891-
ULONG prefixLen;
1894+
UCHAR lastCharKey[100];
1895+
ULONG prefixLen, lastCharKeyLen;
18921896

18931897
srcLenLong -= i;
18941898

@@ -1897,50 +1901,72 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
18971901
prefixLen = icu->ucolGetSortKey(coll,
18981902
reinterpret_cast<const UChar*>(src), srcLenLong, dst + 2, dstLen - 2);
18991903

1900-
if (prefixLen == 0 || prefixLen > dstLen - 2 || prefixLen > MAX_USHORT)
1904+
lastCharKeyLen = icu->ucolGetSortKey(coll,
1905+
reinterpret_cast<const UChar*>(src + srcLenLong), i, lastCharKey, sizeof(lastCharKey));
1906+
1907+
if (prefixLen == 0 || prefixLen > dstLen - 2 || prefixLen > MAX_USHORT ||
1908+
lastCharKeyLen == 0)
1909+
{
19011910
return INTL_BAD_KEY_LENGTH;
1911+
}
19021912

19031913
fb_assert(dst[2 + prefixLen - 1] == '\0');
19041914
--prefixLen;
1905-
dstLen -= 2 + prefixLen;
1915+
1916+
fb_assert(lastCharKey[lastCharKeyLen - 1] == '\0');
1917+
--lastCharKeyLen;
19061918
}
19071919
else
19081920
prefixLen = 0;
19091921

1922+
bool fallbackToPrefixKey = false;
1923+
19101924
for (const auto& keyIt : *keys)
19111925
{
1912-
const ULONG keyLen = prefixLen + keyIt.getCount();
1926+
const UCHAR advance = prefixLen && lastCharKeyLen > 1 &&
1927+
keyIt.hasData() && lastCharKey[0] == keyIt.front() ? 1 : 0;
1928+
1929+
if (keyIt.getCount() - advance == 0)
1930+
{
1931+
fallbackToPrefixKey = true;
1932+
break;
1933+
}
1934+
1935+
const ULONG keyLen = prefixLen + keyIt.getCount() - advance;
19131936

19141937
if (keyLen > dstLen - 2 || keyLen > MAX_USHORT)
19151938
return INTL_BAD_KEY_LENGTH;
19161939

19171940
dst[0] = UCHAR(keyLen & 0xFF);
19181941
dst[1] = UCHAR(keyLen >> 8);
19191942

1920-
if (dst != dstStart)
1921-
memcpy(dst + 2, dstStart + 2, prefixLen);
1943+
if (dst != originalDst)
1944+
memcpy(dst + 2, originalDst + 2, prefixLen);
19221945

1923-
memcpy(dst + 2 + prefixLen, keyIt.begin(), keyIt.getCount());
1946+
memcpy(dst + 2 + prefixLen, keyIt.begin() + advance, keyIt.getCount() - advance);
19241947
dst += 2 + keyLen;
19251948
dstLen -= 2 + keyLen;
19261949
}
19271950

1928-
return dst - dstStart;
1951+
if (fallbackToPrefixKey)
1952+
break;
1953+
1954+
return dst - originalDst;
19291955
}
19301956
}
19311957
}
19321958

19331959
ULONG keyLen = icu->ucolGetSortKey(coll,
1934-
reinterpret_cast<const UChar*>(src), srcLenLong, dst + 2, dstLen - 3);
1960+
reinterpret_cast<const UChar*>(src), srcLenLong, originalDst + 2, originalDstLen - 3);
19351961

1936-
if (keyLen == 0 || keyLen > dstLen - 3 || keyLen > MAX_USHORT)
1962+
if (keyLen == 0 || keyLen > originalDstLen - 3 || keyLen > MAX_USHORT)
19371963
return INTL_BAD_KEY_LENGTH;
19381964

1939-
fb_assert(dst[2 + keyLen - 1] == '\0');
1965+
fb_assert(originalDst[2 + keyLen - 1] == '\0');
19401966
--keyLen;
19411967

1942-
dst[0] = UCHAR(keyLen & 0xFF);
1943-
dst[1] = UCHAR(keyLen >> 8);
1968+
originalDst[0] = UCHAR(keyLen & 0xFF);
1969+
originalDst[1] = UCHAR(keyLen >> 8);
19441970

19451971
return keyLen + 2;
19461972
}

0 commit comments

Comments
 (0)