Skip to content

Commit fe5a123

Browse files
deps(java): bump com.ibm.icu:icu4j from 77.1 to 78.1 (#15417)
* deps(java): bump com.ibm.icu:icu4j from 77.1 to 78.1

  Bumps [com.ibm.icu:icu4j](https://github.com/unicode-org/icu) from 77.1 to 78.1.
  - [Release notes](https://github.com/unicode-org/icu/releases)
  - [Commits](https://github.com/unicode-org/icu/commits/release-78.1)

  ---
  updated-dependencies:
  - dependency-name: com.ibm.icu:icu4j
    dependency-version: '78.1'
    dependency-type: direct:production
    update-type: version-update:semver-major
  ...

  Signed-off-by: dependabot[bot] <support@github.com>

* build: upgrade to icu 78.1

  fix build refs to adapt to new tag structure, source tarball filenames, and win32 release binary names. regenerate sources, adapting to property changes

* build: gradlew writeLocks

* build: gradlew writeChecksums

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Robert Muir <rmuir@apache.org>
1 parent d086036 commit fe5a123

File tree

21 files changed

+91
-38
lines changed

21 files changed

+91
-38
lines changed

gradle/libs.versions.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ groovy = "5.0.2"
2121
# test assertions
2222
hamcrest = "3.0"
2323
# analysis/icu/, gradle regeneration unicode support
24-
icu4j = "77.1"
24+
icu4j = "78.1"
2525
# queryparsers/ grammars
2626
javacc = "7.0.13"
2727
# analysis/ tokenizer grammars
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "8e3d5182fcd87cc981977746571a5cde39b6abdc",
3-
"property:icuConfig": "77.1"
2+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "bc95fce925f26a56930b282c5fbfeed4318b4792",
3+
"property:icuConfig": "78.1"
44
}

lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424

2525
/**
2626
* This file contains unicode properties used by various {@link CharTokenizer}s. The data was
27-
* generated using ICU4J v77.1.0.0, unicode version: 16.0.0.0.
27+
* generated using ICU4J v78.1.0.0, unicode version: 17.0.0.0.
2828
*/
2929
public final class UnicodeProps {
3030
private UnicodeProps() {}
3131

3232
/** Unicode version that was used to generate this file: {@value} */
33-
public static final String UNICODE_VERSION = "16.0.0.0";
33+
public static final String UNICODE_VERSION = "17.0.0.0";
3434

3535
/** Bitset with Unicode WHITESPACE code points. */
3636
public static final Bits WHITESPACE =

lucene/analysis/icu/build.gradle

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ tasks.register("regenerateUtr30DataFiles", {
107107
ignoreExitValue = false
108108
workingDir utr30DataDir
109109
args = [
110-
"release-${icu4jVersion.replace(".", "-")}"
110+
"release-${icu4jVersion}"
111111
]
112112
}
113113

@@ -171,7 +171,7 @@ tasks.register("compileIcuWindows", {
171171
icuBuildDir.mkdirs()
172172

173173
// Download binaries matching the icu4j version declared in gradle/libs.versions.toml
174-
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-Win64-MSVC2019.zip")
174+
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v}/icu4c-${v}-Win64-MSVC2022.zip")
175175

176176
logger.lifecycle("Trying to download binary ICU version: ${v} from:\n ${src}")
177177
Files.write(icuBinZip.toPath(), src.toURL().openStream().bytes)
@@ -193,12 +193,12 @@ tasks.register("compileIcuLinux", {
193193
}
194194

195195
def v = icu4jVersion
196-
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}-src.tgz")
196+
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v}-sources.tgz")
197197

198198
// Download sources matching the icu4j version declared in gradle/libs.versions.toml
199199
if (!icuSrcTgz.exists()) {
200200
icuBuildDir.mkdirs()
201-
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-src.tgz")
201+
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v}/icu4c-${v}-sources.tgz")
202202
logger.lifecycle("Trying to download and compile ICU version: ${v} from:\n ${src}")
203203
Files.write(icuSrcTgz.toPath(), src.toURL().openStream().bytes)
204204
logger.lifecycle("Downloaded ${icuSrcTgz.size()} bytes.")

lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@
4848
0384..0385>
4949
0483..0487>
5050
0559>
51-
0591..05A1>
52-
05A3..05BD>
51+
0591..05BD>
5352
05BF>
5453
05C1..05C2>
55-
05C4>
54+
05C4..05C5>
55+
05C7>
5656
064B..0652>
5757
0657..0658>
5858
06DF..06E0>
@@ -118,6 +118,8 @@
118118
1A7F>
119119
1AB0..1ABE>
120120
1AC1..1ACB>
121+
1ACF..1ADD>
122+
1AE0..1AEB>
121123
1B34>
122124
1B44>
123125
1B6B..1B73>
@@ -131,6 +133,7 @@
131133
1CF4>
132134
1CF7..1CF9>
133135
1D2C..1D6A>
136+
1D9B..1DBE>
134137
1DC4..1DCF>
135138
1DF5..1DFF>
136139
1FBD>
@@ -151,6 +154,7 @@ A69C..A69D>
151154
A6F0..A6F1>
152155
A700..A721>
153156
A788..A78A>
157+
A7F1>
154158
A7F8..A7F9>
155159
A806>
156160
A82C>
@@ -184,6 +188,7 @@ FFE3>
184188
10D22..10D27>
185189
10D4E>
186190
10D69..10D6D>
191+
10EFA>
187192
10EFD..10EFF>
188193
10F46..10F50>
189194
10F82..10F85>
@@ -221,6 +226,7 @@ FFE3>
221226
11D42>
222227
11D44..11D45>
223228
11D97>
229+
11DD9>
224230
11F41..11F42>
225231
11F5A>
226232
13447..13455>
@@ -622,9 +628,6 @@ A7FF>004D
622628
# Additional signs and diacritic, from examination of [:Mark:]&[:Lm:]
623629
# Rule: verbatim
624630
0358..035C>
625-
05A2>
626-
05C5>
627-
05C7>
628631
0610..061A>
629632
0640>
630633
06D6..06DE>

lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
660660
11DA7>0037 # GUNJALA GONDI DIGIT SEVEN
661661
11DA8>0038 # GUNJALA GONDI DIGIT EIGHT
662662
11DA9>0039 # GUNJALA GONDI DIGIT NINE
663+
11DE0>0030 # TOLONG SIKI DIGIT ZERO
664+
11DE1>0031 # TOLONG SIKI DIGIT ONE
665+
11DE2>0032 # TOLONG SIKI DIGIT TWO
666+
11DE3>0033 # TOLONG SIKI DIGIT THREE
667+
11DE4>0034 # TOLONG SIKI DIGIT FOUR
668+
11DE5>0035 # TOLONG SIKI DIGIT FIVE
669+
11DE6>0036 # TOLONG SIKI DIGIT SIX
670+
11DE7>0037 # TOLONG SIKI DIGIT SEVEN
671+
11DE8>0038 # TOLONG SIKI DIGIT EIGHT
672+
11DE9>0039 # TOLONG SIKI DIGIT NINE
663673
11F50>0030 # KAWI DIGIT ZERO
664674
11F51>0031 # KAWI DIGIT ONE
665675
11F52>0032 # KAWI DIGIT TWO

lucene/analysis/icu/src/data/utr30/nfc.txt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#
1010
# Complete data for Unicode NFC normalization.
1111

12-
* Unicode 16.0.0
12+
* Unicode 17.0.0
1313

1414
# Canonical_Combining_Class (ccc) values
1515
0300..0314:230
@@ -222,7 +222,12 @@
222222
1AC3..1AC4:220
223223
1AC5..1AC9:230
224224
1ACA:220
225-
1ACB..1ACE:230
225+
1ACB..1ADC:230
226+
1ADD:220
227+
1AE0..1AE5:230
228+
1AE6:220
229+
1AE7..1AEA:230
230+
1AEB:234
226231
1B34:7
227232
1B44:9
228233
1B6B:230
@@ -321,6 +326,7 @@ FE2E..FE2F:230
321326
10D24..10D27:230
322327
10D69..10D6D:230
323328
10EAB..10EAC:230
329+
10EFA..10EFB:220
324330
10EFD..10EFF:220
325331
10F46..10F47:220
326332
10F48..10F4A:230
@@ -402,6 +408,10 @@ FE2E..FE2F:230
402408
1E4EF:230
403409
1E5EE:230
404410
1E5EF:220
411+
1E6E3:230
412+
1E6E6:230
413+
1E6EE..1E6EF:230
414+
1E6F5:230
405415
1E8D0..1E8D6:220
406416
1E944..1E949:230
407417
1E94A:7

lucene/analysis/icu/src/data/utr30/nfkc.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# to NFKC one-way mappings.
1414
# Use this file as the second gennorm2 input file after nfc.txt.
1515

16-
* Unicode 16.0.0
16+
* Unicode 17.0.0
1717

1818
00A0>0020
1919
00A8>0020 0308
@@ -1367,6 +1367,7 @@
13671367
A69C>044A
13681368
A69D>044C
13691369
A770>A76F
1370+
A7F1>0053
13701371
A7F2>0043
13711372
A7F3>0046
13721373
A7F4>0051

lucene/analysis/icu/src/data/utr30/nfkc_cf.txt

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
1313
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
1414

15-
* Unicode 16.0.0
15+
* Unicode 17.0.0
1616

1717
0041>0061
1818
0042>0062
@@ -2465,11 +2465,15 @@ A7C7>A7C8
24652465
A7C9>A7CA
24662466
A7CB>0264
24672467
A7CC>A7CD
2468+
A7CE>A7CF
24682469
A7D0>A7D1
2470+
A7D2>A7D3
2471+
A7D4>A7D5
24692472
A7D6>A7D7
24702473
A7D8>A7D9
24712474
A7DA>A7DB
24722475
A7DC>019B
2476+
A7F1>0073
24732477
A7F2>0063
24742478
A7F3>0066
24752479
A7F4>0071
@@ -4201,6 +4205,31 @@ FFF0..FFF8>
42014205
16E5D>16E7D
42024206
16E5E>16E7E
42034207
16E5F>16E7F
4208+
16EA0>16EBB
4209+
16EA1>16EBC
4210+
16EA2>16EBD
4211+
16EA3>16EBE
4212+
16EA4>16EBF
4213+
16EA5>16EC0
4214+
16EA6>16EC1
4215+
16EA7>16EC2
4216+
16EA8>16EC3
4217+
16EA9>16EC4
4218+
16EAA>16EC5
4219+
16EAB>16EC6
4220+
16EAC>16EC7
4221+
16EAD>16EC8
4222+
16EAE>16EC9
4223+
16EAF>16ECA
4224+
16EB0>16ECB
4225+
16EB1>16ECC
4226+
16EB2>16ECD
4227+
16EB3>16ECE
4228+
16EB4>16ECF
4229+
16EB5>16ED0
4230+
16EB6>16ED1
4231+
16EB7>16ED2
4232+
16EB8>16ED3
42044233
1BCA0..1BCA3>
42054234
1CCD6>0061
42064235
1CCD7>0062
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"lucene/analysis/icu/src/data/uax29/Default.rbbi": "161deef4109a0dcf68a3f52339e973e07ad9579d",
33
"lucene/analysis/icu/src/data/uax29/MyanmarSyllable.rbbi": "fe2ae2b8d556fd64e96f1cd45434035e107ada10",
4-
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk": "afa6f8ffbf50054911ba7beaecf878e9a7464876",
5-
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk": "6ad4e9443110cb83412a0bff1ba43031c1604cde",
6-
"property:icuConfig": "77.1"
4+
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk": "80484c29b39798ae7f927415735c9e5e7b564086",
5+
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk": "bc65ccfdffc5597a60b8c1608262be218a4c646f",
6+
"property:icuConfig": "78.1"
77
}

0 commit comments

Comments
 (0)