Skip to content

Commit 46af4f1

Browse files
committed
HTTPCORE-637: RFC 3986 URI: parse/resolve/normalize.
Table-driven ASCII classification, no regex; correct RFC 3986 §5.2.4 dot-segment removal with respect to the trailing slash. Drop Rfc3986UriBuilder; mark helpers @Internal; RFC example tests green.
1 parent 1ee9e08 commit 46af4f1

File tree

9 files changed

+1475
-49
lines changed

9 files changed

+1475
-49
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java

Lines changed: 7 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@
3636
import java.util.Arrays;
3737
import java.util.BitSet;
3838
import java.util.Collections;
39-
import java.util.LinkedList;
4039
import java.util.List;
4140

4241
import org.apache.hc.core5.http.HttpHost;
4342
import org.apache.hc.core5.http.NameValuePair;
4443
import org.apache.hc.core5.http.URIScheme;
4544
import org.apache.hc.core5.http.message.BasicNameValuePair;
4645
import org.apache.hc.core5.http.message.ParserCursor;
46+
import org.apache.hc.core5.net.uri.Rfc3986Uri;
4747
import org.apache.hc.core5.util.Args;
4848
import org.apache.hc.core5.util.TextUtils;
4949
import org.apache.hc.core5.util.Tokenizer;
@@ -1118,58 +1118,16 @@ public URIBuilder normalizeSyntax() {
11181118
* @since 5.3
11191119
*/
11201120
public URIBuilder optimize() {
// NOTE(review): this hunk replaces the in-place normalization of the builder's
// fields (lines marked '-') with a round trip through Rfc3986Uri: the builder is
// serialized with toString(), parsed, optimized and re-wrapped (lines marked '+').
1121-
final String scheme = this.scheme;
1122-
if (scheme != null) {
1123-
this.scheme = TextUtils.toLowerCase(scheme);
1124-
}
1125-
1126-
if (this.pathRootless) {
1121+
final String raw = this.toString();
1122+
try {
1123+
final Rfc3986Uri u = Rfc3986Uri.parse(raw).optimize();
1124+
// NOTE(review): the new code returns a NEW URIBuilder created from the optimized
// string, whereas the removed code updated this instance and returned `this`.
// Callers that ignore the return value would no longer observe an optimized
// builder - confirm this change of contract is intended.
return new URIBuilder(u.toString());
1125+
} catch (final IllegalArgumentException | URISyntaxException ex) {
11271126
// In the new version this is the failure path: the exception is discarded and the
// current builder is returned as-is, i.e. not optimized.
return this;
11281127
}
1129-
1130-
// Force Percent-Encoding re-encoding
1131-
this.encodedSchemeSpecificPart = null;
1132-
this.encodedAuthority = null;
1133-
this.encodedUserInfo = null;
1134-
this.encodedPath = null;
1135-
this.encodedQuery = null;
1136-
this.encodedFragment = null;
1137-
1138-
final String host = this.host;
1139-
if (host != null) {
1140-
this.host = TextUtils.toLowerCase(host);
1141-
}
1142-
1143-
if (this.pathSegments != null) {
1144-
final List<String> inputSegments = this.pathSegments;
1145-
if (!inputSegments.isEmpty()) {
1146-
final LinkedList<String> outputSegments = new LinkedList<>();
1147-
for (final String inputSegment : inputSegments) {
1148-
if (!inputSegment.isEmpty() && !".".equals(inputSegment)) {
1149-
if ("..".equals(inputSegment)) {
1150-
if (!outputSegments.isEmpty()) {
1151-
outputSegments.removeLast();
1152-
}
1153-
} else {
1154-
outputSegments.addLast(inputSegment);
1155-
}
1156-
}
1157-
}
1158-
if (!inputSegments.isEmpty()) {
1159-
final String lastSegment = inputSegments.get(inputSegments.size() - 1);
1160-
if (lastSegment.isEmpty()) {
1161-
outputSegments.addLast("");
1162-
}
1163-
}
1164-
this.pathSegments = outputSegments;
1165-
} else {
1166-
this.pathSegments = Collections.singletonList("");
1167-
}
1168-
}
1169-
1170-
return this;
11711128
}
11721129

1130+
11731131
/**
11741132
* Converts this instance to a URI string.
11751133
*
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
package org.apache.hc.core5.net.uri;
28+
29+
import org.apache.hc.core5.annotation.Contract;
30+
import org.apache.hc.core5.annotation.Internal;
31+
import org.apache.hc.core5.annotation.ThreadingBehavior;
32+
33+
/**
34+
* ASCII classification &amp; utilities backed by a compact lookup table.
35+
*
36+
*/
37+
@Internal
38+
@Contract(threading = ThreadingBehavior.STATELESS)
39+
final class Ascii {

    // One bit per character class; a table entry is the OR of every class the character belongs to.
    private static final byte ALPHA = 0x01;
    private static final byte DIGIT = 0x02;
    private static final byte UNRESERVED = 0x04;
    private static final byte SUBDELIM = 0x08;
    private static final byte GENDELIM = 0x10;
    private static final byte HEXDIGIT = 0x20;
    private static final byte PCHAR_EX = 0x40; // ':' or '@'

    /** Every class a pchar may belong to; a raw '%' is deliberately not part of it. */
    private static final int PCHAR = UNRESERVED | SUBDELIM | PCHAR_EX;

    /** Class bits indexed by ASCII code point (0..127). */
    private static final byte[] CLASS = new byte[128];

    static {
        // unreserved = alpha / digit / "-" / "." / "_" / "~", so letters and digits carry that bit too
        flagRange('A', 'Z', ALPHA | UNRESERVED);
        flagRange('a', 'z', ALPHA | UNRESERVED);
        flagRange('0', '9', DIGIT | HEXDIGIT | UNRESERVED);
        flagRange('A', 'F', HEXDIGIT);
        flagRange('a', 'f', HEXDIGIT);
        flagEach("-._~", UNRESERVED);

        // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        flagEach("!$&'()*+,;=", SUBDELIM);

        // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
        flagEach(":/?#[]@", GENDELIM);

        // the two gen-delims that are additionally allowed inside a path segment
        flagEach(":@", PCHAR_EX);
    }

    /** ORs {@code mask} into the table entry of every code point in {@code [first, last]}. */
    private static void flagRange(final char first, final char last, final int mask) {
        for (int cp = first; cp <= last; cp++) {
            CLASS[cp] |= mask;
        }
    }

    /** ORs {@code mask} into the table entry of every character of {@code chars}. */
    private static void flagEach(final String chars, final int mask) {
        for (int i = 0; i < chars.length(); i++) {
            CLASS[chars.charAt(i)] |= mask;
        }
    }

    /** Tells whether {@code c} is inside the table and has at least one bit of {@code mask} set. */
    private static boolean has(final int c, final int mask) {
        return c >= 0 && c < CLASS.length && (CLASS[c] & mask) != 0;
    }

    /**
     * Tells whether {@code c} is a 7-bit ASCII code point (0..127).
     */
    static boolean isAscii(final int c) {
        return c >= 0 && c <= 0x7F;
    }

    /**
     * Tells whether {@code c} is an ASCII letter.
     */
    static boolean isAlpha(final int c) {
        return has(c, ALPHA);
    }

    /**
     * Tells whether {@code c} is an ASCII decimal digit.
     */
    static boolean isDigit(final int c) {
        return has(c, DIGIT);
    }

    /**
     * Tells whether {@code c} is an ASCII hexadecimal digit, in either case.
     */
    static boolean isHexDigit(final int c) {
        return has(c, HEXDIGIT);
    }

    /**
     * Tells whether {@code c} is an unreserved character: letter, digit, '-', '.', '_' or '~'.
     */
    static boolean isUnreserved(final int c) {
        return has(c, UNRESERVED);
    }

    /**
     * Tells whether {@code c} is one of the sub-delims characters.
     */
    static boolean isSubDelim(final int c) {
        return has(c, SUBDELIM);
    }

    /**
     * Tells whether {@code c} is one of the gen-delims characters.
     */
    static boolean isGenDelim(final int c) {
        return has(c, GENDELIM);
    }

    /**
     * Tells whether {@code c} is unreserved, a sub-delim, ':' or '@'. The percent sign is
     * not accepted here because it is reserved for pct-encoding.
     */
    static boolean isPchar(final int c) {
        return has(c, PCHAR);
    }

    /**
     * Converts the letters 'A'..'Z' to lower case and leaves every other character,
     * including non-ASCII ones, as it is.
     *
     * @param s the text to convert, may be {@code null}
     * @return {@code null} for {@code null} input, {@code s} itself when it contains no
     * upper-case ASCII letter, otherwise a converted copy
     */
    static String lowerAscii(final String s) {
        if (s == null) {
            return null;
        }
        // The copy is made lazily so that already lower-case input is handed back untouched.
        char[] lowered = null;
        final int len = s.length();
        for (int i = 0; i < len; i++) {
            final char ch = s.charAt(i);
            if (ch >= 'A' && ch <= 'Z') {
                if (lowered == null) {
                    lowered = s.toCharArray();
                }
                lowered[i] = (char) (ch + ('a' - 'A'));
            }
        }
        return lowered == null ? s : new String(lowered);
    }

    private Ascii() {
        // static utility, never instantiated
    }
}

0 commit comments

Comments
 (0)