Skip to content

Commit effa8ed

Browse files
committed
HTTPCORE-637: RFC 3986 URI: parse/resolve/normalize.
Table-driven ASCII, no regex; correct §5.2.4 dot-segment trailing slash. Drop Rfc3986UriBuilder; helpers @internal; RFC example tests green.
1 parent 1ee9e08 commit effa8ed

File tree

9 files changed

+1417
-0
lines changed

9 files changed

+1417
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
package org.apache.hc.core5.net.uri;
28+
29+
import org.apache.hc.core5.annotation.Contract;
30+
import org.apache.hc.core5.annotation.Internal;
31+
import org.apache.hc.core5.annotation.ThreadingBehavior;
32+
33+
/**
34+
* ASCII classification and utilities backed by a compact lookup table.
35+
*
36+
*/
37+
@Internal
38+
@Contract(threading = ThreadingBehavior.STATELESS)
39+
final class Ascii {

    // One bit per RFC 3986 character class; a table entry is the OR of every
    // class the corresponding ASCII code point belongs to.
    private static final int F_ALPHA = 0x01;
    private static final int F_DIGIT = 0x02;
    private static final int F_UNRESERVED = 0x04;
    private static final int F_SUBDELIM = 0x08;
    private static final int F_GENDELIM = 0x10;
    private static final int F_HEXDIGIT = 0x20;
    private static final int F_PCHAR_EXTRA = 0x40; // ':' or '@'

    /** Class flags indexed by code point; covers 0..127 only. */
    private static final byte[] TABLE = new byte[128];

    static {
        // ALPHA and DIGIT are part of "unreserved", so flag both at once.
        flagRange('A', 'Z', F_ALPHA | F_UNRESERVED);
        flagRange('a', 'z', F_ALPHA | F_UNRESERVED);
        flagRange('0', '9', F_DIGIT | F_HEXDIGIT | F_UNRESERVED);
        flagRange('A', 'F', F_HEXDIGIT);
        flagRange('a', 'f', F_HEXDIGIT);

        // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
        flagEach("-._~", F_UNRESERVED);
        // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        flagEach("!$&'()*+,;=", F_SUBDELIM);
        // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
        flagEach(":/?#[]@", F_GENDELIM);
        // The two gen-delims that pchar additionally admits; a raw '%' is deliberately not flagged.
        flagEach(":@", F_PCHAR_EXTRA);
    }

    /** Adds {@code flags} to every code point in the inclusive range {@code from..to}. */
    private static void flagRange(final char from, final char to, final int flags) {
        for (int cp = from; cp <= to; cp++) {
            TABLE[cp] |= flags;
        }
    }

    /** Adds {@code flags} to every character contained in {@code chars}. */
    private static void flagEach(final String chars, final int flags) {
        for (int k = 0; k < chars.length(); k++) {
            TABLE[chars.charAt(k)] |= flags;
        }
    }

    /** Tells whether {@code c} is an ASCII code point carrying at least one of {@code flags}. */
    private static boolean has(final int c, final int flags) {
        return isAscii(c) && (TABLE[c] & flags) != 0;
    }

    /**
     * Tells whether {@code c} lies in the ASCII range {@code 0..127}.
     */
    static boolean isAscii(final int c) {
        return c >= 0 && c <= 0x7F;
    }

    /**
     * Tells whether {@code c} is an ASCII letter {@code A-Z} or {@code a-z}.
     */
    static boolean isAlpha(final int c) {
        return has(c, F_ALPHA);
    }

    /**
     * Tells whether {@code c} is an ASCII decimal digit {@code 0-9}.
     */
    static boolean isDigit(final int c) {
        return has(c, F_DIGIT);
    }

    /**
     * Tells whether {@code c} is an ASCII hexadecimal digit ({@code 0-9}, {@code A-F}, {@code a-f}).
     */
    static boolean isHexDigit(final int c) {
        return has(c, F_HEXDIGIT);
    }

    /**
     * Tells whether {@code c} is an RFC 3986 "unreserved" character.
     */
    static boolean isUnreserved(final int c) {
        return has(c, F_UNRESERVED);
    }

    /**
     * Tells whether {@code c} is an RFC 3986 "sub-delims" character.
     */
    static boolean isSubDelim(final int c) {
        return has(c, F_SUBDELIM);
    }

    /**
     * Tells whether {@code c} is an RFC 3986 "gen-delims" character.
     */
    static boolean isGenDelim(final int c) {
        return has(c, F_GENDELIM);
    }

    /**
     * Tells whether {@code c} may appear literally in a path segment:
     * unreserved / sub-delims / ":" / "@". The percent sign is not accepted here
     * because it only ever introduces a pct-encoded triplet.
     */
    static boolean isPchar(final int c) {
        return has(c, F_UNRESERVED | F_SUBDELIM | F_PCHAR_EXTRA);
    }

    /**
     * Converts the ASCII letters {@code A-Z} of {@code s} to lower case and leaves
     * every other character, including non-ASCII letters, untouched.
     *
     * @param s the text to convert; may be {@code null}
     * @return {@code null} for {@code null} input, {@code s} itself when it contains
     * no ASCII upper-case letter, otherwise a new lower-cased string
     */
    static String lowerAscii(final String s) {
        if (s == null) {
            return null;
        }
        final int len = s.length();
        // Locate the first character that needs conversion, if any.
        int firstUpper = 0;
        while (firstUpper < len && !isUpper(s.charAt(firstUpper))) {
            firstUpper++;
        }
        if (firstUpper == len) {
            return s;
        }
        final char[] chars = s.toCharArray();
        for (int k = firstUpper; k < len; k++) {
            if (isUpper(chars[k])) {
                chars[k] = (char) (chars[k] + ('a' - 'A'));
            }
        }
        return new String(chars);
    }

    /** Tells whether {@code ch} is one of the ASCII upper-case letters {@code A-Z}. */
    private static boolean isUpper(final char ch) {
        return ch >= 'A' && ch <= 'Z';
    }

    private Ascii() {
    }

}
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
package org.apache.hc.core5.net.uri;
28+
29+
import java.util.ArrayDeque;
30+
import java.util.Deque;
31+
32+
import org.apache.hc.core5.annotation.Contract;
33+
import org.apache.hc.core5.annotation.Internal;
34+
import org.apache.hc.core5.annotation.ThreadingBehavior;
35+
36+
/**
37+
* RFC 3986 §5.2.4 dot-segment removal.
38+
* <p>
39+
* - Preserves empty segments inside the path (e.g. {@code "/a//b"}).
40+
* - Does <strong>not</strong> preserve the artificial leading empty segment of absolute paths.
41+
* - Ensures a trailing slash when the terminal segment is {@code "."} or {@code ".."},
42+
* except for the pure relative {@code ".."} (no trailing slash).
43+
*
44+
*/
45+
@Internal
46+
@Contract(threading = ThreadingBehavior.STATELESS)
47+
final class DotSegments {

    /**
     * Removes {@code "."} and {@code ".."} segments from a path as described by
     * RFC 3986 §5.2.4.
     * <p>
     * The path is split on {@code '/'} and processed segment by segment:
     * </p>
     * <ul>
     * <li>{@code "."} is dropped.</li>
     * <li>{@code ".."} removes the preceding segment. When there is nothing left to
     * remove it is discarded for absolute paths and kept for relative paths, so
     * leading {@code ".."} segments of a relative path survive.</li>
     * <li>Every other segment, including an empty one such as in {@code "/a//b"},
     * is kept as is.</li>
     * <li>A terminal {@code "."} or {@code ".."} leaves an empty last segment, i.e. a
     * trailing slash ({@code "/a/b/."} becomes {@code "/a/b/"}), unless the result
     * would end in a retained {@code ".."}.</li>
     * </ul>
     * <p>
     * The trailing slash is modelled as a real (empty) trailing segment rather than
     * patched onto the rebuilt string. This keeps an empty segment that precedes a
     * terminal dot segment ({@code "/a//."} becomes {@code "/a//"}) and guarantees
     * that a relative path which reduces to nothing yields {@code ""} instead of
     * {@code "/"} ({@code "./"} and {@code "a/../"} both become {@code ""}), so a
     * relative path is never turned into the root path by trailing-slash handling.
     * </p>
     *
     * @param path the path to process; may be {@code null}
     * @return {@code null} for {@code null} input, {@code ""} for empty input,
     * otherwise the path without dot segments
     */
    static String remove(final String path) {
        if (path == null) {
            return null;
        }
        if (path.isEmpty()) {
            return "";
        }

        final int n = path.length();
        final boolean absolute = path.charAt(0) == '/';
        final Deque<String> out = new ArrayDeque<>();

        // For absolute paths start after the leading '/', so that the artificial
        // empty first segment never reaches the output.
        int start = absolute ? 1 : 0;
        while (true) {
            final int slash = path.indexOf('/', start);
            final boolean isLast = slash == -1;
            final String seg = path.substring(start, isLast ? n : slash);

            final boolean parent = "..".equals(seg);
            if (parent || ".".equals(seg)) {
                if (parent) {
                    stepUp(out, absolute);
                }
                // A terminal dot segment stands for "this directory": record it as an
                // empty last segment (rendered as a trailing slash), except after a
                // retained relative "..", which is left without a slash.
                if (isLast && !"..".equals(out.peekLast())) {
                    out.addLast("");
                }
            } else {
                // Ordinary and empty segments are preserved verbatim.
                out.addLast(seg);
            }

            if (isLast) {
                break;
            }
            start = slash + 1;
        }

        final StringBuilder b = new StringBuilder(n);
        if (absolute) {
            b.append('/');
        }
        boolean first = true;
        for (final String seg : out) {
            if (!first) {
                b.append('/');
            }
            b.append(seg);
            first = false;
        }
        return b.toString();
    }

    /**
     * Applies one {@code ".."}: removes the last collected segment, or, when there is
     * none to remove (nothing collected yet, or only retained {@code ".."} segments),
     * keeps the {@code ".."} for relative paths and discards it for absolute ones.
     */
    private static void stepUp(final Deque<String> out, final boolean absolute) {
        final String tail = out.peekLast();
        if (tail != null && !"..".equals(tail)) {
            out.removeLast();
        } else if (!absolute) {
            out.addLast("..");
        }
    }

    private DotSegments() {
    }

}

0 commit comments

Comments
 (0)