Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Sources/RawStructuredFieldValues/ASCII.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ let asciiSlash = UInt8(ascii: "/")
// Single ASCII code units used as parsing and serialization landmarks.
let asciiPeriod = UInt8(ascii: ".")
let asciiComma = UInt8(ascii: ",")
let asciiCapitalA = UInt8(ascii: "A")
let asciiCapitalF = UInt8(ascii: "F")
let asciiCapitalZ = UInt8(ascii: "Z")
let asciiLowerA = UInt8(ascii: "a")
let asciiLowerF = UInt8(ascii: "f")
let asciiLowerZ = UInt8(ascii: "z")
// Closed ranges covering the ASCII letters `A`...`Z` and `a`...`z`.
let asciiCapitals = asciiCapitalA...asciiCapitalZ
let asciiLowercases = asciiLowerA...asciiLowerZ
// Closed ranges covering only the letters that are hexadecimal digits: `A`...`F` and `a`...`f`.
let asciiHexCapitals = asciiCapitalA...asciiCapitalF
let asciiHexLowercases = asciiLowerA...asciiLowerF
5 changes: 5 additions & 0 deletions Sources/RawStructuredFieldValues/ComponentTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ extension BareItem {

case .date:
throw StructuredHeaderError.invalidItem
case .displayString:
throw StructuredHeaderError.invalidItem
}
}
}
Expand Down Expand Up @@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {

/// A date item.
case date(Int)

/// A display string item.
case displayString(String)
}

extension RFC9651BareItem: ExpressibleByBooleanLiteral {
Expand Down
2 changes: 2 additions & 0 deletions Sources/RawStructuredFieldValues/Errors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
case invalidBoolean
case invalidToken
case invalidDate
case invalidDisplayString
case invalidList
case invalidDictionary
case missingKey
Expand All @@ -53,6 +54,7 @@ extension StructuredHeaderError {
public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
public static let invalidToken = StructuredHeaderError(.invalidToken)
public static let invalidDate = StructuredHeaderError(.invalidDate)
public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
public static let invalidList = StructuredHeaderError(.invalidList)
public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
public static let missingKey = StructuredHeaderError(.missingKey)
Expand Down
119 changes: 119 additions & 0 deletions Sources/RawStructuredFieldValues/FieldParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
return try self._parseAToken()
case asciiAt:
return try self._parseADate()
case asciiPercent:
return try self._parseADisplayString()
default:
throw StructuredHeaderError.invalidItem
}
Expand Down Expand Up @@ -491,6 +493,87 @@ extension StructuredFieldValueParser {
return try self._parseAnIntegerOrDecimal(isDate: true)
}

/// Parses a display string item: `%`, an opening DQUOTE, content bytes, and a closing DQUOTE.
///
/// Content bytes are collected verbatim, except that `%` must be followed by two hex digits which
/// are decoded into a single byte. On the closing DQUOTE the collected bytes are validated as UTF-8.
///
/// - Returns: `.displayString` wrapping the decoded string.
/// - Throws: `StructuredHeaderError.invalidDisplayString` if the opening DQUOTE is missing, a byte in
///   `0x00...0x1F` or `0x7F...` appears, a `%` escape is truncated or not valid hex, the collected
///   bytes are not valid UTF-8, or the input ends before the closing DQUOTE.
private mutating func _parseADisplayString() throws -> RFC9651BareItem {
    assert(self.underlyingData.first == asciiPercent)
    self.underlyingData.consumeFirst()

    // The `%` sigil must be followed immediately by the opening quote.
    if self.underlyingData.first != asciiDquote {
        throw StructuredHeaderError.invalidDisplayString
    }
    self.underlyingData.consumeFirst()

    var decodedBytes: [UInt8] = []

    while let byte = self.underlyingData.first {
        self.underlyingData.consumeFirst()

        switch byte {
        case asciiDquote:
            // Closing quote: everything collected so far must form valid UTF-8.
            #if compiler(>=6.0)
            if #available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *) {
                guard let decoded = String(validating: decodedBytes, as: UTF8.self) else {
                    throw StructuredHeaderError.invalidDisplayString
                }

                return .displayString(decoded)
            }
            #endif
            // Older compilers, or OS versions without `String(validating:as:)`.
            return try _decodeDisplayString(byteArray: &decodedBytes)

        case asciiPercent:
            // A percent escape needs two more bytes to form the hex pair.
            guard self.underlyingData.count >= 2 else {
                throw StructuredHeaderError.invalidDisplayString
            }

            // NOTE(review): a reviewer questioned whether the `ArraySlice` wrapper is needed here;
            // confirm against the parameter type of `EncodedHex.init` before removing it.
            let hexPair = EncodedHex(ArraySlice(self.underlyingData.prefix(2)))
            self.underlyingData = self.underlyingData.dropFirst(2)

            guard let octet = hexPair.decode() else {
                throw StructuredHeaderError.invalidDisplayString
            }
            decodedBytes.append(octet)

        case 0x00...0x1F, 0x7F...:
            // Control characters and bytes from 0x7F upwards may only appear percent-encoded.
            throw StructuredHeaderError.invalidDisplayString

        default:
            decodedBytes.append(byte)
        }
    }

    // Ran out of input without ever seeing the closing DQUOTE.
    throw StructuredHeaderError.invalidDisplayString
}

/// Validates `byteArray` as UTF-8 and wraps the result in a display string item.
///
/// This is the fallback used where `String(validating:as:)` is unavailable. It deliberately avoids
/// `String(validatingUTF8:)`: that initializer reads a NUL-terminated C string, so a decoded `%00`
/// byte in the middle of the content would silently truncate the result.
///
/// - Parameter byteArray: The decoded content bytes of the display string. The array is not modified.
/// - Returns: `.displayString` wrapping the decoded string.
/// - Throws: `StructuredHeaderError.invalidDisplayString` if `byteArray` is not well-formed UTF-8.
private func _decodeDisplayString(byteArray: inout [UInt8]) throws -> RFC9651BareItem {
    // `String(decoding:as:)` never fails; it repairs ill-formed sequences by substituting U+FFFD.
    // Well-formed input is preserved byte-for-byte, so the UTF-8 view of the result equals the input
    // exactly when nothing had to be repaired.
    let unicodeSequence = String(decoding: byteArray, as: UTF8.self)

    guard unicodeSequence.utf8.elementsEqual(byteArray) else {
        throw StructuredHeaderError.invalidDisplayString
    }

    return .displayString(unicodeSequence)
}

private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> {
var parameters = OrderedMap<Key, RFC9651BareItem>()

Expand Down Expand Up @@ -643,3 +726,39 @@ extension StrippingStringEscapesCollection.Index: Comparable {
lhs._baseIndex < rhs._baseIndex
}
}

/// `EncodedHex` holds the two ASCII characters of a (possibly invalid) hex-encoded byte.
struct EncodedHex {
    /// The character encoding the high four bits.
    private(set) var firstChar: UInt8
    /// The character encoding the low four bits.
    private(set) var secondChar: UInt8

    /// Creates an `EncodedHex` from exactly two bytes.
    ///
    /// Any collection of bytes is accepted, so callers can pass a slice of their own storage or an
    /// array literal without first converting it to an `ArraySlice`.
    ///
    /// - Parameter bytes: The two characters of the hex pair, high digit first.
    /// - Precondition: `bytes.count == 2`.
    init<Bytes: Collection>(_ bytes: Bytes) where Bytes.Element == UInt8 {
        precondition(bytes.count == 2)
        let firstIndex = bytes.startIndex
        self.firstChar = bytes[firstIndex]
        self.secondChar = bytes[bytes.index(after: firstIndex)]
    }

    /// Validates the hex pair and converts it to the byte it encodes.
    ///
    /// - Returns: The decoded byte, or `nil` if either character is not one of `0`-`9` or `a`-`f`.
    fileprivate func decode() -> UInt8? {
        guard
            let firstCharAsInteger = self.htoi(self.firstChar),
            let secondCharAsInteger = self.htoi(self.secondChar)
        else { return nil }

        // Both values are below 16, so the shifted high digit and the low digit never overlap.
        return (firstCharAsInteger << 4) + secondCharAsInteger
    }

    /// Converts a single hex character to its integer value.
    ///
    /// Only digits and lowercase `a`-`f` are accepted; any other character, including uppercase
    /// `A`-`F`, yields `nil`.
    private func htoi(_ asciiChar: UInt8) -> UInt8? {
        switch asciiChar {
        case asciiZero...asciiNine:
            return asciiChar - asciiZero
        case asciiLowerA...asciiLowerF:
            return asciiChar - asciiLowerA + 10
        default:
            return nil
        }
    }
}
38 changes: 38 additions & 0 deletions Sources/RawStructuredFieldValues/FieldSerializer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,29 @@ extension StructuredFieldValueSerializer {
}

self.data.append(contentsOf: String(date, radix: 10).utf8)
case .displayString(let displayString):
let bytes = displayString.utf8

self.data.append(asciiPercent)
self.data.append(asciiDquote)

for byte in bytes {
if byte == asciiPercent
|| byte == asciiDquote
|| (0x00...0x1F).contains(byte)
|| (0x7F...).contains(byte)
{
self.data.append(asciiPercent)

let encodedByte = UInt8.encodeToHex(byte)
self.data.append(encodedByte.firstChar)
self.data.append(encodedByte.secondChar)
} else {
self.data.append(byte)
}
}

self.data.append(asciiDquote)
}
}
}
Expand Down Expand Up @@ -245,3 +268,18 @@ extension String {
}
}
}

extension UInt8 {
    /// Splits `int` into its high and low four bits and returns the two hex characters that
    /// encode them, wrapped in an `EncodedHex`.
    fileprivate static func encodeToHex(_ int: Self) -> EncodedHex {
        let highNibble = int >> 4
        let lowNibble = int & 0x0F

        return EncodedHex([self.itoh(highNibble), self.itoh(lowNibble)])
    }

    /// Maps an integer to the ASCII character of its hex digit: `0`-`9` for values up to nine,
    /// lowercase letters starting at `a` for larger values.
    private static func itoh(_ int: Self) -> Self {
        if int > 9 {
            return asciiLowerA + int - 10
        }

        return asciiZero + int
    }
}
2 changes: 2 additions & 0 deletions Sources/sh-parser/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ extension RFC9651BareItem {
return "decimal \(d)"
case .date(let date):
return "date \(date)"
case .displayString(let displayString):
return "display string \(displayString)"
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,24 @@ final class StructuredFieldParserTests: XCTestCase {

XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
case .string(let typeValue) = typeDictionary["value"]
else {
XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
return
}

XCTAssertEqual(
typeName,
"displaystring",
"\(fixtureName): Expected type displaystring, got type \(typeName)"
)
XCTAssertEqual(
typeValue,
baseDisplayString,
"\(fixtureName): Got \(baseDisplayString), expected \(typeValue)"
)
default:
XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ extension RFC9651BareItem {
case (.some(.string("date")), .some(.integer(let value))):
self = .date(value)

case (.some(.string("displaystring")), .some(.string(let value))):
self = .displayString(value)

default:
preconditionFailure("Unexpected type object \(typeObject)")
}
Expand Down
111 changes: 111 additions & 0 deletions Tests/TestFixtures/display-string.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
[
{
"name": "basic display string (ascii content)",
"raw": ["%\"foo bar\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
},
{
"name": "all printable ascii",
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
},
{
"name": "non-ascii display string (uppercase escaping)",
"raw": ["%\"f%C3%BC%C3%BC\""],
"canonical": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"must_fail": true
},
{
"name": "non-ascii display string (lowercase escaping)",
"raw": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "füü"}, {}]
},
{
"name": "tab in display string",
"raw": ["%\"\t\""],
"header_type": "item",
"must_fail": true
},
{
"name": "newline in display string",
"raw": ["%\"\n\""],
"header_type": "item",
"must_fail": true
},
{
"name": "single quoted display string",
"raw": ["%'foo'"],
"header_type": "item",
"must_fail": true
},
{
"name": "unquoted display string",
"raw": ["%foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string missing initial quote",
"raw": ["%foo\""],
"header_type": "item",
"must_fail": true
},
{
"name": "unbalanced display string",
"raw": ["%\"foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string quoting",
"raw": ["%\"foo %22bar%22 \\ baz\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
},
{
"name": "bad display string escaping",
"raw": ["%\"foo %a"],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 2-byte seq)",
"raw": ["%\"%c3%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid sequence id)",
"raw": ["%\"%a0%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid hex)",
"raw": ["%\"%g0%1w\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 3-byte seq)",
"raw": ["%\"%e2%28%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 4-byte seq)",
"raw": ["%\"%f0%28%8c%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "BOM in display string",
"raw": ["%\"BOM: %ef%bb%bf\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
}
]
Loading