
Commit 36906ed

Enables linter rule UP006 (#2744)
Part of #2700

# Rationale for this change

This enables linter rule UP006. Most of the changes replace the `typing` aliases (`Dict`, `List`, `Tuple`, `Type`) with the equivalent built-in generic types (`dict`, `list`, `tuple`, `type`). This is a big one, sorry! Because the process is so mechanical, it seemed easier to do it in a single PR than to break it up.

## Are these changes tested?

`make lint` and `make test` should pass.

## Are there any user-facing changes?

None.
1 parent 60ebe93 commit 36906ed
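For readers unfamiliar with UP006 (ruff's pyupgrade rule for PEP 585 generics), the rewrite is purely mechanical: annotations that use the `typing` aliases become the built-in generic types, which are valid in annotations on Python 3.9+. A minimal before/after sketch of the pattern (the function here is illustrative, not taken from this codebase):

```python
# Before: `typing` aliases, flagged by UP006
from typing import Dict, List, Tuple


def summarize(values: List[int]) -> Dict[str, Tuple[int, int]]:
    return {"range": (min(values), max(values))}


# After: built-in generics (PEP 585); no typing import needed
def summarize_pep585(values: list[int]) -> dict[str, tuple[int, int]]:
    return {"range": (min(values), max(values))}
```

The diffs below are that same substitution applied across `Dict`, `List`, `Tuple`, and `Type` throughout the Avro layer (and the rest of the 85 files).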


85 files changed, +1020 −1103 lines changed

pyiceberg/avro/codecs/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -26,7 +26,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, Literal, Type
+from typing import Literal
 
 from typing_extensions import TypeAlias
 
@@ -40,7 +40,7 @@
 
 AVRO_CODEC_KEY = "avro.codec"
 
-KNOWN_CODECS: Dict[AvroCompressionCodec, Type[Codec] | None] = {
+KNOWN_CODECS: dict[AvroCompressionCodec, type[Codec] | None] = {
     "null": None,
     "bzip2": BZip2Codec,
     "snappy": SnappyCodec,
@@ -49,4 +49,4 @@
 }
 
 # Map to convert the naming from Iceberg to Avro
-CODEC_MAPPING_ICEBERG_TO_AVRO: Dict[str, str] = {"gzip": "deflate", "zstd": "zstandard"}
+CODEC_MAPPING_ICEBERG_TO_AVRO: dict[str, str] = {"gzip": "deflate", "zstd": "zstandard"}
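As a usage note, the two module-level mappings above are what let callers translate an Iceberg codec name into a concrete codec class. A hedged sketch of that lookup (the `resolve_codec` helper is hypothetical, not part of `pyiceberg.avro.codecs`):

```python
from pyiceberg.avro.codecs import CODEC_MAPPING_ICEBERG_TO_AVRO, KNOWN_CODECS


def resolve_codec(iceberg_name: str):
    # Hypothetical helper: map the Iceberg name ("gzip", "zstd", ...) to its Avro
    # name, then look up the registered codec class; None means the "null" codec.
    avro_name = CODEC_MAPPING_ICEBERG_TO_AVRO.get(iceberg_name, iceberg_name)
    return KNOWN_CODECS.get(avro_name)


print(resolve_codec("zstd"))  # the codec class registered under "zstandard"
```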

pyiceberg/avro/decoder.py

Lines changed: 5 additions & 8 deletions
@@ -18,9 +18,6 @@
 from abc import ABC, abstractmethod
 from io import SEEK_CUR
 from typing import (
-    Dict,
-    List,
-    Tuple,
     cast,
 )
 
@@ -67,11 +64,11 @@ def read_int(self) -> int:
         datum = (n >> 1) ^ -(n & 1)
         return datum
 
-    def read_ints(self, n: int) -> Tuple[int, ...]:
+    def read_ints(self, n: int) -> tuple[int, ...]:
         """Read a list of integers."""
         return tuple(self.read_int() for _ in range(n))
 
-    def read_int_bytes_dict(self, n: int, dest: Dict[int, bytes]) -> None:
+    def read_int_bytes_dict(self, n: int, dest: dict[int, bytes]) -> None:
         """Read a dictionary of integers for keys and bytes for values into a destination dictionary."""
         for _ in range(n):
             k = self.read_int()
@@ -85,7 +82,7 @@ def read_float(self) -> float:
         The float is converted into a 32-bit integer using a method equivalent to
         Java's floatToIntBits and then encoded in little-endian format.
         """
-        return float(cast(Tuple[float, ...], STRUCT_FLOAT.unpack(self.read(4)))[0])
+        return float(cast(tuple[float, ...], STRUCT_FLOAT.unpack(self.read(4)))[0])
 
     def read_double(self) -> float:
         """Read a value from the stream as a double.
@@ -94,7 +91,7 @@ def read_double(self) -> float:
         The double is converted into a 64-bit integer using a method equivalent to
         Java's doubleToLongBits and then encoded in little-endian format.
         """
-        return float(cast(Tuple[float, ...], STRUCT_DOUBLE.unpack(self.read(8)))[0])
+        return float(cast(tuple[float, ...], STRUCT_DOUBLE.unpack(self.read(8)))[0])
 
     def read_bytes(self) -> bytes:
         """Bytes are encoded as a long followed by that many bytes of data."""
@@ -152,7 +149,7 @@ def read(self, n: int) -> bytes:
         """Read n bytes."""
         if n < 0:
             raise ValueError(f"Requested {n} bytes to read, expected positive integer.")
-        data: List[bytes] = []
+        data: list[bytes] = []
 
         n_remaining = n
         while n_remaining > 0:
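Context for the hunks above: `read_int` uses Avro's zig-zag decoding, which is the `(n >> 1) ^ -(n & 1)` expression visible in the diff. A standalone sketch of just that step, independent of the decoder classes:

```python
def zigzag_decode(n: int) -> int:
    # Same expression as in read_int above: maps 0, 1, 2, 3, 4, ... to 0, -1, 1, -2, 2, ...
    return (n >> 1) ^ -(n & 1)


assert [zigzag_decode(n) for n in range(5)] == [0, -1, 1, -2, 2]
```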

pyiceberg/avro/file.py

Lines changed: 11 additions & 14 deletions
@@ -27,10 +27,7 @@
 from types import TracebackType
 from typing import (
     Callable,
-    Dict,
     Generic,
-    List,
-    Type,
     TypeVar,
 )
 
@@ -77,14 +74,14 @@ def magic(self) -> bytes:
         return self._data[0]
 
     @property
-    def meta(self) -> Dict[str, str]:
+    def meta(self) -> dict[str, str]:
         return self._data[1]
 
     @property
     def sync(self) -> bytes:
         return self._data[2]
 
-    def compression_codec(self) -> Type[Codec] | None:
+    def compression_codec(self) -> type[Codec] | None:
         """Get the file's compression codec algorithm from the file's metadata.
 
         In the case of a null codec, we return a None indicating that we
@@ -146,8 +143,8 @@ class AvroFile(Generic[D]):
     )
     input_file: InputFile
     read_schema: Schema | None
-    read_types: Dict[int, Callable[..., StructProtocol]]
-    read_enums: Dict[int, Callable[..., Enum]]
+    read_types: dict[int, Callable[..., StructProtocol]]
+    read_enums: dict[int, Callable[..., Enum]]
     header: AvroFileHeader
     schema: Schema
     reader: Reader
@@ -159,8 +156,8 @@ def __init__(
         self,
         input_file: InputFile,
         read_schema: Schema | None = None,
-        read_types: Dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
-        read_enums: Dict[int, Callable[..., Enum]] = EMPTY_DICT,
+        read_types: dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
+        read_enums: dict[int, Callable[..., Enum]] = EMPTY_DICT,
     ) -> None:
         self.input_file = input_file
         self.read_schema = read_schema
@@ -185,7 +182,7 @@ def __enter__(self) -> AvroFile[D]:
 
         return self
 
-    def __exit__(self, exctype: Type[BaseException] | None, excinst: BaseException | None, exctb: TracebackType | None) -> None:
+    def __exit__(self, exctype: type[BaseException] | None, excinst: BaseException | None, exctb: TracebackType | None) -> None:
         """Perform cleanup when exiting the scope of a 'with' statement."""
 
     def __iter__(self) -> AvroFile[D]:
@@ -240,7 +237,7 @@ def __init__(
         file_schema: Schema,
         schema_name: str,
         record_schema: Schema | None = None,
-        metadata: Dict[str, str] = EMPTY_DICT,
+        metadata: dict[str, str] = EMPTY_DICT,
     ) -> None:
         self.output_file = output_file
         self.file_schema = file_schema
@@ -267,7 +264,7 @@ def __enter__(self) -> AvroOutputFile[D]:
 
         return self
 
-    def __exit__(self, exctype: Type[BaseException] | None, excinst: BaseException | None, exctb: TracebackType | None) -> None:
+    def __exit__(self, exctype: type[BaseException] | None, excinst: BaseException | None, exctb: TracebackType | None) -> None:
         """Perform cleanup when exiting the scope of a 'with' statement."""
         self.output_stream.close()
 
@@ -284,7 +281,7 @@ def _write_header(self) -> None:
         header = AvroFileHeader(MAGIC, meta, self.sync_bytes)
         construct_writer(META_SCHEMA).write(self.encoder, header)
 
-    def compression_codec(self) -> Type[Codec] | None:
+    def compression_codec(self) -> type[Codec] | None:
         """Get the file's compression codec algorithm from the file's metadata.
 
         In the case of a null codec, we return a None indicating that we
@@ -302,7 +299,7 @@ def compression_codec(self) -> Type[Codec] | None:
 
         return KNOWN_CODECS[codec_name]  # type: ignore
 
-    def write_block(self, objects: List[D]) -> None:
+    def write_block(self, objects: list[D]) -> None:
         in_memory = io.BytesIO()
         block_content_encoder = BinaryEncoder(output_stream=in_memory)
         for obj in objects:
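The `__exit__` hunks above show that UP006 also covers `Type[...]`; `type[BaseException] | None` is the standard annotation for a context manager's exception-type parameter. A minimal, self-contained context manager with the same shape of signature (toy class, not pyiceberg API):

```python
from __future__ import annotations

from types import TracebackType


class Resource:
    def __enter__(self) -> Resource:
        return self

    def __exit__(
        self,
        exctype: type[BaseException] | None,
        excinst: BaseException | None,
        exctb: TracebackType | None,
    ) -> None:
        # Cleanup goes here; returning None (falsy) lets any exception propagate.
        pass


with Resource():
    pass
```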

pyiceberg/avro/reader.py

Lines changed: 7 additions & 9 deletions
@@ -33,9 +33,7 @@
 from typing import (
     Any,
     Callable,
-    List,
     Mapping,
-    Tuple,
 )
 from uuid import UUID
 
@@ -319,14 +317,14 @@ class StructReader(Reader):
         "_hash",
         "_max_pos",
     )
-    field_readers: Tuple[Tuple[int | None, Reader], ...]
+    field_readers: tuple[tuple[int | None, Reader], ...]
     create_struct: Callable[..., StructProtocol]
     struct: StructType
-    field_reader_functions = Tuple[Tuple[str | None, int, Callable[[BinaryDecoder], Any] | None], ...]
+    field_reader_functions = tuple[tuple[str | None, int, Callable[[BinaryDecoder], Any] | None], ...]
 
     def __init__(
         self,
-        field_readers: Tuple[Tuple[int | None, Reader], ...],
+        field_readers: tuple[tuple[int | None, Reader], ...],
         create_struct: Callable[..., StructProtocol],
         struct: StructType,
     ) -> None:
@@ -338,7 +336,7 @@ def __init__(
         if not isinstance(self.create_struct(), StructProtocol):
             raise ValueError(f"Incompatible with StructProtocol: {self.create_struct}")
 
-        reading_callbacks: List[Tuple[int | None, Callable[[BinaryDecoder], Any]]] = []
+        reading_callbacks: list[tuple[int | None, Callable[[BinaryDecoder], Any]]] = []
         max_pos = -1
         for pos, field in field_readers:
             if pos is not None:
@@ -394,8 +392,8 @@ def __init__(self, element: Reader) -> None:
         self._hash = hash(self.element)
         self._is_int_list = isinstance(self.element, IntegerReader)
 
-    def read(self, decoder: BinaryDecoder) -> List[Any]:
-        read_items: List[Any] = []
+    def read(self, decoder: BinaryDecoder) -> list[Any]:
+        read_items: list[Any] = []
         block_count = decoder.read_int()
         while block_count != 0:
             if block_count < 0:
@@ -461,7 +459,7 @@ def _read_int_int(self, decoder: BinaryDecoder) -> Mapping[int, int]:
         if block_count == 0:
             return EMPTY_DICT
 
-        contents_array: List[Tuple[int, ...]] = []
+        contents_array: list[tuple[int, ...]] = []
 
         while block_count != 0:
             if block_count < 0:
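For context on the `block_count` loops in `ListReader.read` and `_read_int_int` above: Avro encodes arrays and maps in blocks terminated by a zero count, and a negative count means its absolute value is the item count and it is immediately followed by the block's size in bytes. A hedged, standalone sketch of that loop shape (the `decoder` argument is assumed to expose the `read_int` interface from decoder.py):

```python
from typing import Any, Callable


def read_avro_blocks(decoder: Any, read_item: Callable[[Any], Any]) -> list[Any]:
    # Sketch of the Avro block protocol mirrored by ListReader.read above.
    items: list[Any] = []
    block_count = decoder.read_int()
    while block_count != 0:
        if block_count < 0:
            block_count = -block_count
            _block_size_in_bytes = decoder.read_int()  # only present for negative counts
        for _ in range(block_count):
            items.append(read_item(decoder))
        block_count = decoder.read_int()
    return items
```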

pyiceberg/avro/resolver.py

Lines changed: 15 additions & 18 deletions
@@ -18,9 +18,6 @@
 from enum import Enum
 from typing import (
     Callable,
-    Dict,
-    List,
-    Tuple,
 )
 
 from pyiceberg.avro.decoder import BinaryDecoder
@@ -114,7 +111,7 @@
 
 
 def construct_reader(
-    file_schema: Schema | IcebergType, read_types: Dict[int, Callable[..., StructProtocol]] = EMPTY_DICT
+    file_schema: Schema | IcebergType, read_types: dict[int, Callable[..., StructProtocol]] = EMPTY_DICT
 ) -> Reader:
     """Construct a reader from a file schema.
 
@@ -146,7 +143,7 @@ class ConstructWriter(SchemaVisitorPerPrimitiveType[Writer]):
     def schema(self, schema: Schema, struct_result: Writer) -> Writer:
        return struct_result
 
-    def struct(self, struct: StructType, field_results: List[Writer]) -> Writer:
+    def struct(self, struct: StructType, field_results: list[Writer]) -> Writer:
         return StructWriter(tuple((pos, result) for pos, result in enumerate(field_results)))
 
     def field(self, field: NestedField, field_result: Writer) -> Writer:
@@ -234,8 +231,8 @@ def resolve_writer(
 def resolve_reader(
     file_schema: Schema | IcebergType,
     read_schema: Schema | IcebergType,
-    read_types: Dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
-    read_enums: Dict[int, Callable[..., Enum]] = EMPTY_DICT,
+    read_types: dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
+    read_enums: dict[int, Callable[..., Enum]] = EMPTY_DICT,
 ) -> Reader:
     """Resolve the file and read schema to produce a reader.
 
@@ -274,12 +271,12 @@ class WriteSchemaResolver(PrimitiveWithPartnerVisitor[IcebergType, Writer]):
     def schema(self, file_schema: Schema, record_schema: IcebergType | None, result: Writer) -> Writer:
         return result
 
-    def struct(self, file_schema: StructType, record_struct: IcebergType | None, file_writers: List[Writer]) -> Writer:
+    def struct(self, file_schema: StructType, record_struct: IcebergType | None, file_writers: list[Writer]) -> Writer:
         if not isinstance(record_struct, StructType):
             raise ResolveError(f"File/write schema are not aligned for struct, got {record_struct}")
 
-        record_struct_positions: Dict[int, int] = {field.field_id: pos for pos, field in enumerate(record_struct.fields)}
-        results: List[Tuple[int | None, Writer]] = []
+        record_struct_positions: dict[int, int] = {field.field_id: pos for pos, field in enumerate(record_struct.fields)}
+        results: list[tuple[int | None, Writer]] = []
 
         for writer, file_field in zip(file_writers, file_schema.fields, strict=True):
             if file_field.field_id in record_struct_positions:
@@ -367,14 +364,14 @@ def visit_unknown(self, unknown_type: UnknownType, partner: IcebergType | None)
 
 class ReadSchemaResolver(PrimitiveWithPartnerVisitor[IcebergType, Reader]):
     __slots__ = ("read_types", "read_enums", "context")
-    read_types: Dict[int, Callable[..., StructProtocol]]
-    read_enums: Dict[int, Callable[..., Enum]]
-    context: List[int]
+    read_types: dict[int, Callable[..., StructProtocol]]
+    read_enums: dict[int, Callable[..., Enum]]
+    context: list[int]
 
     def __init__(
         self,
-        read_types: Dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
-        read_enums: Dict[int, Callable[..., Enum]] = EMPTY_DICT,
+        read_types: dict[int, Callable[..., StructProtocol]] = EMPTY_DICT,
+        read_enums: dict[int, Callable[..., Enum]] = EMPTY_DICT,
     ) -> None:
         self.read_types = read_types
         self.read_enums = read_enums
@@ -389,7 +386,7 @@ def before_field(self, field: NestedField, field_partner: NestedField | None) ->
     def after_field(self, field: NestedField, field_partner: NestedField | None) -> None:
         self.context.pop()
 
-    def struct(self, struct: StructType, expected_struct: IcebergType | None, field_readers: List[Reader]) -> Reader:
+    def struct(self, struct: StructType, expected_struct: IcebergType | None, field_readers: list[Reader]) -> Reader:
         read_struct_id = self.context[STRUCT_ROOT] if len(self.context) > 0 else STRUCT_ROOT
         struct_callable = self.read_types.get(read_struct_id, Record)
 
@@ -399,10 +396,10 @@ def struct(self, struct: StructType, expected_struct: IcebergType | None, field_
         if not isinstance(expected_struct, StructType):
             raise ResolveError(f"File/read schema are not aligned for struct, got {expected_struct}")
 
-        expected_positions: Dict[int, int] = {field.field_id: pos for pos, field in enumerate(expected_struct.fields)}
+        expected_positions: dict[int, int] = {field.field_id: pos for pos, field in enumerate(expected_struct.fields)}
 
         # first, add readers for the file fields that must be in order
-        results: List[Tuple[int | None, Reader]] = [
+        results: list[tuple[int | None, Reader]] = [
             (
                 expected_positions.get(field.field_id),
                 # Check if we need to convert it to an Enum
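The `struct` hunks above pair each file field with the position of the matching field (by field id) in the read schema, using `None` when the read schema does not project that field. A toy illustration of that position-resolution step with made-up field ids (nothing here is pyiceberg API):

```python
file_field_ids = [1, 2, 3]  # hypothetical ids, in the order they appear in the data file
read_field_ids = [3, 1]     # hypothetical ids, in the order the read schema expects

# Same shape as `expected_positions` / `results` in ReadSchemaResolver.struct above.
expected_positions: dict[int, int] = {fid: pos for pos, fid in enumerate(read_field_ids)}
results: list[tuple[int | None, str]] = [
    (expected_positions.get(fid), f"reader_for_field_{fid}") for fid in file_field_ids
]

print(results)  # [(1, 'reader_for_field_1'), (None, 'reader_for_field_2'), (0, 'reader_for_field_3')]
```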

pyiceberg/avro/writer.py

Lines changed: 3 additions & 6 deletions
@@ -28,9 +28,6 @@
 from dataclasses import field as dataclassfield
 from typing import (
     Any,
-    Dict,
-    List,
-    Tuple,
 )
 from uuid import UUID
 
@@ -186,7 +183,7 @@ def write(self, encoder: BinaryEncoder, val: Any) -> None:
 
 @dataclass(frozen=True)
 class StructWriter(Writer):
-    field_writers: Tuple[Tuple[int | None, Writer], ...] = dataclassfield()
+    field_writers: tuple[tuple[int | None, Writer], ...] = dataclassfield()
 
     def write(self, encoder: BinaryEncoder, val: Record) -> None:
         for pos, writer in self.field_writers:
@@ -210,7 +207,7 @@ def __hash__(self) -> int:
 class ListWriter(Writer):
     element_writer: Writer
 
-    def write(self, encoder: BinaryEncoder, val: List[Any]) -> None:
+    def write(self, encoder: BinaryEncoder, val: list[Any]) -> None:
         encoder.write_int(len(val))
         for v in val:
             self.element_writer.write(encoder, v)
@@ -223,7 +220,7 @@ class MapWriter(Writer):
     key_writer: Writer
     value_writer: Writer
 
-    def write(self, encoder: BinaryEncoder, val: Dict[Any, Any]) -> None:
+    def write(self, encoder: BinaryEncoder, val: dict[Any, Any]) -> None:
         encoder.write_int(len(val))
         for k, v in val.items():
             self.key_writer.write(encoder, k)
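The `StructWriter` hunk above also shows UP006 applying inside dataclass field annotations. A minimal frozen dataclass with the same annotation shape, just to confirm the built-in generics work there (toy names, not pyiceberg API):

```python
from __future__ import annotations

from dataclasses import dataclass, field as dataclassfield


@dataclass(frozen=True)
class PairList:
    # Built-in generics in a dataclass field annotation, as in StructWriter above.
    pairs: tuple[tuple[int | None, str], ...] = dataclassfield(default=())


p = PairList(pairs=((0, "a"), (None, "b")))
print(p.pairs)  # ((0, 'a'), (None, 'b'))
```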
