diff --git a/stubs/bleach/bleach/html5lib_shim.pyi b/stubs/bleach/bleach/html5lib_shim.pyi
index d24c2d825f33..1904b538e8dd 100644
--- a/stubs/bleach/bleach/html5lib_shim.pyi
+++ b/stubs/bleach/bleach/html5lib_shim.pyi
@@ -50,7 +50,7 @@ class InputStreamWithMemory:
class BleachHTMLTokenizer(HTMLTokenizer):
consume_entities: bool
- stream: InputStreamWithMemory
+ stream: InputStreamWithMemory # type: ignore[assignment]
emitted_last_token: dict[str, Any] | None
def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...
diff --git a/stubs/bleach/bleach/linkifier.pyi b/stubs/bleach/bleach/linkifier.pyi
index 73fe653c1b62..e6ec5d0ae35a 100644
--- a/stubs/bleach/bleach/linkifier.pyi
+++ b/stubs/bleach/bleach/linkifier.pyi
@@ -36,7 +36,7 @@ class Linker:
# or `html5lib` token might be reused
_Token: TypeAlias = dict[str, Any]
-class LinkifyFilter(Filter):
+class LinkifyFilter(Filter[_Token]):
callbacks: Iterable[_Callback]
skip_tags: Container[str]
parse_email: bool
diff --git a/stubs/bleach/bleach/sanitizer.pyi b/stubs/bleach/bleach/sanitizer.pyi
index a86be65feae6..ef7d597a48c2 100644
--- a/stubs/bleach/bleach/sanitizer.pyi
+++ b/stubs/bleach/bleach/sanitizer.pyi
@@ -1,3 +1,4 @@
+from _typeshed import Incomplete
from collections.abc import Callable, Container, Iterable, Iterator
from re import Pattern
from typing import Final, Protocol, type_check_only
@@ -24,7 +25,7 @@ class NoCssSanitizerWarning(UserWarning): ...
@type_check_only
class _FilterConstructor(Protocol):
- def __call__(self, *, source: BleachSanitizerFilter) -> Filter: ...
+ def __call__(self, *, source: BleachSanitizerFilter) -> Filter[Incomplete]: ...
# _FilterConstructor used to be called _Filter
# this alias is obsolete and can potentially be removed in the future
@@ -40,7 +41,7 @@ class Cleaner:
protocols: Iterable[str]
strip: bool
strip_comments: bool
- filters: Iterable[Filter]
+ filters: Iterable[_FilterConstructor]
css_sanitizer: CSSSanitizer | None
parser: BleachHTMLParser
walker: TreeWalker
@@ -85,7 +86,7 @@ class BleachSanitizerFilter(SanitizerFilter):
def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def __iter__(self) -> Iterator[_Token]: ...
- def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
+ def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ... # type: ignore[override]
def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
def allow_token(self, token: _Token) -> _Token: ...
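
The switch from `Iterable[Filter]` to `Iterable[_FilterConstructor]` reflects how bleach actually consumes `filters`: the `Cleaner` instantiates each entry itself, passing the token stream as the `source` keyword, so callers supply filter classes or `functools.partial` objects rather than filter instances. A minimal sketch of that usage, following bleach's documented `partial(LinkifyFilter, ...)` pattern (tag names and the URL are illustrative only):

from functools import partial

from bleach.linkifier import LinkifyFilter
from bleach.sanitizer import Cleaner

# Each entry in `filters` is a callable invoked as entry(source=<token stream>),
# which is exactly the _FilterConstructor protocol above.
cleaner = Cleaner(
    tags={"a", "p", "pre"},
    filters=[partial(LinkifyFilter, skip_tags={"pre"})],
)
print(cleaner.clean("<p>see https://example.com</p>"))
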
diff --git a/stubs/html5lib/METADATA.toml b/stubs/html5lib/METADATA.toml
index 23a4684130a6..3f4ba61f3dc6 100644
--- a/stubs/html5lib/METADATA.toml
+++ b/stubs/html5lib/METADATA.toml
@@ -1,5 +1,6 @@
version = "1.1.*"
upstream_repository = "https://github.com/html5lib/html5lib-python"
+requires = ["types-webencodings"]
[tool.stubtest]
extras = ["all"]
diff --git a/stubs/html5lib/html5lib/_ihatexml.pyi b/stubs/html5lib/html5lib/_ihatexml.pyi
index 44115cfbab7a..4f0844635aa3 100644
--- a/stubs/html5lib/html5lib/_ihatexml.pyi
+++ b/stubs/html5lib/html5lib/_ihatexml.pyi
@@ -1,6 +1,4 @@
import re
-from _typeshed import Incomplete
-from collections.abc import Iterable
baseChar: str
ideographic: str
@@ -13,13 +11,13 @@ nameFirst: str
reChar: re.Pattern[str]
reCharRange: re.Pattern[str]
-def charStringToList(chars: str) -> list[str]: ...
-def normaliseCharList(charList: Iterable[str]) -> list[str]: ...
+def charStringToList(chars: str) -> list[list[int]]: ...
+def normaliseCharList(charList: list[list[int]]) -> list[list[int]]: ...
max_unicode: int
-def missingRanges(charList: Iterable[str]) -> list[str]: ...
-def listToRegexpStr(charList): ...
+def missingRanges(charList: list[list[int]]) -> list[list[int]]: ...
+def listToRegexpStr(charList: list[list[int]]) -> str: ...
def hexToInt(hex_str: str | bytes | bytearray) -> int: ...
def escapeRegexp(string: str) -> str: ...
@@ -29,13 +27,13 @@ nonPubidCharRegexp: re.Pattern[str]
class InfosetFilter:
replacementRegexp: re.Pattern[str]
- dropXmlnsLocalName: Incomplete
- dropXmlnsAttrNs: Incomplete
- preventDoubleDashComments: Incomplete
- preventDashAtCommentEnd: Incomplete
- replaceFormFeedCharacters: Incomplete
- preventSingleQuotePubid: Incomplete
- replaceCache: Incomplete
+ dropXmlnsLocalName: bool
+ dropXmlnsAttrNs: bool
+ preventDoubleDashComments: bool
+ preventDashAtCommentEnd: bool
+ replaceFormFeedCharacters: bool
+ preventSingleQuotePubid: bool
+ replaceCache: dict[str, str]
def __init__(
self,
dropXmlnsLocalName: bool = False,
@@ -45,13 +43,13 @@ class InfosetFilter:
replaceFormFeedCharacters: bool = True,
preventSingleQuotePubid: bool = False,
) -> None: ...
- def coerceAttribute(self, name, namespace=None): ...
- def coerceElement(self, name): ...
- def coerceComment(self, data): ...
- def coerceCharacters(self, data): ...
- def coercePubid(self, data): ...
- def toXmlName(self, name): ...
- def getReplacementCharacter(self, char): ...
- def fromXmlName(self, name): ...
- def escapeChar(self, char): ...
- def unescapeChar(self, charcode): ...
+ def coerceAttribute(self, name: str, namespace: str | None = None) -> str | None: ...
+ def coerceElement(self, name: str) -> str: ...
+ def coerceComment(self, data: str) -> str: ...
+ def coerceCharacters(self, data: str) -> str: ...
+ def coercePubid(self, data: str) -> str: ...
+ def toXmlName(self, name: str) -> str: ...
+ def getReplacementCharacter(self, char: str) -> str: ...
+ def fromXmlName(self, name: str) -> str: ...
+ def escapeChar(self, char: str) -> str: ...
+ def unescapeChar(self, charcode: str | bytes | bytearray) -> str: ...
diff --git a/stubs/html5lib/html5lib/_inputstream.pyi b/stubs/html5lib/html5lib/_inputstream.pyi
index 9f10e4dbf98e..53a94876db6f 100644
--- a/stubs/html5lib/html5lib/_inputstream.pyi
+++ b/stubs/html5lib/html5lib/_inputstream.pyi
@@ -1,37 +1,36 @@
-from _typeshed import Incomplete, SupportsRead
-from codecs import CodecInfo
-from typing import Protocol, overload, type_check_only
-from typing_extensions import TypeAlias
+import re
+from _io import BytesIO, StringIO
+from _typeshed import Incomplete, ReadableBuffer, SupportsRead
+from collections.abc import Callable, Iterable
+from typing import Any, AnyStr, Generic, Literal, TypeVar, overload
+from typing_extensions import Self, TypeAlias
-# Is actually webencodings.Encoding
-@type_check_only
-class _Encoding(Protocol):
- name: str
- codec_info: CodecInfo
- def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
+from webencodings import Encoding
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream # noqa: Y047 # used in other files
+_SupportsReadT = TypeVar("_SupportsReadT", bound=SupportsRead[Any])
+_SupportsReadBytesT = TypeVar("_SupportsReadBytesT", bound=SupportsRead[bytes])
-spaceCharactersBytes: Incomplete
-asciiLettersBytes: Incomplete
-asciiUppercaseBytes: Incomplete
-spacesAngleBrackets: Incomplete
+spaceCharactersBytes: frozenset[bytes]
+asciiLettersBytes: frozenset[bytes]
+asciiUppercaseBytes: frozenset[bytes]
+spacesAngleBrackets: frozenset[bytes]
invalid_unicode_no_surrogate: str
-invalid_unicode_re: Incomplete
-non_bmp_invalid_codepoints: Incomplete
-ascii_punctuation_re: Incomplete
-charsUntilRegEx: Incomplete
+invalid_unicode_re: re.Pattern[str]
+non_bmp_invalid_codepoints: set[int]
+ascii_punctuation_re: re.Pattern[str]
+charsUntilRegEx: dict[tuple[Iterable[str | bytes | bytearray], bool], re.Pattern[str]]
-class BufferedStream:
- stream: Incomplete
- buffer: Incomplete
- position: Incomplete
- def __init__(self, stream) -> None: ...
- def tell(self): ...
- def seek(self, pos) -> None: ...
- def read(self, bytes): ...
+class BufferedStream(Generic[AnyStr]):
+ stream: SupportsRead[AnyStr]
+ buffer: list[AnyStr]
+ position: list[int]
+ def __init__(self, stream: SupportsRead[AnyStr]) -> None: ...
+ def tell(self) -> int: ...
+ def seek(self, pos: int) -> None: ...
+ def read(self, bytes: int) -> AnyStr: ...
@overload
def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
@@ -48,9 +47,9 @@ def HTMLInputStream(
) -> HTMLBinaryInputStream: ...
class HTMLUnicodeInputStream:
- reportCharacterErrors: Incomplete
- newLines: Incomplete
- charEncoding: tuple[_Encoding, str]
+ reportCharacterErrors: Callable[[str], None]
+ newLines: list[int]
+ charEncoding: tuple[Encoding, str]
dataStream: Incomplete
def __init__(self, source: _UnicodeInputStream) -> None: ...
chunk: str
@@ -60,14 +59,17 @@ class HTMLUnicodeInputStream:
prevNumLines: int
prevNumCols: int
def reset(self) -> None: ...
- def openStream(self, source): ...
+ @overload
+ def openStream(self, source: _SupportsReadT) -> _SupportsReadT: ...
+ @overload
+ def openStream(self, source: str | None) -> StringIO: ...
def position(self) -> tuple[int, int]: ...
- def char(self): ...
- def readChunk(self, chunkSize=None): ...
- def characterErrorsUCS4(self, data) -> None: ...
- def characterErrorsUCS2(self, data) -> None: ...
- def charsUntil(self, characters, opposite: bool = False): ...
- def unget(self, char) -> None: ...
+ def char(self) -> str | None: ...
+ def readChunk(self, chunkSize: int | None = None) -> bool: ...
+ def characterErrorsUCS4(self, data: str) -> None: ...
+ def characterErrorsUCS2(self, data: str) -> None: ...
+ def charsUntil(self, characters: Iterable[str | bytes | bytearray], opposite: bool = False) -> str: ...
+ def unget(self, char: str | None) -> None: ...
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
rawStream: Incomplete
@@ -77,8 +79,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
transport_encoding: Incomplete
same_origin_parent_encoding: Incomplete
likely_encoding: Incomplete
- default_encoding: Incomplete
- charEncoding: tuple[_Encoding, str]
+ default_encoding: str
+ charEncoding: tuple[Encoding, str]
def __init__(
self,
source: _BinaryInputStream,
@@ -91,46 +93,52 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
) -> None: ...
dataStream: Incomplete
def reset(self) -> None: ...
- def openStream(self, source): ...
+ @overload # type: ignore[override]
+ def openStream(self, source: _SupportsReadBytesT) -> _SupportsReadBytesT: ...
+ @overload # type: ignore[override]
+ def openStream(self, source: ReadableBuffer) -> BytesIO: ...
def determineEncoding(self, chardet: bool = True): ...
def changeEncoding(self, newEncoding: str | bytes | None) -> None: ...
- def detectBOM(self): ...
- def detectEncodingMeta(self): ...
+ def detectBOM(self) -> Encoding | None: ...
+ def detectEncodingMeta(self) -> Encoding | None: ...
class EncodingBytes(bytes):
- def __new__(self, value): ...
- def __init__(self, value) -> None: ...
- def __iter__(self): ...
- def __next__(self): ...
- def next(self): ...
- def previous(self): ...
- def setPosition(self, position) -> None: ...
- def getPosition(self): ...
- position: Incomplete
- def getCurrentByte(self): ...
+ def __new__(self, value: bytes) -> Self: ...
+ def __init__(self, value: bytes) -> None: ...
+ def __iter__(self) -> Self: ... # type: ignore[override]
+ def __next__(self) -> bytes: ...
+ def next(self) -> bytes: ...
+ def previous(self) -> bytes: ...
+ def setPosition(self, position: int) -> None: ...
+ def getPosition(self) -> int | None: ...
@property
- def currentByte(self): ...
- def skip(self, chars=...): ...
- def skipUntil(self, chars): ...
- def matchBytes(self, bytes): ...
- def jumpTo(self, bytes): ...
+ def position(self) -> int | None: ...
+ @position.setter
+ def position(self, position: int) -> None: ...
+ def getCurrentByte(self) -> bytes: ...
+ @property
+ def currentByte(self) -> bytes: ...
+ def skip(self, chars: bytes | bytearray | Iterable[bytes] = ...) -> bytes | None: ...
+ def skipUntil(self, chars: bytes | bytearray | Iterable[bytes]) -> bytes | None: ...
+ def matchBytes(self, bytes: bytes | bytearray) -> bool: ...
+ def jumpTo(self, bytes: bytes | bytearray) -> Literal[True]: ...
class EncodingParser:
- data: Incomplete
- encoding: Incomplete
- def __init__(self, data) -> None: ...
- def getEncoding(self): ...
- def handleComment(self): ...
- def handleMeta(self): ...
- def handlePossibleStartTag(self): ...
- def handlePossibleEndTag(self): ...
- def handlePossibleTag(self, endTag): ...
- def handleOther(self): ...
- def getAttribute(self): ...
+ data: EncodingBytes
+ encoding: Encoding | None
+ def __init__(self, data: bytes) -> None: ...
+ def getEncoding(self) -> Encoding | None: ...
+ def handleComment(self) -> bool: ...
+ def handleMeta(self) -> bool: ...
+ def handlePossibleStartTag(self) -> bool: ...
+ def handlePossibleEndTag(self) -> bool: ...
+ def handlePossibleTag(self, endTag: bool | None) -> bool: ...
+ def handleOther(self) -> bool: ...
+ def getAttribute(self) -> tuple[bytes, bytes] | None: ...
class ContentAttrParser:
- data: Incomplete
- def __init__(self, data) -> None: ...
- def parse(self): ...
+ data: EncodingBytes
+ def __init__(self, data: EncodingBytes) -> None: ...
+ def parse(self) -> bytes | None: ...
-def lookupEncoding(encoding: str | bytes | None) -> str | None: ...
+def lookupEncoding(encoding: str | bytes | None) -> Encoding | None: ...
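
`lookupEncoding` and `charEncoding` are now typed in terms of the real `webencodings.Encoding` class (hence the new `requires = ["types-webencodings"]` entry in METADATA.toml) instead of the old local `_Encoding` protocol. A small sketch of what that object exposes at runtime, assuming only the documented `name` / `codec_info` attributes:

from html5lib._inputstream import lookupEncoding

enc = lookupEncoding("UTF-8")      # webencodings.Encoding | None
if enc is not None:
    print(enc.name)                # canonical label, e.g. "utf-8"
    print(enc.codec_info.name)     # the underlying codecs.CodecInfo
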
diff --git a/stubs/html5lib/html5lib/_tokenizer.pyi b/stubs/html5lib/html5lib/_tokenizer.pyi
index 22316b1112b5..53c2e1bec605 100644
--- a/stubs/html5lib/html5lib/_tokenizer.pyi
+++ b/stubs/html5lib/html5lib/_tokenizer.pyi
@@ -1,42 +1,78 @@
from _typeshed import Incomplete
+from collections import deque
+from collections.abc import Callable, Iterator
+from typing import TypedDict, overload, type_check_only
-from ._inputstream import _InputStream
+from ._inputstream import HTMLBinaryInputStream, HTMLUnicodeInputStream, _BinaryInputStream, _UnicodeInputStream
+from ._trie import Trie
-entitiesTrie: Incomplete
+@type_check_only
+class _DataVars(TypedDict, total=False):
+ data: str | None
+ charAsInt: int
+
+@type_check_only
+class _Token(TypedDict, total=False):
+ type: int
+ data: str | list[str]
+ datavars: _DataVars
+ name: str
+ selfClosing: bool
+ selfClosingAcknowledged: bool
+ publicId: str | None
+ systemId: str | None
+ correct: bool
+
+entitiesTrie: Trie
attributeMap = dict
class HTMLTokenizer:
- stream: Incomplete
+ # TODO: Use Protocol to allow subclasses to set `stream` that do not inherit from HTMLUnicodeInputStream
+ # TODO: Use a Protocol so that subclasses can set `stream` to streams that do not inherit from HTMLUnicodeInputStream
+ stream: HTMLUnicodeInputStream | HTMLBinaryInputStream
parser: Incomplete
escapeFlag: bool
- lastFourChars: Incomplete
- state: Incomplete
+ lastFourChars: list[Incomplete]
+ state: Callable[[], bool]
escape: bool
- currentToken: Incomplete
- def __init__(self, stream: _InputStream, parser=None, **kwargs) -> None: ...
- tokenQueue: Incomplete
- def __iter__(self): ...
- def consumeNumberEntity(self, isHex): ...
- def consumeEntity(self, allowedChar=None, fromAttribute: bool = False) -> None: ...
- def processEntityInAttribute(self, allowedChar) -> None: ...
+ currentToken: _Token | None
+ @overload
+ def __init__(self, stream: _UnicodeInputStream, parser=None) -> None: ...
+ @overload
+ def __init__(
+ self,
+ stream: _BinaryInputStream,
+ parser=None,
+ *,
+ override_encoding: str | bytes | None = None,
+ transport_encoding: str | bytes | None = None,
+ same_origin_parent_encoding: str | bytes | None = None,
+ likely_encoding: str | bytes | None = None,
+ default_encoding: str = "windows-1252",
+ useChardet: bool = True,
+ ) -> None: ...
+ tokenQueue: deque[_Token]
+ def __iter__(self) -> Iterator[_Token]: ...
+ def consumeNumberEntity(self, isHex: bool | None) -> str: ...
+ def consumeEntity(self, allowedChar: str | None = None, fromAttribute: bool = False) -> None: ...
+ def processEntityInAttribute(self, allowedChar: str | None) -> None: ...
def emitCurrentToken(self) -> None: ...
- def dataState(self): ...
- def entityDataState(self): ...
- def rcdataState(self): ...
- def characterReferenceInRcdata(self): ...
- def rawtextState(self): ...
- def scriptDataState(self): ...
- def plaintextState(self): ...
- def tagOpenState(self): ...
- def closeTagOpenState(self): ...
- def tagNameState(self): ...
+ def dataState(self) -> bool: ...
+ def entityDataState(self) -> bool: ...
+ def rcdataState(self) -> bool: ...
+ def characterReferenceInRcdata(self) -> bool: ...
+ def rawtextState(self) -> bool: ...
+ def scriptDataState(self) -> bool: ...
+ def plaintextState(self) -> bool: ...
+ def tagOpenState(self) -> bool: ...
+ def closeTagOpenState(self) -> bool: ...
+ def tagNameState(self) -> bool: ...
temporaryBuffer: str
- def rcdataLessThanSignState(self): ...
- def rcdataEndTagOpenState(self): ...
- def rcdataEndTagNameState(self): ...
- def rawtextLessThanSignState(self): ...
- def rawtextEndTagOpenState(self): ...
- def rawtextEndTagNameState(self): ...
+ def rcdataLessThanSignState(self) -> bool: ...
+ def rcdataEndTagOpenState(self) -> bool: ...
+ def rcdataEndTagNameState(self) -> bool: ...
+ def rawtextLessThanSignState(self) -> bool: ...
+ def rawtextEndTagOpenState(self) -> bool: ...
+ def rawtextEndTagNameState(self) -> bool: ...
def scriptDataLessThanSignState(self) -> bool: ...
def scriptDataEndTagOpenState(self) -> bool: ...
def scriptDataEndTagNameState(self) -> bool: ...
@@ -54,17 +90,17 @@ class HTMLTokenizer:
def scriptDataDoubleEscapedDashDashState(self) -> bool: ...
def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ...
def scriptDataDoubleEscapeEndState(self) -> bool: ...
- def beforeAttributeNameState(self): ...
- def attributeNameState(self): ...
- def afterAttributeNameState(self): ...
- def beforeAttributeValueState(self): ...
- def attributeValueDoubleQuotedState(self): ...
- def attributeValueSingleQuotedState(self): ...
- def attributeValueUnQuotedState(self): ...
- def afterAttributeValueState(self): ...
- def selfClosingStartTagState(self): ...
- def bogusCommentState(self): ...
- def markupDeclarationOpenState(self): ...
+ def beforeAttributeNameState(self) -> bool: ...
+ def attributeNameState(self) -> bool: ...
+ def afterAttributeNameState(self) -> bool: ...
+ def beforeAttributeValueState(self) -> bool: ...
+ def attributeValueDoubleQuotedState(self) -> bool: ...
+ def attributeValueSingleQuotedState(self) -> bool: ...
+ def attributeValueUnQuotedState(self) -> bool: ...
+ def afterAttributeValueState(self) -> bool: ...
+ def selfClosingStartTagState(self) -> bool: ...
+ def bogusCommentState(self) -> bool: ...
+ def markupDeclarationOpenState(self) -> bool: ...
def commentStartState(self) -> bool: ...
def commentStartDashState(self) -> bool: ...
def commentState(self) -> bool: ...
@@ -76,15 +112,15 @@ class HTMLTokenizer:
def doctypeNameState(self) -> bool: ...
def afterDoctypeNameState(self) -> bool: ...
def afterDoctypePublicKeywordState(self) -> bool: ...
- def beforeDoctypePublicIdentifierState(self): ...
- def doctypePublicIdentifierDoubleQuotedState(self): ...
- def doctypePublicIdentifierSingleQuotedState(self): ...
- def afterDoctypePublicIdentifierState(self): ...
- def betweenDoctypePublicAndSystemIdentifiersState(self): ...
- def afterDoctypeSystemKeywordState(self): ...
- def beforeDoctypeSystemIdentifierState(self): ...
- def doctypeSystemIdentifierDoubleQuotedState(self): ...
- def doctypeSystemIdentifierSingleQuotedState(self): ...
- def afterDoctypeSystemIdentifierState(self): ...
- def bogusDoctypeState(self): ...
- def cdataSectionState(self): ...
+ def beforeDoctypePublicIdentifierState(self) -> bool: ...
+ def doctypePublicIdentifierDoubleQuotedState(self) -> bool: ...
+ def doctypePublicIdentifierSingleQuotedState(self) -> bool: ...
+ def afterDoctypePublicIdentifierState(self) -> bool: ...
+ def betweenDoctypePublicAndSystemIdentifiersState(self) -> bool: ...
+ def afterDoctypeSystemKeywordState(self) -> bool: ...
+ def beforeDoctypeSystemIdentifierState(self) -> bool: ...
+ def doctypeSystemIdentifierDoubleQuotedState(self) -> bool: ...
+ def doctypeSystemIdentifierSingleQuotedState(self) -> bool: ...
+ def afterDoctypeSystemIdentifierState(self) -> bool: ...
+ def bogusDoctypeState(self) -> bool: ...
+ def cdataSectionState(self) -> bool: ...
diff --git a/stubs/html5lib/html5lib/_utils.pyi b/stubs/html5lib/html5lib/_utils.pyi
index f41afba04144..70abe9494add 100644
--- a/stubs/html5lib/html5lib/_utils.pyi
+++ b/stubs/html5lib/html5lib/_utils.pyi
@@ -1,6 +1,7 @@
import xml.etree.ElementTree as default_etree
-from _typeshed import Incomplete
-from collections.abc import Mapping
+from _typeshed import Incomplete, Unused
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Final, TypeVar, overload
__all__ = [
"default_etree",
@@ -11,13 +12,19 @@ __all__ = [
"supports_lone_surrogates",
]
-supports_lone_surrogates: bool
+supports_lone_surrogates: Final[bool]
-class MethodDispatcher(dict[Incomplete, Incomplete]):
- default: Incomplete
- def __init__(self, items=()) -> None: ...
- def __getitem__(self, key): ...
- def __get__(self, instance, owner=None): ...
+_K = TypeVar("_K")
+_V = TypeVar("_V")
+
+class MethodDispatcher(dict[_K, _V]):
+ default: _V | None
+ @overload # to solve `reportInvalidTypeVarUse`
+ def __init__(self) -> None: ...
+ @overload
+ def __init__(self, items: Iterable[tuple[_K | Iterable[_K], _V]]) -> None: ...
+ def __getitem__(self, key: _K) -> _V | None: ... # type: ignore[override]
+ def __get__(self, instance, owner: Unused = None) -> BoundMethodDispatcher: ...
class BoundMethodDispatcher(Mapping[Incomplete, Incomplete]):
instance: Incomplete
@@ -27,8 +34,8 @@ class BoundMethodDispatcher(Mapping[Incomplete, Incomplete]):
def get(self, key, default): ... # type: ignore[override]
def __iter__(self): ...
def __len__(self) -> int: ...
- def __contains__(self, key): ...
+ def __contains__(self, key) -> bool: ...
-def isSurrogatePair(data): ...
-def surrogatePairToCodepoint(data): ...
+def isSurrogatePair(data: Sequence[str | bytes | bytearray]) -> bool: ...
+def surrogatePairToCodepoint(data: Sequence[str | bytes | bytearray]) -> int: ...
def moduleFactoryFactory(factory): ...
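
Making `MethodDispatcher` generic matches how html5lib uses it: it is built from `(key, handler)` pairs where a key may itself be an iterable of keys that fans out into one entry each, and looking up an unknown key falls back to `default` (hence the `_V | None` return of `__getitem__`). A rough sketch, using plain strings as hypothetical handlers:

from html5lib._utils import MethodDispatcher

dispatch = MethodDispatcher([
    (("img", "br", "hr"), "handle-void"),  # grouped keys expand to one entry each
    ("p", "handle-paragraph"),
])
print(dispatch["br"])   # "handle-void"
print(dispatch["div"])  # None -- falls back to `default`, which starts as None
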
diff --git a/stubs/html5lib/html5lib/constants.pyi b/stubs/html5lib/html5lib/constants.pyi
index a0e12fbff98e..a5c4c5289afd 100644
--- a/stubs/html5lib/html5lib/constants.pyi
+++ b/stubs/html5lib/html5lib/constants.pyi
@@ -1,6 +1,4 @@
-from _typeshed import Incomplete
-
-EOF: Incomplete
+EOF: None
E: dict[str, str]
namespaces: dict[str, str]
scopingElements: frozenset[tuple[str, str]]
diff --git a/stubs/html5lib/html5lib/filters/alphabeticalattributes.pyi b/stubs/html5lib/html5lib/filters/alphabeticalattributes.pyi
index 20b694d65edf..955bae07d8ba 100644
--- a/stubs/html5lib/html5lib/filters/alphabeticalattributes.pyi
+++ b/stubs/html5lib/html5lib/filters/alphabeticalattributes.pyi
@@ -1,4 +1,5 @@
+from _typeshed import Incomplete
+
from . import base
-class Filter(base.Filter):
- def __iter__(self): ...
+class Filter(base.Filter[dict[str, Incomplete]]): ...
diff --git a/stubs/html5lib/html5lib/filters/base.pyi b/stubs/html5lib/html5lib/filters/base.pyi
index 82773fd9124f..11b1e579fa4f 100644
--- a/stubs/html5lib/html5lib/filters/base.pyi
+++ b/stubs/html5lib/html5lib/filters/base.pyi
@@ -1,7 +1,10 @@
-from _typeshed import Incomplete
+from collections.abc import Iterable, Iterator
+from typing import Any, Generic, TypeVar
-class Filter:
- source: Incomplete
- def __init__(self, source) -> None: ...
- def __iter__(self): ...
- def __getattr__(self, name: str): ...
+_T = TypeVar("_T")
+
+class Filter(Generic[_T]):
+ source: Iterable[_T]
+ def __init__(self, source: Iterable[_T]) -> None: ...
+ def __iter__(self) -> Iterator[_T]: ...
+ def __getattr__(self, name: str) -> Any: ... # Depends on `source`
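
With `Filter` now generic over the token type, a subclass is an iterable-of-tokens wrapper around another iterable of tokens, where html5lib tokens are dicts keyed by "type". A minimal hypothetical filter written against that shape (the class name and behaviour are illustrative, not part of html5lib):

from collections.abc import Iterator
from typing import Any

import html5lib
from html5lib import serializer, treewalkers
from html5lib.filters import base

class UppercaseTextFilter(base.Filter):  # i.e. base.Filter[dict[str, Any]] in stub terms
    """Yields every token unchanged except character data, which it uppercases."""

    def __iter__(self) -> Iterator[dict[str, Any]]:
        for token in base.Filter.__iter__(self):
            if token["type"] == "Characters":
                token = {**token, "data": token["data"].upper()}
            yield token

doc = html5lib.parse("<p>hello</p>")
stream = UppercaseTextFilter(treewalkers.getTreeWalker("etree")(doc))
print(serializer.HTMLSerializer().render(stream))
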
diff --git a/stubs/html5lib/html5lib/filters/inject_meta_charset.pyi b/stubs/html5lib/html5lib/filters/inject_meta_charset.pyi
index 030cd63753df..d8bb75bfcf99 100644
--- a/stubs/html5lib/html5lib/filters/inject_meta_charset.pyi
+++ b/stubs/html5lib/html5lib/filters/inject_meta_charset.pyi
@@ -1,6 +1,8 @@
+from _typeshed import Incomplete
+from collections.abc import Iterable
+
from . import base
-class Filter(base.Filter):
+class Filter(base.Filter[dict[str, Incomplete]]):
encoding: str | None
- def __init__(self, source, encoding: str | None) -> None: ...
- def __iter__(self): ...
+ def __init__(self, source: Iterable[dict[str, Incomplete]], encoding: str | None) -> None: ...
diff --git a/stubs/html5lib/html5lib/filters/lint.pyi b/stubs/html5lib/html5lib/filters/lint.pyi
index 39629c8696d6..37da9926b41a 100644
--- a/stubs/html5lib/html5lib/filters/lint.pyi
+++ b/stubs/html5lib/html5lib/filters/lint.pyi
@@ -1,8 +1,10 @@
+from _typeshed import Incomplete
+from collections.abc import Iterable
+
from . import base
spaceCharacters: str
-class Filter(base.Filter):
+class Filter(base.Filter[dict[str, Incomplete]]):
require_matching_tags: bool
- def __init__(self, source, require_matching_tags: bool = True) -> None: ...
- def __iter__(self): ...
+ def __init__(self, source: Iterable[dict[str, Incomplete]], require_matching_tags: bool = True) -> None: ...
diff --git a/stubs/html5lib/html5lib/filters/optionaltags.pyi b/stubs/html5lib/html5lib/filters/optionaltags.pyi
index 81045a0e068f..9050d864e756 100644
--- a/stubs/html5lib/html5lib/filters/optionaltags.pyi
+++ b/stubs/html5lib/html5lib/filters/optionaltags.pyi
@@ -3,8 +3,7 @@ from collections.abc import Generator
from . import base
-class Filter(base.Filter):
+class Filter(base.Filter[dict[str, Incomplete]]):
def slider(self) -> Generator[tuple[Incomplete, Incomplete, Incomplete]]: ...
- def __iter__(self): ...
def is_optional_start(self, tagname: str, previous, next) -> bool: ...
def is_optional_end(self, tagname: str, next) -> bool: ...
diff --git a/stubs/html5lib/html5lib/filters/sanitizer.pyi b/stubs/html5lib/html5lib/filters/sanitizer.pyi
index 7761f2719a5a..3308d7fd60cd 100644
--- a/stubs/html5lib/html5lib/filters/sanitizer.pyi
+++ b/stubs/html5lib/html5lib/filters/sanitizer.pyi
@@ -1,4 +1,5 @@
import re
+from _typeshed import Incomplete
from collections.abc import Iterable
from typing_extensions import deprecated
@@ -19,7 +20,7 @@ allowed_content_types: frozenset[str]
data_content_type: re.Pattern[str]
@deprecated("html5lib's sanitizer is deprecated; see https://github.com/html5lib/html5lib-python/issues/443")
-class Filter(base.Filter):
+class Filter(base.Filter[dict[str, Incomplete]]):
allowed_elements: Iterable[tuple[str | None, str]]
allowed_attributes: Iterable[tuple[str | None, str]]
allowed_css_properties: Iterable[str]
@@ -32,7 +33,7 @@ class Filter(base.Filter):
svg_allow_local_href: Iterable[tuple[str | None, str]]
def __init__(
self,
- source,
+ source: Iterable[dict[str, Incomplete]],
allowed_elements: Iterable[tuple[str | None, str]] = ...,
allowed_attributes: Iterable[tuple[str | None, str]] = ...,
allowed_css_properties: Iterable[str] = ...,
@@ -44,8 +45,7 @@ class Filter(base.Filter):
svg_attr_val_allows_ref: Iterable[tuple[str | None, str]] = ...,
svg_allow_local_href: Iterable[tuple[str | None, str]] = ...,
) -> None: ...
- def __iter__(self): ...
- def sanitize_token(self, token): ...
- def allowed_token(self, token): ...
- def disallowed_token(self, token): ...
+ def sanitize_token(self, token: dict[str, Incomplete]) -> dict[str, Incomplete] | None: ...
+ def allowed_token(self, token: dict[str, Incomplete]) -> dict[str, Incomplete]: ...
+ def disallowed_token(self, token: dict[str, Incomplete]) -> dict[str, Incomplete]: ...
def sanitize_css(self, style: str) -> str: ...
diff --git a/stubs/html5lib/html5lib/filters/whitespace.pyi b/stubs/html5lib/html5lib/filters/whitespace.pyi
index 39f818100c7a..89334b070913 100644
--- a/stubs/html5lib/html5lib/filters/whitespace.pyi
+++ b/stubs/html5lib/html5lib/filters/whitespace.pyi
@@ -1,12 +1,12 @@
import re
+from _typeshed import Incomplete
from . import base
spaceCharacters: str
SPACES_REGEX: re.Pattern[str]
-class Filter(base.Filter):
+class Filter(base.Filter[dict[str, Incomplete]]):
spacePreserveElements: frozenset[str]
- def __iter__(self): ...
def collapse_spaces(text: str) -> str: ...
diff --git a/stubs/html5lib/html5lib/html5parser.pyi b/stubs/html5lib/html5lib/html5parser.pyi
index ad3adaca4eb6..114ce03ca233 100644
--- a/stubs/html5lib/html5lib/html5parser.pyi
+++ b/stubs/html5lib/html5lib/html5parser.pyi
@@ -54,9 +54,9 @@ class HTMLParser:
def reparseTokenNormal(self, token: dict[str, Any]) -> None: ...
def resetInsertionMode(self) -> None: ...
originalPhase: Incomplete
- def parseRCDataRawtext(self, token, contentType) -> None: ...
+ def parseRCDataRawtext(self, token, contentType: Literal["RAWTEXT", "RCDATA"]) -> None: ...
-def getPhases(debug): ...
+def getPhases(debug: bool | None) -> dict[str, type]: ...
def adjust_attributes(token: dict[str, Any], replacements: dict[str, Any]) -> None: ...
def impliedTagToken(
name: str, type: str = "EndTag", attributes: dict[str, Any] | None = None, selfClosing: bool = False