Handle classes/ids after pseudo-classes

This commit is contained in:
ItsDrike 2025-07-27 18:30:33 +02:00
parent 0545673e61
commit 9f46a6af42
Signed by: ItsDrike
GPG key ID: FA2745890B7048C0
4 changed files with 64 additions and 14 deletions

View file

@ -24,7 +24,7 @@ class SelectorParseError(Exception): ...
class InvalidSelectorError(ValueError): ... class InvalidSelectorError(ValueError): ...
@dataclass(frozen=True) @dataclass
class SimpleSelector: class SimpleSelector:
tag: str | None = None tag: str | None = None
classes: list[str] = field(default_factory=list) classes: list[str] = field(default_factory=list)
@ -61,6 +61,24 @@ class SimpleSelector:
return cls(tag, cls_selectors, id_selectors) return cls(tag, cls_selectors, id_selectors)
def merge_selector(self, other: SimpleSelector) -> None:
    """Fold the classes and IDs of ``other`` into this selector, in place.

    Intended for selectors that could not be parsed in a single pass, e.g. when
    a pseudo-class sits between two parts of what is logically one simple
    selector. ``other`` itself is left untouched; only ``self`` is mutated.

    Raises:
        SelectorParseError: If ``other`` carries a tag. Only class and ID
            parts can be appended to an existing selector.

    Warns:
        If the merged selector ends up holding more than one ID.
    """
    # Guard: a tag on the incoming selector cannot be merged into this one.
    if other.tag is not None:
        raise SelectorParseError(f"Can't merge tag-aware selector with current selector ({self=!r}, {other=!r})")

    # Append in order: existing entries stay first, the new ones follow.
    self.classes.extend(other.classes)
    self.ids.extend(other.ids)

    # Zero or one ID is the normal case; nothing more to report.
    if len(self.ids) <= 1:
        return

    # Kept inline (not in a helper) so that stacklevel=2 still refers to
    # whoever called merge_selector.
    warnings.warn(
        "Simple selector contains multiple IDs. The CSS spec doesn't forbid this, but it will never match.",
        stacklevel=2,
    )
@override @override
def __str__(self) -> str: def __str__(self) -> str:
classes = ".".join(cls_name for cls_name in self.classes) classes = ".".join(cls_name for cls_name in self.classes)
@ -73,7 +91,7 @@ class SimpleSelector:
type ConcretePseudoClassSelector = NotPseudoClassSelector | NthChildPseudoClassSelector type ConcretePseudoClassSelector = NotPseudoClassSelector | NthChildPseudoClassSelector
@dataclass(frozen=True) @dataclass
class PseudoClassSelector: class PseudoClassSelector:
pseudo_class: str pseudo_class: str
selector: SimpleSelector | ConcretePseudoClassSelector | None selector: SimpleSelector | ConcretePseudoClassSelector | None
@ -137,7 +155,7 @@ class PseudoClassSelector:
return cls(pseudo_class, selector, arg) return cls(pseudo_class, selector, arg)
@dataclass(frozen=True) @dataclass
class DescendantSelector: class DescendantSelector:
parent: SimpleSelector | ConcretePseudoClassSelector parent: SimpleSelector | ConcretePseudoClassSelector
child: SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector child: SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector
@ -149,7 +167,7 @@ class DescendantSelector:
return f"{self.parent!s}{symbol}{self.child!s}" return f"{self.parent!s}{symbol}{self.child!s}"
@dataclass(frozen=True) @dataclass
class SiblingSelector: class SiblingSelector:
sibling_selector: SimpleSelector | ConcretePseudoClassSelector sibling_selector: SimpleSelector | ConcretePseudoClassSelector
selector: SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector selector: SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector
@ -164,7 +182,7 @@ class SiblingSelector:
type NonMultiSelector = SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector type NonMultiSelector = SimpleSelector | ConcretePseudoClassSelector | DescendantSelector | SiblingSelector
@dataclass(frozen=True) @dataclass
class MultiSelector: class MultiSelector:
selectors: list[NonMultiSelector] selectors: list[NonMultiSelector]
@ -176,7 +194,7 @@ class MultiSelector:
type AnySelector = NonMultiSelector | MultiSelector type AnySelector = NonMultiSelector | MultiSelector
@dataclass(frozen=True) @dataclass
class NotPseudoClassSelector: class NotPseudoClassSelector:
selector: SimpleSelector | ConcretePseudoClassSelector | None selector: SimpleSelector | ConcretePseudoClassSelector | None
not_selector: AnySelector not_selector: AnySelector
@ -200,7 +218,7 @@ class NotPseudoClassSelector:
return f"{sel}:not({self.not_selector!s})" return f"{sel}:not({self.not_selector!s})"
@dataclass(frozen=True) @dataclass
class NthChildPseudoClassSelector: class NthChildPseudoClassSelector:
selector: SimpleSelector | ConcretePseudoClassSelector | None selector: SimpleSelector | ConcretePseudoClassSelector | None
n: int n: int
@ -320,14 +338,29 @@ def parse_tokens(tokens: TokenStream, root: bool = True) -> AnySelector:
break break
case TokenType.PSEUDO_ELEMENT: case TokenType.PSEUDO_ELEMENT:
raise NotImplementedError("The parser doesn't (yet) support pseudo-elements") raise NotImplementedError("The parser doesn't (yet) support pseudo-elements")
case TokenType.ID | TokenType.CLASS:
# it's possible for a pseudo-class to come before class/id, handle this
# if we see id/class tokens following anything else, that's an error though
if not isinstance(s, (NotPseudoClassSelector, NthChildPseudoClassSelector)):
raise InvalidSelectorError(f"Unexpected token while parsing selector: {tokens!r}")
# Walk all pseudo-classes in between until we get to the simple selector class
# which we're interested in
prev_sel: ConcretePseudoClassSelector = s
simple_sel: ConcretePseudoClassSelector | SimpleSelector | None = s
while not (isinstance(simple_sel, SimpleSelector) or simple_sel is None):
prev_sel = simple_sel
simple_sel = simple_sel.selector
# Attach the selector appropriately (either merging with existing one, or making
# it a new selector)
post_sel = SimpleSelector.parse_tokens(tokens)
if simple_sel is None:
prev_sel.selector = post_sel
else:
simple_sel.merge_selector(post_sel)
case ( case (
TokenType.TAG TokenType.TAG | TokenType.NUMBER | TokenType.LPARENS | TokenType.RPARENS | TokenType.UNKNOWN
| TokenType.CLASS
| TokenType.ID
| TokenType.NUMBER
| TokenType.LPARENS
| TokenType.RPARENS
| TokenType.UNKNOWN
): # we're using an exhaustive case to allow static analysis to catch any missing enum variants ): # we're using an exhaustive case to allow static analysis to catch any missing enum variants
raise InvalidSelectorError(f"Unexpected token while parsing selector: {tokens!r}") raise InvalidSelectorError(f"Unexpected token while parsing selector: {tokens!r}")

View file

@ -65,6 +65,14 @@ selector = st.recursive(
), ),
# Apply pseudo-suffix # Apply pseudo-suffix
st.tuples(s, pseudo_suffixes(s)).map("".join), st.tuples(s, pseudo_suffixes(s)).map("".join),
# Optionally add classes / id after the pseudo-suffix
st.one_of(
s, # no append, keep as-is
st.tuples(
s,
st.lists(st.one_of(css_class, css_id), min_size=1).map("".join),
).map("".join),
),
), ),
max_leaves=10, max_leaves=10,
) )

View file

@ -424,6 +424,14 @@ def test_parse_not_with_missing_argument() -> None:
_ = NotPseudoClassSelector.from_pseudo_cls(pseudo) _ = NotPseudoClassSelector.from_pseudo_cls(pseudo)
def test_parse_class_after_pseudo_class() -> None:
    """A class token following a pseudo-class lands on the pseudo-class' inner simple selector."""
    # Token form of `:first-child.bar`: the pseudo-class comes first, the class after it.
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":first-child"),
            Token(TokenType.CLASS, ".bar"),
        ]
    )

    parsed = parse_tokens(stream)

    # The outer node is the pseudo-class; the trailing class is on its wrapped selector.
    assert isinstance(parsed, NthChildPseudoClassSelector)
    inner = parsed.selector
    assert isinstance(inner, SimpleSelector)
    assert inner.classes == ["bar"]
def test_parse_multiple_combinators() -> None: def test_parse_multiple_combinators() -> None:
# div .parent > .child + .sibling:not(.bar):first-child # div .parent > .child + .sibling:not(.bar):first-child
tokens = TokenStream( tokens = TokenStream(

View file

@ -228,6 +228,7 @@ def test_sibling_selector(selector: str, expected: list[Node]) -> None:
("div:last-child", [SECOND_DIV]), ("div:last-child", [SECOND_DIV]),
("div.nonexistent:last-child", []), ("div.nonexistent:last-child", []),
(".colour-primary:first-child", [INNER_DIV, SECOND_P]), (".colour-primary:first-child", [INNER_DIV, SECOND_P]),
(":first-child.colour-primary", [INNER_DIV, SECOND_P]),
], ],
) )
def test_nth_child_selector(selector: str, expected: list[Node]) -> None: def test_nth_child_selector(selector: str, expected: list[Node]) -> None: