Initial commit

This commit is contained in:
ItsDrike 2025-07-27 12:51:35 +02:00
commit 9fba4b3d34
Signed by: ItsDrike
GPG key ID: FA2745890B7048C0
17 changed files with 2939 additions and 0 deletions

474
tests/test_parser.py Normal file
View file

@ -0,0 +1,474 @@
from typing import cast
import pytest
from src.parser import (
DescendantSelector,
InvalidSelectorError,
MultiSelector,
NotPseudoClassSelector,
NthChildPseudoClassSelector,
PseudoClassSelector,
SiblingSelector,
SimpleSelector,
parse_tokens,
)
from src.tokenizer import Token, TokenStream, TokenType
def test_parse_simple_tag_class_id() -> None:
    """A tag followed by a class and an ID parses into one SimpleSelector."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.CLASS, ".foo"),
            Token(TokenType.ID, "#bar"),
        ]
    )

    parsed = SimpleSelector.parse_tokens(stream)

    # Class / ID names are expected without their leading "." / "#" marker.
    assert parsed.tag == "div"
    assert parsed.classes == ["foo"]
    assert parsed.ids == ["bar"]
def test_parse_simple_selector_id_only() -> None:
    """A lone ID token yields a selector with no tag and no classes."""
    stream = TokenStream([Token(TokenType.ID, "#foo")])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == []
    assert parsed.ids == ["foo"]
def test_parse_simple_selector_class_only() -> None:
    """A lone class token yields a selector with no tag and no IDs."""
    stream = TokenStream([Token(TokenType.CLASS, ".foo")])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo"]
    assert parsed.ids == []
def test_parse_simple_selector_multi_class_only() -> None:
    """Consecutive class tokens are all collected, in their original order."""
    class_tokens = [
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.CLASS, ".bar"),
    ]
    stream = TokenStream(class_tokens)

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo", "bar"]
    assert parsed.ids == []
def test_parse_simple_selector_multiple_ids_warns() -> None:
    """Several IDs on one simple selector still parse, but emit a UserWarning."""
    id_tokens = [
        Token(TokenType.ID, "#one"),
        Token(TokenType.ID, "#two"),
    ]
    stream = TokenStream(id_tokens)

    # Only the parse call sits inside the context manager, so the warning
    # check applies to parsing alone.
    with pytest.warns(UserWarning, match="multiple IDs"):
        parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.ids == ["one", "two"]
def test_parse_simple_selector_invalid_double_tag_raises() -> None:
    """Two tag tokens in a row are rejected with InvalidSelectorError."""
    # The tokenizer should never produce this sequence in the first place,
    # but the parser must still fail loudly if it is handed one.
    double_tag = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.TAG, "span"),
    ]
    stream = TokenStream(double_tag)

    with pytest.raises(InvalidSelectorError, match="multiple tag"):
        _ = SimpleSelector.parse_tokens(stream)
@pytest.mark.parametrize(
    "extra_tokens",
    [
        [Token(TokenType.PSEUDO_CLASS, ":first-child")],
        [Token(TokenType.DESCENDANT, " "), Token(TokenType.TAG, "p")],
        [Token(TokenType.COMMA, ", "), Token(TokenType.TAG, "p")],
    ],
)
def test_parse_simple_tag_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """SimpleSelector parsing stops at the first token that is not part of it."""
    simple_part = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.ID, "#bar"),
    ]
    stream = TokenStream(simple_part + extra_tokens)

    _ = SimpleSelector.parse_tokens(stream)

    # The stream must now be positioned on the first trailing token.
    assert stream.peek() == extra_tokens[0]
def test_parse_descendant_selector() -> None:
    """A whitespace combinator yields a non-direct DescendantSelector.

    Covers ``div .foo``: the left operand becomes ``parent``, the right
    operand becomes ``child``, and the selector is not flagged as a direct
    (``>``) relationship.
    """
    tokens = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DESCENDANT, " "),
            Token(TokenType.CLASS, ".foo"),
        ]
    )

    sel = parse_tokens(tokens)

    assert isinstance(sel, DescendantSelector)
    # Counterpart of the ``direct is True`` check in the direct-child test;
    # without it a parser treating every combinator as ">" would still pass.
    assert sel.direct is False
    assert isinstance(sel.parent, SimpleSelector)
    assert sel.parent.tag == "div"
    assert isinstance(sel.child, SimpleSelector)
    assert sel.child.classes == ["foo"]
def test_parse_direct_child_selector() -> None:
    """The ``>`` combinator yields a DescendantSelector flagged as direct."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DIRECT_CHILD, ">"),
            Token(TokenType.CLASS, ".bar"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, DescendantSelector)
    assert parsed.direct is True

    # Left operand of the combinator.
    assert isinstance(parsed.parent, SimpleSelector)
    assert parsed.parent.tag == "div"

    # Right operand of the combinator.
    assert isinstance(parsed.child, SimpleSelector)
    assert parsed.child.classes == ["bar"]
def test_parse_sibling_selector_adjacent() -> None:
    """The ``+`` combinator yields an adjacent SiblingSelector."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.ADJACENT_SIBLING, "+"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, SiblingSelector)
    assert parsed.is_adjacent is True

    # The left operand is stored as the sibling to look for ...
    assert isinstance(parsed.sibling_selector, SimpleSelector)
    assert parsed.sibling_selector.classes == ["a"]

    # ... and the right operand as the selector itself.
    assert isinstance(parsed.selector, SimpleSelector)
    assert parsed.selector.classes == ["b"]
def test_parse_sibling_selector_subsequent() -> None:
    """The ``~`` combinator yields a non-adjacent SiblingSelector."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.SUBSEQUENT_SIBLING, "~"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, SiblingSelector)
    assert parsed.is_adjacent is False

    # The left operand is stored as the sibling to look for ...
    assert isinstance(parsed.sibling_selector, SimpleSelector)
    assert parsed.sibling_selector.classes == ["a"]

    # ... and the right operand as the selector itself.
    assert isinstance(parsed.selector, SimpleSelector)
    assert parsed.selector.classes == ["b"]
def test_parse_multi_selector() -> None:
    """Comma-separated selectors are gathered into one MultiSelector, in order."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".b"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".c"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, MultiSelector)
    assert len(parsed.selectors) == 3
    assert all(isinstance(subsel, SimpleSelector) for subsel in parsed.selectors)

    # The isinstance check above is invisible to the type checker, hence the cast.
    members = cast("list[SimpleSelector]", parsed.selectors)
    first, second, third = members
    assert first.classes == ["a"]
    assert second.classes == ["b"]
    assert third.classes == ["c"]
def test_pseudo_class_without_arguments() -> None:
    """A bare pseudo-class wraps the given selector and carries no argument."""
    wrapped = SimpleSelector(tag="div")
    stream = TokenStream([Token(TokenType.PSEUDO_CLASS, ":first-child")])

    parsed = PseudoClassSelector.parse_tokens(stream, wrapped)

    # The name is expected without its leading ":".
    assert parsed.pseudo_class == "first-child"
    assert parsed.selector == wrapped
    assert parsed.argument is None
@pytest.mark.parametrize(
    "extra_tokens",
    [
        [Token(TokenType.PSEUDO_CLASS, ":first-child")],
        [Token(TokenType.DESCENDANT, " "), Token(TokenType.TAG, "p")],
        [Token(TokenType.COMMA, ", "), Token(TokenType.TAG, "p")],
    ],
)
def test_pseudo_class_without_arguments_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """Parsing an argument-less pseudo-class must not consume what follows it."""
    wrapped = SimpleSelector(tag="div")
    pseudo_part = [Token(TokenType.PSEUDO_CLASS, ":first-child")]
    stream = TokenStream(pseudo_part + extra_tokens)

    _ = PseudoClassSelector.parse_tokens(stream, wrapped)

    # The stream must now be positioned on the first trailing token.
    assert stream.peek() == extra_tokens[0]
def test_pseudo_class_with_nested_argument() -> None:
    """Tokens between the parentheses are captured verbatim as the argument."""
    wrapped = SimpleSelector(tag="div")
    inner_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *inner_tokens,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    parsed = PseudoClassSelector.parse_tokens(stream, wrapped)

    assert parsed.pseudo_class == "not"
    assert parsed.selector == wrapped
    # The argument is the raw token list, without the enclosing parentheses.
    assert parsed.argument == inner_tokens
def test_parse_pseudo_class_nested_parens() -> None:
    """Parentheses inside the argument stay in it; only the outer pair is dropped."""
    wrapped = SimpleSelector(tag="div")
    # Tokens for ``:nth-child(2)``, used as the argument of ``:not(...)``.
    inner_tokens = [
        Token(TokenType.PSEUDO_CLASS, ":nth-child"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.NUMBER, "2"),
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *inner_tokens,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    parsed = PseudoClassSelector.parse_tokens(stream, wrapped)

    assert parsed.pseudo_class == "not"
    assert parsed.argument == inner_tokens
@pytest.mark.parametrize(
    "extra_tokens",
    [
        [Token(TokenType.PSEUDO_CLASS, ":first-child")],
        [Token(TokenType.DESCENDANT, " "), Token(TokenType.TAG, "p")],
        [Token(TokenType.COMMA, ", "), Token(TokenType.TAG, "p")],
    ],
)
def test_pseudo_class_with_nested_argument_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """Parsing stops right after the closing parenthesis of the argument."""
    wrapped = SimpleSelector(tag="div")
    inner_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    pseudo_part = [
        Token(TokenType.PSEUDO_CLASS, ":not"),
        Token(TokenType.LPARENS, "("),
        *inner_tokens,
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(pseudo_part + extra_tokens)

    _ = PseudoClassSelector.parse_tokens(stream, wrapped)

    # The stream must now be positioned on the first trailing token.
    assert stream.peek() == extra_tokens[0]
def test_pseudo_class_unbalanced_parens() -> None:
    """An argument whose outer parenthesis is never closed is rejected."""
    wrapped = SimpleSelector(tag="div")
    # ``:not(span.foo:nth-child(2)`` - the inner pair is balanced, but the
    # parenthesis opened right after ``:not`` has no matching ")".
    unbalanced = [
        Token(TokenType.PSEUDO_CLASS, ":not"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.PSEUDO_CLASS, ":nth-child"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.NUMBER, "2"),
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(unbalanced)

    with pytest.raises(InvalidSelectorError):
        _ = PseudoClassSelector.parse_tokens(stream, wrapped)
def test_nth_child_valid() -> None:
    """``:nth-child`` with a single NUMBER argument exposes it as an int ``n``."""
    wrapped = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", wrapped, [Token(TokenType.NUMBER, "3")])

    specialized = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialized.n == 3
    assert specialized.selector == wrapped
@pytest.mark.parametrize(
    "argument_tokens",
    [
        pytest.param(
            [Token(TokenType.CLASS, ".bad")],
            id="bad-type",
        ),
        pytest.param(
            [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")],
            id="multi-token",
        ),
        pytest.param(
            [Token(TokenType.NUMBER, "5"), Token(TokenType.TAG, "div")],
            id="multi-token-number-first",
        ),
    ],
)
def test_nth_child_parsing_invalid_argument(argument_tokens: list[Token]) -> None:
    """``:nth-child`` rejects any argument that is not exactly one NUMBER token."""
    wrapped = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", wrapped, argument_tokens)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
def test_nth_child_parsing_missing_argument() -> None:
    """``:nth-child`` without any argument is rejected."""
    wrapped = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", wrapped, None)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
@pytest.mark.parametrize(
    ("selector", "n"),
    [
        ("first-child", 1),
        ("last-child", -1),
    ],
)
def test_specific_nth_child(selector: str, n: int) -> None:
    """``:first-child`` / ``:last-child`` convert to nth-child with a fixed ``n``."""
    wrapped = SimpleSelector(tag="li")
    generic = PseudoClassSelector(selector, wrapped, None)

    specialized = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialized.n == n
    assert specialized.selector == wrapped
def test_specific_nth_child_with_argument() -> None:
    """``:first-child`` takes no argument, so supplying one is an error."""
    wrapped = SimpleSelector(tag="li")
    unexpected_argument = [Token(TokenType.NUMBER, "4")]
    generic = PseudoClassSelector("first-child", wrapped, unexpected_argument)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
def test_last_child_parsing() -> None:
    """``:last-child`` converts to an nth-child selector with ``n == -1``."""
    wrapped = SimpleSelector(tag="li")
    generic = PseudoClassSelector("last-child", wrapped, None)

    specialized = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialized.n == -1
    assert specialized.selector == wrapped
def test_parse_not_valid() -> None:
    """The raw argument tokens of ``:not`` are parsed into a nested selector."""
    wrapped = SimpleSelector(tag="div")
    negated_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    generic = PseudoClassSelector("not", wrapped, negated_tokens)

    specialized = NotPseudoClassSelector.from_pseudo_cls(generic)

    negated = specialized.not_selector
    assert isinstance(negated, SimpleSelector)
    assert negated.tag == "span"
    assert negated.classes == ["foo"]
    assert specialized.selector == wrapped
def test_parse_not_with_missing_argument() -> None:
    """``:not`` without an argument is rejected."""
    wrapped = SimpleSelector(tag="div")
    generic = PseudoClassSelector("not", wrapped, None)

    with pytest.raises(InvalidSelectorError):
        _ = NotPseudoClassSelector.from_pseudo_cls(generic)
def test_parse_multiple_combinators() -> None:
    """A selector chaining several combinators and pseudo-classes nests correctly.

    Parses ``div .parent > .child + .sibling:not(.bar):first-child`` and walks
    the resulting tree from the outermost combinator inwards, checking the
    type of every node as well as which descendant relationship is direct.
    """
    # div .parent > .child + .sibling:not(.bar):first-child
    tokens = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DESCENDANT, " "),
            Token(TokenType.CLASS, ".parent"),
            Token(TokenType.DIRECT_CHILD, ">"),
            Token(TokenType.CLASS, ".child"),
            Token(TokenType.ADJACENT_SIBLING, "+"),
            Token(TokenType.CLASS, ".sibling"),
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            Token(TokenType.CLASS, ".bar"),
            Token(TokenType.RPARENS, ")"),
            Token(TokenType.PSEUDO_CLASS, ":first-child"),
        ]
    )

    sel = parse_tokens(tokens)

    # Outermost node: ``div <descendant> ...`` (the whitespace combinator).
    assert isinstance(sel, DescendantSelector)
    assert sel.direct is False
    assert isinstance(sel.parent, SimpleSelector)
    assert sel.parent.tag == "div"

    # Next level: ``.parent > ...`` (the direct-child combinator).
    assert isinstance(sel.child, DescendantSelector)
    assert sel.child.direct is True
    assert isinstance(sel.child.parent, SimpleSelector)
    assert sel.child.parent.classes == ["parent"]

    sibling = sel.child.child  # .child + .sibling:not(.bar):first-child
    assert isinstance(sibling, SiblingSelector)
    assert sibling.is_adjacent is True
    assert isinstance(sibling.sibling_selector, SimpleSelector)
    assert sibling.sibling_selector.classes == ["child"]

    # Pseudo-classes wrap from the inside out, so the last one written
    # (``:first-child``) is the outermost wrapper.
    nth_child = sibling.selector  # .sibling:not(.bar):first-child
    assert isinstance(nth_child, NthChildPseudoClassSelector)
    assert nth_child.n == 1

    not_selector = nth_child.selector  # .sibling:not(.bar)
    assert isinstance(not_selector, NotPseudoClassSelector)
    assert isinstance(not_selector.not_selector, SimpleSelector)
    assert not_selector.not_selector.classes == ["bar"]
    assert isinstance(not_selector.selector, SimpleSelector)
    assert not_selector.selector.classes == ["sibling"]