# Listing metadata from the code viewer: 482 lines, 14 KiB, Python.
from typing import cast
|
|
|
|
import pytest
|
|
|
|
from src.parser import (
|
|
DescendantSelector,
|
|
InvalidSelectorError,
|
|
MultiSelector,
|
|
NotPseudoClassSelector,
|
|
NthChildPseudoClassSelector,
|
|
PseudoClassSelector,
|
|
SiblingSelector,
|
|
SimpleSelector,
|
|
parse_tokens,
|
|
)
|
|
from src.tokenizer import Token, TokenStream, TokenType
|
|
|
|
|
|
def test_parse_simple_tag_class_id() -> None:
    """Parse a tag + class + ID token run and check all three selector fields."""
    raw_tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.ID, "#bar"),
    ]
    stream = TokenStream(raw_tokens)

    parsed = SimpleSelector.parse_tokens(stream)

    # The asserted values carry no leading "." / "#" markers.
    assert parsed.tag == "div"
    assert parsed.classes == ["foo"]
    assert parsed.ids == ["bar"]
|
|
|
|
|
|
def test_parse_simple_selector_id_only() -> None:
    """A lone ID token yields a selector with no tag and no classes."""
    id_token = Token(TokenType.ID, "#foo")
    stream = TokenStream([id_token])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == []
    assert parsed.ids == ["foo"]
|
|
|
|
|
|
def test_parse_simple_selector_class_only() -> None:
    """A lone class token yields a selector with no tag and no IDs."""
    class_token = Token(TokenType.CLASS, ".foo")
    stream = TokenStream([class_token])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo"]
    assert parsed.ids == []
|
|
|
|
|
|
def test_parse_simple_selector_multi_class_only() -> None:
    """Two consecutive class tokens are both collected, in token order."""
    class_tokens = [
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.CLASS, ".bar"),
    ]
    stream = TokenStream(class_tokens)

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo", "bar"]
    assert parsed.ids == []
|
|
|
|
|
|
def test_parse_simple_selector_multiple_ids_warns() -> None:
    """Two ID tokens emit a ``UserWarning`` but both IDs are still recorded."""
    id_tokens = [
        Token(TokenType.ID, "#one"),
        Token(TokenType.ID, "#two"),
    ]
    stream = TokenStream(id_tokens)

    # The warning message must match "multiple IDs".
    with pytest.warns(UserWarning, match="multiple IDs"):
        parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.ids == ["one", "two"]
|
|
|
|
|
|
def test_parse_simple_selector_invalid_double_tag_raises() -> None:
    """Two tag tokens in one simple selector raise ``InvalidSelectorError``."""
    # The tokenizer should never produce two adjacent tags, but the parser
    # is still expected to reject such a stream with an exception.
    tag_tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.TAG, "span"),
    ]
    stream = TokenStream(tag_tokens)

    with pytest.raises(InvalidSelectorError, match="multiple tag"):
        _ = SimpleSelector.parse_tokens(stream)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "extra_tokens",
    [
        # Explicit ids keep the generated test names readable, matching the
        # pytest.param(..., id=...) style used elsewhere in this module.
        pytest.param(
            [Token(TokenType.PSEUDO_CLASS, ":first-child")],
            id="pseudo-class",
        ),
        pytest.param(
            [
                Token(TokenType.DESCENDANT, " "),
                Token(TokenType.TAG, "p"),
            ],
            id="descendant",
        ),
        pytest.param(
            [
                Token(TokenType.COMMA, ", "),
                Token(TokenType.TAG, "p"),
            ],
            id="comma",
        ),
    ],
)
def test_parse_simple_tag_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """Check that simple-selector parsing leaves trailing tokens in the stream.

    Args:
        extra_tokens: Tokens appended after the tag/class/ID run; after
            parsing, the first of them must be the stream's next token.
    """
    tokens = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.CLASS, ".foo"),
            Token(TokenType.ID, "#bar"),
            *extra_tokens,
        ]
    )

    # The parsed selector is irrelevant here; only the stream position matters.
    _ = SimpleSelector.parse_tokens(tokens)

    assert tokens.peek() == extra_tokens[0]
|
|
|
|
|
|
def test_parse_descendant_selector() -> None:
    """``div .foo`` parses into a DescendantSelector with simple parent and child."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DESCENDANT, " "),
            Token(TokenType.CLASS, ".foo"),
        ]
    )

    result = parse_tokens(stream)
    assert isinstance(result, DescendantSelector)

    # Left-hand side of the combinator.
    assert isinstance(result.parent, SimpleSelector)
    assert result.parent.tag == "div"

    # Right-hand side of the combinator.
    assert isinstance(result.child, SimpleSelector)
    assert result.child.classes == ["foo"]
|
|
|
|
|
|
def test_parse_direct_child_selector() -> None:
    """``div>.bar`` parses into a DescendantSelector flagged as direct."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DIRECT_CHILD, ">"),
            Token(TokenType.CLASS, ".bar"),
        ]
    )

    result = parse_tokens(stream)
    assert isinstance(result, DescendantSelector)
    assert result.direct is True

    # Left-hand side of the combinator.
    assert isinstance(result.parent, SimpleSelector)
    assert result.parent.tag == "div"

    # Right-hand side of the combinator.
    assert isinstance(result.child, SimpleSelector)
    assert result.child.classes == ["bar"]
|
|
|
|
|
|
def test_parse_sibling_selector_adjacent() -> None:
    """``.a+.b`` parses into an adjacent SiblingSelector."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.ADJACENT_SIBLING, "+"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    result = parse_tokens(stream)
    assert isinstance(result, SiblingSelector)
    assert result.is_adjacent is True

    # ``sibling_selector`` holds the left operand (.a) ...
    assert isinstance(result.sibling_selector, SimpleSelector)
    assert result.sibling_selector.classes == ["a"]

    # ... and ``selector`` holds the right operand (.b).
    assert isinstance(result.selector, SimpleSelector)
    assert result.selector.classes == ["b"]
|
|
|
|
|
|
def test_parse_sibling_selector_subsequent() -> None:
    """``.a~.b`` parses into a non-adjacent SiblingSelector."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.SUBSEQUENT_SIBLING, "~"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    result = parse_tokens(stream)
    assert isinstance(result, SiblingSelector)
    assert result.is_adjacent is False

    # ``sibling_selector`` holds the left operand (.a) ...
    assert isinstance(result.sibling_selector, SimpleSelector)
    assert result.sibling_selector.classes == ["a"]

    # ... and ``selector`` holds the right operand (.b).
    assert isinstance(result.selector, SimpleSelector)
    assert result.selector.classes == ["b"]
|
|
|
|
|
|
def test_parse_multi_selector() -> None:
    """``.a,.b,.c`` parses into a MultiSelector of three simple selectors."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".b"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".c"),
        ]
    )

    result = parse_tokens(stream)
    assert isinstance(result, MultiSelector)
    assert len(result.selectors) == 3
    assert all(isinstance(member, SimpleSelector) for member in result.selectors)

    # The isinstance check above cannot narrow the list type for the type
    # checker, so cast before reading ``classes``.
    members = cast("list[SimpleSelector]", result.selectors)
    first, second, third = members[0], members[1], members[2]
    assert first.classes == ["a"]
    assert second.classes == ["b"]
    assert third.classes == ["c"]
|
|
|
|
|
|
def test_pseudo_class_without_arguments() -> None:
    """A bare ``:first-child`` token parses with no argument attached."""
    inner = SimpleSelector(tag="div")
    stream = TokenStream([Token(TokenType.PSEUDO_CLASS, ":first-child")])

    parsed = PseudoClassSelector.parse_tokens(stream, inner)

    # The asserted name carries no leading ":".
    assert parsed.pseudo_class == "first-child"
    assert parsed.selector == inner
    assert parsed.argument is None
|
|
|
|
|
|
@pytest.mark.parametrize(
    "extra_tokens",
    [
        # Explicit ids keep the generated test names readable, matching the
        # pytest.param(..., id=...) style used elsewhere in this module.
        pytest.param(
            [Token(TokenType.PSEUDO_CLASS, ":first-child")],
            id="pseudo-class",
        ),
        pytest.param(
            [
                Token(TokenType.DESCENDANT, " "),
                Token(TokenType.TAG, "p"),
            ],
            id="descendant",
        ),
        pytest.param(
            [
                Token(TokenType.COMMA, ", "),
                Token(TokenType.TAG, "p"),
            ],
            id="comma",
        ),
    ],
)
def test_pseudo_class_without_arguments_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """Check that an argument-less pseudo-class leaves trailing tokens unread.

    Args:
        extra_tokens: Tokens appended after the ``:first-child`` token; after
            parsing, the first of them must be the stream's next token.
    """
    base = SimpleSelector(tag="div")
    tokens = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":first-child"),
            *extra_tokens,
        ]
    )

    # The parsed selector is irrelevant here; only the stream position matters.
    _ = PseudoClassSelector.parse_tokens(tokens, base)

    assert tokens.peek() == extra_tokens[0]
|
|
|
|
|
|
def test_pseudo_class_with_nested_argument() -> None:
    """``:not(span.foo)`` keeps the parenthesised tokens as the raw argument."""
    inner = SimpleSelector(tag="div")
    expected_argument = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    # Wrap the argument tokens in ":not" "(" ... ")".
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *expected_argument,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    parsed = PseudoClassSelector.parse_tokens(stream, inner)

    assert parsed.pseudo_class == "not"
    assert parsed.selector == inner
    assert parsed.argument == expected_argument
|
|
|
|
|
|
def test_parse_pseudo_class_nested_parens() -> None:
    """``:not(:nth-child(2))`` keeps the inner parentheses inside the argument."""
    inner = SimpleSelector(tag="div")
    # The argument itself contains a balanced "(" ... ")" pair.
    expected_argument = [
        Token(TokenType.PSEUDO_CLASS, ":nth-child"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.NUMBER, "2"),
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *expected_argument,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    parsed = PseudoClassSelector.parse_tokens(stream, inner)

    assert parsed.pseudo_class == "not"
    assert parsed.argument == expected_argument
|
|
|
|
|
|
@pytest.mark.parametrize(
    "extra_tokens",
    [
        # Explicit ids keep the generated test names readable, matching the
        # pytest.param(..., id=...) style used elsewhere in this module.
        pytest.param(
            [Token(TokenType.PSEUDO_CLASS, ":first-child")],
            id="pseudo-class",
        ),
        pytest.param(
            [
                Token(TokenType.DESCENDANT, " "),
                Token(TokenType.TAG, "p"),
            ],
            id="descendant",
        ),
        pytest.param(
            [
                Token(TokenType.COMMA, ", "),
                Token(TokenType.TAG, "p"),
            ],
            id="comma",
        ),
    ],
)
def test_pseudo_class_with_nested_argument_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """Check that a pseudo-class with an argument leaves trailing tokens unread.

    Args:
        extra_tokens: Tokens appended after the closing parenthesis of
            ``:not(span.foo)``; after parsing, the first of them must be the
            stream's next token.
    """
    base = SimpleSelector(tag="div")
    arg_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    tokens = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *arg_tokens,
            Token(TokenType.RPARENS, ")"),
            *extra_tokens,
        ]
    )

    # The parsed selector is irrelevant here; only the stream position matters.
    _ = PseudoClassSelector.parse_tokens(tokens, base)

    assert tokens.peek() == extra_tokens[0]
|
|
|
|
|
|
def test_pseudo_class_unbalanced_parens() -> None:
    """A ``:not(`` whose outer parenthesis is never closed raises an error."""
    inner = SimpleSelector(tag="div")
    # Two "(" tokens but only one ")": the outer group stays open.
    unbalanced = [
        Token(TokenType.PSEUDO_CLASS, ":not"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.PSEUDO_CLASS, ":nth-child"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.NUMBER, "2"),
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(unbalanced)

    with pytest.raises(InvalidSelectorError):
        _ = PseudoClassSelector.parse_tokens(stream, inner)
|
|
|
|
|
|
def test_nth_child_valid() -> None:
    """``li:nth-child(3)`` converts to an nth-child selector with ``n == 3``."""
    inner = SimpleSelector(tag="li")
    number_argument = [Token(TokenType.NUMBER, "3")]
    generic = PseudoClassSelector("nth-child", inner, number_argument)

    converted = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    # The NUMBER token's string value is expected as an int.
    assert converted.n == 3
    assert converted.selector == inner
|
|
|
|
|
|
@pytest.mark.parametrize(
    "argument_tokens",
    [
        # A single token of the wrong type.
        pytest.param([Token(TokenType.CLASS, ".bad")], id="bad-type"),
        # Several tokens, none of them a number.
        pytest.param(
            [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")],
            id="multi-token",
        ),
        # A number followed by an extra token.
        pytest.param(
            [Token(TokenType.NUMBER, "5"), Token(TokenType.TAG, "div")],
            id="multi-token-number-first",
        ),
    ],
)
def test_nth_child_parsing_invalid_argument(argument_tokens: list[Token]) -> None:
    """``:nth-child`` rejects any argument other than a single number token."""
    inner = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", inner, argument_tokens)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
|
|
|
|
|
|
def test_nth_child_parsing_missing_argument() -> None:
    """``:nth-child`` with no argument at all raises ``InvalidSelectorError``."""
    inner = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", inner, None)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("selector", "n"),
    [("first-child", 1), ("last-child", -1)],
)
def test_specific_nth_child(selector: str, n: int) -> None:
    """``:first-child`` / ``:last-child`` convert to fixed ``n`` values."""
    inner = SimpleSelector(tag="li")
    # These pseudo-classes take no argument.
    generic = PseudoClassSelector(selector, inner, None)

    converted = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert converted.n == n
    assert converted.selector == inner
|
|
|
|
|
|
def test_specific_nth_child_with_argument() -> None:
    """``:first-child`` given an argument raises ``InvalidSelectorError``."""
    inner = SimpleSelector(tag="li")
    unexpected_argument = [Token(TokenType.NUMBER, "4")]
    generic = PseudoClassSelector("first-child", inner, unexpected_argument)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)
|
|
|
|
|
|
def test_last_child_parsing() -> None:
    """``li:last-child`` converts to an nth-child selector with ``n == -1``."""
    inner = SimpleSelector(tag="li")
    generic = PseudoClassSelector("last-child", inner, None)

    converted = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert converted.n == -1
    assert converted.selector == inner
|
|
|
|
|
|
def test_parse_not_valid() -> None:
    """``div:not(span.foo)`` parses its argument tokens into a nested selector."""
    inner = SimpleSelector(tag="div")
    negated_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    generic = PseudoClassSelector("not", inner, negated_tokens)

    converted = NotPseudoClassSelector.from_pseudo_cls(generic)

    # The raw argument tokens come back as a parsed SimpleSelector.
    assert isinstance(converted.not_selector, SimpleSelector)
    assert converted.not_selector.tag == "span"
    assert converted.not_selector.classes == ["foo"]
    assert converted.selector == inner
|
|
|
|
|
|
def test_parse_not_with_missing_argument() -> None:
    """``:not`` with no argument raises ``InvalidSelectorError``."""
    inner = SimpleSelector(tag="div")
    generic = PseudoClassSelector("not", inner, None)

    with pytest.raises(InvalidSelectorError):
        _ = NotPseudoClassSelector.from_pseudo_cls(generic)
|
|
|
|
|
|
def test_parse_class_after_pseudo_class() -> None:
    """A class token after a pseudo-class ends up on the wrapped selector."""
    # Token order is ":first-child" then ".bar".
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":first-child"),
            Token(TokenType.CLASS, ".bar"),
        ]
    )

    result = parse_tokens(stream)

    assert isinstance(result, NthChildPseudoClassSelector)
    assert isinstance(result.selector, SimpleSelector)
    assert result.selector.classes == ["bar"]
|
|
|
|
|
|
def test_parse_multiple_combinators() -> None:
    """Check the nesting produced when several combinators are chained.

    Selector under test: ``div .parent > .child + .sibling:not(.bar):first-child``
    """
    selector_tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.DESCENDANT, " "),
        Token(TokenType.CLASS, ".parent"),
        Token(TokenType.DIRECT_CHILD, ">"),
        Token(TokenType.CLASS, ".child"),
        Token(TokenType.ADJACENT_SIBLING, "+"),
        Token(TokenType.CLASS, ".sibling"),
        Token(TokenType.PSEUDO_CLASS, ":not"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.CLASS, ".bar"),
        Token(TokenType.RPARENS, ")"),
        Token(TokenType.PSEUDO_CLASS, ":first-child"),
    ]
    stream = TokenStream(selector_tokens)

    outer = parse_tokens(stream)

    # Outermost level: div <descendant> (everything else).
    assert isinstance(outer, DescendantSelector)
    assert isinstance(outer.parent, SimpleSelector)
    assert outer.parent.tag == "div"

    # Next level: .parent > (everything else).
    assert isinstance(outer.child, DescendantSelector)
    assert isinstance(outer.child.parent, SimpleSelector)
    assert outer.child.parent.classes == ["parent"]

    # .child + .sibling:not(.bar):first-child
    adjacent = outer.child.child
    assert isinstance(adjacent, SiblingSelector)
    assert adjacent.is_adjacent is True
    assert isinstance(adjacent.sibling_selector, SimpleSelector)
    assert adjacent.sibling_selector.classes == ["child"]

    # .sibling:not(.bar):first-child -- the last pseudo-class wraps outermost.
    first_child = adjacent.selector
    assert isinstance(first_child, NthChildPseudoClassSelector)
    assert first_child.n == 1

    # .sibling:not(.bar)
    negation = first_child.selector
    assert isinstance(negation, NotPseudoClassSelector)
    assert isinstance(negation.not_selector, SimpleSelector)
    assert negation.not_selector.classes == ["bar"]
    assert isinstance(negation.selector, SimpleSelector)
    assert negation.selector.classes == ["sibling"]
|