"""Tests for the selector parser: simple selectors, combinators and pseudo-classes.

Every test feeds hand-built ``Token`` sequences to the parser entry points, so the
tokenizer itself is never exercised here.
"""

from typing import cast

import pytest

from src.parser import (
    DescendantSelector,
    InvalidSelectorError,
    MultiSelector,
    NotPseudoClassSelector,
    NthChildPseudoClassSelector,
    PseudoClassSelector,
    SiblingSelector,
    SimpleSelector,
    parse_tokens,
)
from src.tokenizer import Token, TokenStream, TokenType


def _trailing_token_cases() -> list[list[Token]]:
    """Return the token runs that the "leaves extra tokens" tests append to their input.

    A new list of new tokens is built on every call, so each parametrized test gets
    objects of its own rather than sharing them with the other tests.
    """
    return [
        [Token(TokenType.PSEUDO_CLASS, ":first-child")],
        [Token(TokenType.DESCENDANT, " "), Token(TokenType.TAG, "p")],
        [Token(TokenType.COMMA, ", "), Token(TokenType.TAG, "p")],
    ]


def test_parse_simple_tag_class_id() -> None:
    """A tag, a class and an ID token fill the matching SimpleSelector attributes."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.CLASS, ".foo"),
            Token(TokenType.ID, "#bar"),
        ]
    )

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag == "div"
    assert parsed.classes == ["foo"]
    assert parsed.ids == ["bar"]


def test_parse_simple_selector_id_only() -> None:
    """A lone ID token gives no tag, no classes and a single ID."""
    stream = TokenStream([Token(TokenType.ID, "#foo")])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == []
    assert parsed.ids == ["foo"]


def test_parse_simple_selector_class_only() -> None:
    """A lone class token gives no tag, a single class and no IDs."""
    stream = TokenStream([Token(TokenType.CLASS, ".foo")])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo"]
    assert parsed.ids == []


def test_parse_simple_selector_multi_class_only() -> None:
    """Consecutive class tokens are collected in order, with no tag and no IDs."""
    stream = TokenStream([Token(TokenType.CLASS, ".foo"), Token(TokenType.CLASS, ".bar")])

    parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.tag is None
    assert parsed.classes == ["foo", "bar"]
    assert parsed.ids == []


def test_parse_simple_selector_multiple_ids_warns() -> None:
    """Two ID tokens trigger a UserWarning mentioning "multiple IDs"; both IDs are kept."""
    stream = TokenStream(
        [
            Token(TokenType.ID, "#one"),
            Token(TokenType.ID, "#two"),
        ]
    )

    with pytest.warns(UserWarning, match="multiple IDs"):
        parsed = SimpleSelector.parse_tokens(stream)

    assert parsed.ids == ["one", "two"]


def test_parse_simple_selector_invalid_double_tag_raises() -> None:
    """Two tag tokens in a row are rejected with InvalidSelectorError."""
    # The tokenizer should never be able to produce this sequence, but the parser
    # is still expected to reject it with an exception rather than accept it.
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.TAG, "span"),
        ]
    )

    with pytest.raises(InvalidSelectorError, match="multiple tag"):
        _ = SimpleSelector.parse_tokens(stream)


@pytest.mark.parametrize("extra_tokens", _trailing_token_cases())
def test_parse_simple_tag_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """After parsing a simple selector, the next token in the stream is the first extra one."""
    simple_part = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".foo"),
        Token(TokenType.ID, "#bar"),
    ]
    stream = TokenStream([*simple_part, *extra_tokens])

    _ = SimpleSelector.parse_tokens(stream)

    assert stream.peek() == extra_tokens[0]


def test_parse_descendant_selector() -> None:
    """``div .foo`` parses into a DescendantSelector with simple parent and child."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DESCENDANT, " "),
            Token(TokenType.CLASS, ".foo"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, DescendantSelector)
    assert isinstance(parsed.parent, SimpleSelector)
    assert parsed.parent.tag == "div"
    assert isinstance(parsed.child, SimpleSelector)
    assert parsed.child.classes == ["foo"]


def test_parse_direct_child_selector() -> None:
    """``div>.bar`` parses into a DescendantSelector whose ``direct`` flag is True."""
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DIRECT_CHILD, ">"),
            Token(TokenType.CLASS, ".bar"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, DescendantSelector)
    assert parsed.direct is True
    assert isinstance(parsed.parent, SimpleSelector)
    assert parsed.parent.tag == "div"
    assert isinstance(parsed.child, SimpleSelector)
    assert parsed.child.classes == ["bar"]


def test_parse_sibling_selector_adjacent() -> None:
    """``.a+.b`` parses into a SiblingSelector with ``is_adjacent`` set to True."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.ADJACENT_SIBLING, "+"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, SiblingSelector)
    assert parsed.is_adjacent is True
    assert isinstance(parsed.sibling_selector, SimpleSelector)
    assert parsed.sibling_selector.classes == ["a"]
    assert isinstance(parsed.selector, SimpleSelector)
    assert parsed.selector.classes == ["b"]


def test_parse_sibling_selector_subsequent() -> None:
    """``.a~.b`` parses into a SiblingSelector with ``is_adjacent`` set to False."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.SUBSEQUENT_SIBLING, "~"),
            Token(TokenType.CLASS, ".b"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, SiblingSelector)
    assert parsed.is_adjacent is False
    assert isinstance(parsed.sibling_selector, SimpleSelector)
    assert parsed.sibling_selector.classes == ["a"]
    assert isinstance(parsed.selector, SimpleSelector)
    assert parsed.selector.classes == ["b"]


def test_parse_multi_selector() -> None:
    """``.a,.b,.c`` parses into a MultiSelector holding three simple selectors in order."""
    stream = TokenStream(
        [
            Token(TokenType.CLASS, ".a"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".b"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.CLASS, ".c"),
        ]
    )

    parsed = parse_tokens(stream)

    assert isinstance(parsed, MultiSelector)
    assert len(parsed.selectors) == 3
    assert all(isinstance(part, SimpleSelector) for part in parsed.selectors)
    # The isinstance check above is invisible to type checkers, hence the cast.
    simple_parts = cast("list[SimpleSelector]", parsed.selectors)
    assert [part.classes for part in simple_parts] == [["a"], ["b"], ["c"]]


def test_pseudo_class_without_arguments() -> None:
    """A bare ``:first-child`` token yields a pseudo-class with no argument."""
    base = SimpleSelector(tag="div")
    stream = TokenStream([Token(TokenType.PSEUDO_CLASS, ":first-child")])

    result = PseudoClassSelector.parse_tokens(stream, base)

    assert result.pseudo_class == "first-child"
    assert result.selector == base
    assert result.argument is None


@pytest.mark.parametrize("extra_tokens", _trailing_token_cases())
def test_pseudo_class_without_arguments_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """After an argument-less pseudo-class, the next token is the first extra one."""
    base = SimpleSelector(tag="div")
    stream = TokenStream([Token(TokenType.PSEUDO_CLASS, ":first-child"), *extra_tokens])

    _ = PseudoClassSelector.parse_tokens(stream, base)

    assert stream.peek() == extra_tokens[0]


def test_pseudo_class_with_nested_argument() -> None:
    """The tokens between the parentheses of ``:not(...)`` become the raw argument."""
    base = SimpleSelector(tag="div")
    arg_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *arg_tokens,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    result = PseudoClassSelector.parse_tokens(stream, base)

    assert result.pseudo_class == "not"
    assert result.selector == base
    assert result.argument == arg_tokens


def test_parse_pseudo_class_nested_parens() -> None:
    """Parentheses nested inside the argument are kept as part of the argument tokens."""
    base = SimpleSelector(tag="div")
    arg_tokens = [
        Token(TokenType.PSEUDO_CLASS, ":nth-child"),
        Token(TokenType.LPARENS, "("),
        Token(TokenType.NUMBER, "2"),
        Token(TokenType.RPARENS, ")"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *arg_tokens,
            Token(TokenType.RPARENS, ")"),
        ]
    )

    result = PseudoClassSelector.parse_tokens(stream, base)

    assert result.pseudo_class == "not"
    assert result.argument == arg_tokens


@pytest.mark.parametrize("extra_tokens", _trailing_token_cases())
def test_pseudo_class_with_nested_argument_leaves_extra_tokens(extra_tokens: list[Token]) -> None:
    """After a pseudo-class with an argument, the next token is the first extra one."""
    base = SimpleSelector(tag="div")
    arg_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            *arg_tokens,
            Token(TokenType.RPARENS, ")"),
            *extra_tokens,
        ]
    )

    _ = PseudoClassSelector.parse_tokens(stream, base)

    assert stream.peek() == extra_tokens[0]


def test_pseudo_class_unbalanced_parens() -> None:
    """An argument whose outer parenthesis is never closed raises InvalidSelectorError."""
    base = SimpleSelector(tag="div")
    # Only the inner ``:nth-child(2)`` is closed; the ``(`` after ``:not`` is not.
    stream = TokenStream(
        [
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            Token(TokenType.TAG, "span"),
            Token(TokenType.CLASS, ".foo"),
            Token(TokenType.PSEUDO_CLASS, ":nth-child"),
            Token(TokenType.LPARENS, "("),
            Token(TokenType.NUMBER, "2"),
            Token(TokenType.RPARENS, ")"),
        ]
    )

    with pytest.raises(InvalidSelectorError):
        _ = PseudoClassSelector.parse_tokens(stream, base)


def test_nth_child_valid() -> None:
    """``nth-child`` with a single NUMBER token argument exposes that number as ``n``."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", base, [Token(TokenType.NUMBER, "3")])

    specialised = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialised.n == 3
    assert specialised.selector == base


@pytest.mark.parametrize(
    "argument_tokens",
    [
        pytest.param([Token(TokenType.CLASS, ".bad")], id="bad-type"),
        pytest.param(
            [
                Token(TokenType.TAG, "div"),
                Token(TokenType.CLASS, ".main"),
            ],
            id="multi-token",
        ),
        pytest.param(
            [
                Token(TokenType.NUMBER, "5"),
                Token(TokenType.TAG, "div"),
            ],
            id="multi-token-number-first",
        ),
    ],
)
def test_nth_child_parsing_invalid_argument(argument_tokens: list[Token]) -> None:
    """``nth-child`` rejects arguments that are not exactly one NUMBER token."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", base, argument_tokens)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)


def test_nth_child_parsing_missing_argument() -> None:
    """``nth-child`` without any argument raises InvalidSelectorError."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector("nth-child", base, None)

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)


@pytest.mark.parametrize(
    ("selector", "n"),
    [
        ("first-child", 1),
        ("last-child", -1),
    ],
)
def test_specific_nth_child(selector: str, n: int) -> None:
    """``first-child`` and ``last-child`` convert to nth-child selectors with n of 1 and -1."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector(selector, base, None)

    specialised = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialised.n == n
    assert specialised.selector == base


def test_specific_nth_child_with_argument() -> None:
    """``first-child`` given an argument raises InvalidSelectorError."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector("first-child", base, [Token(TokenType.NUMBER, "4")])

    with pytest.raises(InvalidSelectorError):
        _ = NthChildPseudoClassSelector.from_pseudo_cls(generic)


def test_last_child_parsing() -> None:
    """``last-child`` without an argument converts to an nth-child selector with n of -1."""
    base = SimpleSelector(tag="li")
    generic = PseudoClassSelector("last-child", base, None)

    specialised = NthChildPseudoClassSelector.from_pseudo_cls(generic)

    assert specialised.n == -1
    assert specialised.selector == base


def test_parse_not_valid() -> None:
    """The argument tokens of ``not`` are parsed into a selector of their own."""
    base = SimpleSelector(tag="div")
    arg_tokens = [
        Token(TokenType.TAG, "span"),
        Token(TokenType.CLASS, ".foo"),
    ]
    generic = PseudoClassSelector("not", base, arg_tokens)

    negation = NotPseudoClassSelector.from_pseudo_cls(generic)

    assert isinstance(negation.not_selector, SimpleSelector)
    assert negation.not_selector.tag == "span"
    assert negation.not_selector.classes == ["foo"]
    assert negation.selector == base


def test_parse_not_with_missing_argument() -> None:
    """``not`` without any argument raises InvalidSelectorError."""
    base = SimpleSelector(tag="div")
    generic = PseudoClassSelector("not", base, None)

    with pytest.raises(InvalidSelectorError):
        _ = NotPseudoClassSelector.from_pseudo_cls(generic)


def test_parse_multiple_combinators() -> None:
    """A chain of combinators and pseudo-classes nests in the asserted order."""
    # div .parent > .child + .sibling:not(.bar):first-child
    stream = TokenStream(
        [
            Token(TokenType.TAG, "div"),
            Token(TokenType.DESCENDANT, " "),
            Token(TokenType.CLASS, ".parent"),
            Token(TokenType.DIRECT_CHILD, ">"),
            Token(TokenType.CLASS, ".child"),
            Token(TokenType.ADJACENT_SIBLING, "+"),
            Token(TokenType.CLASS, ".sibling"),
            Token(TokenType.PSEUDO_CLASS, ":not"),
            Token(TokenType.LPARENS, "("),
            Token(TokenType.CLASS, ".bar"),
            Token(TokenType.RPARENS, ")"),
            Token(TokenType.PSEUDO_CLASS, ":first-child"),
        ]
    )

    root = parse_tokens(stream)

    # Outermost level: div <descendant> (everything else)
    assert isinstance(root, DescendantSelector)
    assert isinstance(root.parent, SimpleSelector)
    assert root.parent.tag == "div"

    # Second level: .parent > (everything else)
    assert isinstance(root.child, DescendantSelector)
    assert isinstance(root.child.parent, SimpleSelector)
    assert root.child.parent.classes == ["parent"]

    # Third level: .child + .sibling:not(.bar):first-child
    sibling_part = root.child.child
    assert isinstance(sibling_part, SiblingSelector)
    assert sibling_part.is_adjacent is True
    assert isinstance(sibling_part.sibling_selector, SimpleSelector)
    assert sibling_part.sibling_selector.classes == ["child"]

    # Fourth level: .sibling:not(.bar):first-child
    first_child_part = sibling_part.selector
    assert isinstance(first_child_part, NthChildPseudoClassSelector)
    assert first_child_part.n == 1

    # Innermost level: .sibling:not(.bar)
    negation_part = first_child_part.selector
    assert isinstance(negation_part, NotPseudoClassSelector)
    assert isinstance(negation_part.not_selector, SimpleSelector)
    assert negation_part.not_selector.classes == ["bar"]
    assert isinstance(negation_part.selector, SimpleSelector)
    assert negation_part.selector.classes == ["sibling"]