import pytest
from hypothesis import given
from hypothesis import strategies as st

from src.tokenizer import Token, TokenStream, TokenType, tokenize_selector
from tests.hypot import css_class, css_class_multi, css_id, css_tag, selector

# region: Tokenization tests


@pytest.mark.parametrize(
    ("input_str", "expected_type", "expected_value"),
    [
        ("div", TokenType.TAG, "div"),
        ("#id", TokenType.ID, "#id"),
        (".class", TokenType.CLASS, ".class"),
        ("123", TokenType.NUMBER, "123"),
        (",", TokenType.COMMA, ", "),
        (">", TokenType.DIRECT_CHILD, " > "),
        (" ", TokenType.DESCENDANT, " "),
        (":hover", TokenType.PSEUDO_CLASS, ":hover"),
        ("(", TokenType.LPARENS, "("),
        (")", TokenType.RPARENS, ")"),
        ("::after", TokenType.PSEUDO_ELEMENT, "::after"),
        ("+", TokenType.ADJACENT_SIBLING, " + "),
        ("~", TokenType.SUBSEQUENT_SIBLING, " ~ "),
        ("$", TokenType.UNKNOWN, "$"),
    ],
)
def test_individual_tokens(input_str: str, expected_type: TokenType, expected_value: str) -> None:
    """Test each token type in isolation."""
    tokens = list(tokenize_selector(input_str))
    assert len(tokens) == 1
    token = tokens[0]
    assert token.type == expected_type
    assert token.value == expected_value


@pytest.mark.parametrize(
    ("sel", "expected"),
    [
        ("div.class", [TokenType.TAG, TokenType.CLASS]),
        ("div > .class", [TokenType.TAG, TokenType.DIRECT_CHILD, TokenType.CLASS]),
        ("div, span", [TokenType.TAG, TokenType.COMMA, TokenType.TAG]),
        ("a:b::c", [TokenType.TAG, TokenType.PSEUDO_CLASS, TokenType.PSEUDO_ELEMENT]),
        ("a + b", [TokenType.TAG, TokenType.ADJACENT_SIBLING, TokenType.TAG]),
        ("a ~ b", [TokenType.TAG, TokenType.SUBSEQUENT_SIBLING, TokenType.TAG]),
        ("div (", [TokenType.TAG, TokenType.DESCENDANT, TokenType.LPARENS]),
    ],
)
def test_token_combinations(sel: str, expected: list[TokenType]) -> None:
    """Test combinations of tokens (not necessarily valid ones)."""
    tokens = list(tokenize_selector(sel))
    assert [t.type for t in tokens] == expected


def test_empty_string() -> None:
    """Test that tokenizing an empty string yields no tokens."""
    tokens = list(tokenize_selector(""))
    assert len(tokens) == 0


@given(css_tag)
def test_valid_tags(tag: str) -> None:
    """Test valid tag names."""
    tokens = list(tokenize_selector(tag))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.TAG
    assert tokens[0].value == tag


@given(css_id)
def test_valid_ids(id_val: str) -> None:
    """Test valid ID values."""
    tokens = list(tokenize_selector(id_val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.ID
    assert tokens[0].value == id_val


@given(css_class)
def test_valid_class(val: str) -> None:
    """Test valid single class values."""
    tokens = list(tokenize_selector(val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.CLASS
    assert tokens[0].value == val


@given(css_class_multi)
def test_valid_class_multi(val: str) -> None:
    """Test valid multi-class values."""
    tokens = list(tokenize_selector(val))
    assert all(tok.type == TokenType.CLASS for tok in tokens)


@given(selector)
def test_arbitrary_valid_selector(sel: str) -> None:
    """Ensure the tokenizer can handle any valid selector string."""
    tokens = list(tokenize_selector(sel))
    tok_types = {tok.type for tok in tokens}
    assert TokenType.UNKNOWN not in tok_types


@given(st.text())
def test_no_crashes_on_arbitrary_text(s: str) -> None:
    """Ensure the tokenizer doesn't crash on any input.

    (Invalid input should surface as UNKNOWN tokens, not exceptions.)
    """
    _ = list(tokenize_selector(s))


# endregion

# region: TokenStream tests


def test_peek_and_pop() -> None:
    """peek returns the current token without advancing; pop advances the stream."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    # Initial peek
    assert stream.peek() == tokens[0]

    # Pop moves the stream
    assert stream.pop() == tokens[0]
    assert stream.peek() == tokens[1]


def test_peek_trusted() -> None:
    """peek_trusted returns the next token, asserting the stream is not exhausted."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    tok = stream.peek_trusted()
    assert tok.type == TokenType.TAG

    _ = stream.pop()
    with pytest.raises(AssertionError):
        _ = stream.peek_trusted()


def test_has_more() -> None:
    """has_more reports whether unconsumed tokens remain."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    assert stream.has_more()
    _ = stream.pop()
    assert not stream.has_more()


def test_pop_exhausted_raises() -> None:
    """Popping an exhausted stream raises StopIteration."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    _ = stream.pop()
    with pytest.raises(StopIteration):
        _ = stream.pop()


def test_consume_while() -> None:
    """consume_while yields tokens while the predicate holds, leaving the first failing token in place."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # Consume until we see an ID token
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.ID))
    assert consumed == tokens[:2]
    assert stream.peek() == tokens[2]


def test_consume_while_all() -> None:
    """consume_while drains the stream when the predicate never fails."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # No LPARENS token exists, so everything is consumed
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.LPARENS))
    assert consumed == tokens
    assert stream.peek() is None


def test_reset() -> None:
    """reset rewinds the stream to the first token."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    _ = stream.pop()
    assert stream.peek() == tokens[1]

    stream.reset()
    assert stream.peek() == tokens[0]


def test_str_and_raw_str() -> None:
    """str() and raw_str reconstruct the selector by concatenating token values."""
    tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".main"),
        Token(TokenType.COMMA, ", "),
        Token(TokenType.TAG, "a"),
    ]
    stream = TokenStream(tokens)
    assert str(stream) == "div.main, a"
    assert stream.raw_str == "div.main, a"


# endregion