import pytest
from hypothesis import given
from hypothesis import strategies as st

from src.tokenizer import Token, TokenStream, TokenType, tokenize_selector
from tests.hypot import css_class, css_class_multi, css_id, css_tag, selector

# region: Tokenization tests


@pytest.mark.parametrize(
    ("input_str", "expected_type", "expected_value"),
    [
        ("div", TokenType.TAG, "div"),
        ("#id", TokenType.ID, "#id"),
        (".class", TokenType.CLASS, ".class"),
        ("123", TokenType.NUMBER, "123"),
        (",", TokenType.COMMA, ", "),
        (">", TokenType.DIRECT_CHILD, " > "),
        (" ", TokenType.DESCENDANT, " "),
        (":hover", TokenType.PSEUDO_CLASS, ":hover"),
        ("(", TokenType.LPARENS, "("),
        (")", TokenType.RPARENS, ")"),
        ("::after", TokenType.PSEUDO_ELEMENT, "::after"),
        ("+", TokenType.ADJACENT_SIBLING, " + "),
        ("~", TokenType.SUBSEQUENT_SIBLING, " ~ "),
        ("$", TokenType.UNKNOWN, "$"),
    ],
)
def test_individual_tokens(input_str: str, expected_type: TokenType, expected_value: str) -> None:
    """Test each token type in isolation."""
    tokens = list(tokenize_selector(input_str))
    assert len(tokens) == 1
    token = tokens[0]
    assert token.type == expected_type
    assert token.value == expected_value


@pytest.mark.parametrize(
    ("selector", "expected"),
    [
        ("div.class", [TokenType.TAG, TokenType.CLASS]),
        ("div > .class", [TokenType.TAG, TokenType.DIRECT_CHILD, TokenType.CLASS]),
        ("div, span", [TokenType.TAG, TokenType.COMMA, TokenType.TAG]),
        ("a:b::c", [TokenType.TAG, TokenType.PSEUDO_CLASS, TokenType.PSEUDO_ELEMENT]),
        ("a + b", [TokenType.TAG, TokenType.ADJACENT_SIBLING, TokenType.TAG]),
        ("a ~ b", [TokenType.TAG, TokenType.SUBSEQUENT_SIBLING, TokenType.TAG]),
        ("div (", [TokenType.TAG, TokenType.DESCENDANT, TokenType.LPARENS]),
    ],
)
def test_token_combinations(selector: str, expected: list[TokenType]) -> None:
    """Test combinations of tokens (not necessarily valid ones)."""
    tokens = list(tokenize_selector(selector))
    assert [t.type for t in tokens] == expected


def test_empty_string() -> None:
    """Test that tokenizing the empty string yields no tokens."""
    tokens = list(tokenize_selector(""))
    assert len(tokens) == 0


@given(css_tag)
def test_valid_tags(tag: str) -> None:
    """Test valid tag names."""
    tokens = list(tokenize_selector(tag))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.TAG
    assert tokens[0].value == tag


@given(css_id)
def test_valid_ids(id_val: str) -> None:
    """Test valid ID values."""
    tokens = list(tokenize_selector(id_val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.ID
    assert tokens[0].value == id_val


@given(css_class)
def test_valid_class(val: str) -> None:
    """Test valid single class values."""
    tokens = list(tokenize_selector(val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.CLASS
    assert tokens[0].value == val


@given(css_class_multi)
def test_valid_class_multi(val: str) -> None:
    """Test valid multi-class values (every token should be a CLASS token)."""
    tokens = list(tokenize_selector(val))
    assert all(tok.type == TokenType.CLASS for tok in tokens)


@given(selector)
def test_arbitrary_valid_selector(selector: str) -> None:
    """Ensure any valid selector string tokenizes without producing UNKNOWN tokens."""
    tokens = list(tokenize_selector(selector))
    tok_types = {tok.type for tok in tokens}
    assert TokenType.UNKNOWN not in tok_types


@given(st.text())
def test_no_crashes_on_arbitrary_text(s: str) -> None:
    """Ensure the tokenizer doesn't crash on any input.

    (Unrecognised input should be handled via UNKNOWN tokens rather than by raising.)
    """
    _ = list(tokenize_selector(s))


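# Hedged companion property (not part of the original suite): a slightly stronger check
# than "does not crash". It assumes tokenize_selector always yields Token objects whose
# .value is a string, which the parametrized tests above suggest but do not guarantee
# for arbitrary input; the test name below is an illustrative choice.
@given(st.text())
def test_arbitrary_text_yields_string_valued_tokens(s: str) -> None:
    """Every token produced from arbitrary text should carry a string value."""
    tokens = list(tokenize_selector(s))
    assert all(isinstance(tok.value, str) for tok in tokens)

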
# endregion
# region: TokenStream tests


def test_peek_and_pop() -> None:
    """peek() inspects the current token without advancing; pop() returns it and advances."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    # Initial peek
    assert stream.peek() == tokens[0]

    # Pop moves the stream
    assert stream.pop() == tokens[0]
    assert stream.peek() == tokens[1]


def test_peek_trusted() -> None:
    """peek_trusted() returns the current token; it raises AssertionError once exhausted."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    tok = stream.peek_trusted()
    assert tok.type == TokenType.TAG

    _ = stream.pop()
    with pytest.raises(AssertionError):
        _ = stream.peek_trusted()


def test_has_more() -> None:
    """has_more() reports whether unconsumed tokens remain."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    assert stream.has_more()
    _ = stream.pop()
    assert not stream.has_more()


def test_pop_exhausted_raises() -> None:
    """pop() raises StopIteration once the stream is exhausted."""
    stream = TokenStream([Token(TokenType.TAG, "div")])
    _ = stream.pop()
    with pytest.raises(StopIteration):
        _ = stream.pop()


def test_consume_while() -> None:
    """consume_while() yields tokens while the predicate holds and leaves the rest in the stream."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # Consume until we see an ID token
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.ID))

    assert consumed == tokens[:2]
    assert stream.peek() == tokens[2]


def test_consume_while_all() -> None:
    """consume_while() consumes the entire stream when the predicate never fails."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # No LPARENS token appears, so the predicate holds throughout and everything is consumed
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.LPARENS))

    assert consumed == tokens
    assert stream.peek() is None


def test_reset() -> None:
    """reset() rewinds the stream to its first token."""
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    _ = stream.pop()
    assert stream.peek() == tokens[1]

    stream.reset()
    assert stream.peek() == tokens[0]


def test_str_and_raw_str() -> None:
    """str() and raw_str render the stream back into selector text."""
    tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".main"),
        Token(TokenType.COMMA, ", "),
        Token(TokenType.TAG, "a"),
    ]
    stream = TokenStream(tokens)

    assert str(stream) == "div.main, a"
    assert stream.raw_str == "div.main, a"


# endregion