Initial commit
commit 9fba4b3d34
17 changed files with 2939 additions and 0 deletions

tests/test_tokenizer.py | 200 additions | Normal file
@@ -0,0 +1,200 @@
import pytest
from hypothesis import given
from hypothesis import strategies as st

from src.tokenizer import Token, TokenStream, TokenType, tokenize_selector
from tests.hypot import css_class, css_class_multi, css_id, css_tag, selector

# region: Tokenization tests


@pytest.mark.parametrize(
    ("input_str", "expected_type", "expected_value"),
    [
        ("div", TokenType.TAG, "div"),
        ("#id", TokenType.ID, "#id"),
        (".class", TokenType.CLASS, ".class"),
        ("123", TokenType.NUMBER, "123"),
        (",", TokenType.COMMA, ", "),
        (">", TokenType.DIRECT_CHILD, " > "),
        (" ", TokenType.DESCENDANT, " "),
        (":hover", TokenType.PSEUDO_CLASS, ":hover"),
        ("(", TokenType.LPARENS, "("),
        (")", TokenType.RPARENS, ")"),
        ("::after", TokenType.PSEUDO_ELEMENT, "::after"),
        ("+", TokenType.ADJACENT_SIBLING, " + "),
        ("~", TokenType.SUBSEQUENT_SIBLING, " ~ "),
        ("$", TokenType.UNKNOWN, "$"),
    ],
)
def test_individual_tokens(input_str: str, expected_type: TokenType, expected_value: str) -> None:
    """Test each token type in isolation."""
    tokens = list(tokenize_selector(input_str))
    assert len(tokens) == 1
    token = tokens[0]
    assert token.type == expected_type
    assert token.value == expected_value


@pytest.mark.parametrize(
    ("selector", "expected"),
    [
        ("div.class", [TokenType.TAG, TokenType.CLASS]),
        ("div > .class", [TokenType.TAG, TokenType.DIRECT_CHILD, TokenType.CLASS]),
        ("div, span", [TokenType.TAG, TokenType.COMMA, TokenType.TAG]),
        ("a:b::c", [TokenType.TAG, TokenType.PSEUDO_CLASS, TokenType.PSEUDO_ELEMENT]),
        ("a + b", [TokenType.TAG, TokenType.ADJACENT_SIBLING, TokenType.TAG]),
        ("a ~ b", [TokenType.TAG, TokenType.SUBSEQUENT_SIBLING, TokenType.TAG]),
        ("div (", [TokenType.TAG, TokenType.DESCENDANT, TokenType.LPARENS]),
    ],
)
def test_token_combinations(selector: str, expected: list[TokenType]) -> None:
    """Test combinations of tokens (not necessarily valid ones)."""
    tokens = list(tokenize_selector(selector))
    assert [t.type for t in tokens] == expected


def test_empty_string() -> None:
    """Test tokenizing empty string returns no tokens."""
    tokens = list(tokenize_selector(""))
    assert len(tokens) == 0


@given(css_tag)
def test_valid_tags(tag: str) -> None:
    """Test valid tag names."""
    tokens = list(tokenize_selector(tag))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.TAG
    assert tokens[0].value == tag


@given(css_id)
def test_valid_ids(id_val: str) -> None:
    """Test valid ID values."""
    tokens = list(tokenize_selector(id_val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.ID
    assert tokens[0].value == id_val


@given(css_class)
def test_valid_class(val: str) -> None:
    """Test valid single class values."""
    tokens = list(tokenize_selector(val))
    assert len(tokens) == 1
    assert tokens[0].type == TokenType.CLASS
    assert tokens[0].value == val


@given(css_class_multi)
def test_valid_class_multi(val: str) -> None:
    """Test valid multi class values."""
    tokens = list(tokenize_selector(val))
    assert all(tok.type == TokenType.CLASS for tok in tokens)


@given(selector)
def test_arbitrary_valid_selector(selector: str) -> None:
    """Ensure tokenizer can handle any valid selector string."""
    tokens = list(tokenize_selector(selector))
    tok_types = {tok.type for tok in tokens}
    assert TokenType.UNKNOWN not in tok_types


@given(st.text())
def test_no_crashes_on_arbitrary_text(s: str) -> None:
    """Ensure tokenizer doesn't crash on any input.

    (We should instead handle this with unknown tokens.)
    """
    _ = list(tokenize_selector(s))


# endregion
# region: TokenStream tests


def test_peek_and_pop() -> None:
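    """Test that peek() returns the current token and pop() advances the stream."""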
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    # Initial peek
    assert stream.peek() == tokens[0]

    # Pop moves the stream
    assert stream.pop() == tokens[0]
    assert stream.peek() == tokens[1]


def test_peek_trusted() -> None:
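    """Test that peek_trusted() returns a token while one remains and asserts once the stream is empty."""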
    stream = TokenStream([Token(TokenType.TAG, "div")])
    tok = stream.peek_trusted()
    assert tok.type == TokenType.TAG

    _ = stream.pop()
    with pytest.raises(AssertionError):
        _ = stream.peek_trusted()


def test_has_more() -> None:
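    """Test that has_more() reports whether unconsumed tokens remain."""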
    stream = TokenStream([Token(TokenType.TAG, "div")])
    assert stream.has_more()
    _ = stream.pop()
    assert not stream.has_more()


def test_pop_exhausted_raises() -> None:
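    """Test that pop() raises StopIteration once the stream is exhausted."""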
    stream = TokenStream([Token(TokenType.TAG, "div")])
    _ = stream.pop()
    with pytest.raises(StopIteration):
        _ = stream.pop()


def test_consume_while() -> None:
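    """Test that consume_while() stops at the first token failing the predicate and leaves it in the stream."""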
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # Consume until we see an ID token
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.ID))

    assert consumed == tokens[:2]
    assert stream.peek() == tokens[2]


def test_consume_while_all() -> None:
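    """Test that consume_while() consumes every token when the predicate never fails."""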
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main"), Token(TokenType.ID, "#id")]
    stream = TokenStream(tokens)

    # No LPARENS token is present, so the whole stream is consumed
    consumed = list(stream.consume_while(lambda t: t.type != TokenType.LPARENS))

    assert consumed == tokens
    assert stream.peek() is None


def test_reset() -> None:
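    """Test that reset() rewinds the stream to the first token."""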
    tokens = [Token(TokenType.TAG, "div"), Token(TokenType.CLASS, ".main")]
    stream = TokenStream(tokens)

    _ = stream.pop()
    assert stream.peek() == tokens[1]

    stream.reset()
    assert stream.peek() == tokens[0]


def test_str_and_raw_str() -> None:
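    """Test that str() and raw_str reproduce the original selector text."""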
    tokens = [
        Token(TokenType.TAG, "div"),
        Token(TokenType.CLASS, ".main"),
        Token(TokenType.COMMA, ", "),
        Token(TokenType.TAG, "a"),
    ]
    stream = TokenStream(tokens)

    assert str(stream) == "div.main, a"
    assert stream.raw_str == "div.main, a"


# endregion