diff --git a/src/__main__.py b/src/__main__.py
index 0637324..bffed95 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -1,4 +1,5 @@
 import asyncio
+import re
 from pprint import pprint
 
 import httpx
@@ -6,12 +7,18 @@ import httpx
 from src.link_scraper import get_urlmap
 
 URL = httpx.URL("https://ailab.fai.utb.cz")
+ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
 
 
 async def main() -> None:
     """Program entrypoint."""
     async with httpx.AsyncClient() as client:
-        url_map = await get_urlmap(client, URL, max_depth=2, filter_condition=lambda url: url.host == URL.host)
+        url_map = await get_urlmap(
+            client,
+            URL,
+            max_depth=5,
+            filter_condition=lambda url: ALLOWED_HOSTS_RE.fullmatch(url.host) is not None,
+        )
 
     pprint(url_map)
 