Use regex for filter condition
This commit is contained in:
parent
47c9a9f555
commit
7f9798ed28
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
import re
|
||||
from pprint import pprint
|
||||
|
||||
import httpx
|
||||
|
@ -6,12 +7,18 @@ import httpx
|
|||
from src.link_scraper import get_urlmap
|
||||
|
||||
# URL handed to get_urlmap() in main() (presumably the crawl's starting point — confirm against get_urlmap).
URL = httpx.URL("https://ailab.fai.utb.cz")
|
||||
# Host pattern: "utb.cz" itself, or "utb.cz" preceded by any prefix ending in a dot
# (i.e. its subdomains, such as "ailab.fai.utb.cz").
# The pattern carries no anchors of its own, so use it with fullmatch().
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
|
||||
|
||||
|
||||
async def main() -> None:
    """Program entrypoint.

    Opens an ``httpx.AsyncClient``, asks ``get_urlmap`` for the URL map of
    ``URL`` (limited to ``max_depth=5``) and pretty-prints whatever it returns.

    Only URLs whose host fully matches ``ALLOWED_HOSTS_RE`` pass the filter
    condition given to ``get_urlmap``.
    """
    async with httpx.AsyncClient() as client:
        # Single crawl only: the older same-host, max_depth=2 call was
        # superseded by this one; keeping both would run a crawl whose result
        # is overwritten right away.
        url_map = await get_urlmap(
            client,
            URL,
            max_depth=5,
            # fullmatch() returns None on a non-match; turn that into a bool.
            filter_condition=lambda url: ALLOWED_HOSTS_RE.fullmatch(url.host) is not None,
        )

    pprint(url_map)
|
||||
|
||||
|
|
Loading…
Reference in a new issue