Add caching support

Peter Vacho 2024-11-24 19:53:08 +01:00
parent 299350a90a
commit bdb9529b77
Signed by: school
GPG key ID: 8CFC3837052871B4
2 changed files with 22 additions and 8 deletions

.gitignore (vendored): 3 changes

@@ -1,3 +1,6 @@
# Cache files
url_map.pickle

# Output graphs
output/

@@ -1,5 +1,7 @@
import asyncio
import pickle
import re
from pathlib import Path
from time import perf_counter

import httpx
@@ -10,6 +12,7 @@ from src.visualization import display_top_urls
URL = httpx.URL("https://ailab.fai.utb.cz")
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
CACHE_RESULTS_DIR = Path.cwd() / "url_map.pickle"


async def main() -> None:
@@ -21,6 +24,11 @@ async def main() -> None:
    print("Scraping...")
    start = perf_counter()

    if CACHE_RESULTS_DIR.exists():
        print("> Using cached URLs")
        with CACHE_RESULTS_DIR.open("rb") as fp:
            url_map = pickle.load(fp)  # noqa: S301
    else:
        async with httpx.AsyncClient() as client:
            url_map = await get_urlmap(
                client,
@@ -30,6 +38,9 @@ async def main() -> None:
                suppress_exception=standard_urlmap_exception_suppressor,
            )

        with CACHE_RESULTS_DIR.open("wb") as fp:
            pickle.dump(url_map, fp)

    took = perf_counter() - start
    print(f"Took: {round(took, 2)} seconds")