diff --git a/src/link_scraper.py b/src/link_scraper.py index e6a8fb6..5b189b1 100644 --- a/src/link_scraper.py +++ b/src/link_scraper.py @@ -4,6 +4,7 @@ from urllib.parse import urljoin import httpx from bs4 import BeautifulSoup +from bs4.builder import ParserRejectedMarkup from rich.console import Console from rich.style import StyleType from rich.text import Text @@ -66,6 +67,9 @@ def standard_urlmap_exception_suppressor(exc: Exception, url: httpx.URL) -> bool print_exc("Got ", (exc.__class__.__qualname__, "red"), ", (", (str(exc), "orange"), ")") return True + if isinstance(exc, ParserRejectedMarkup): + print_exc("Parsing failure: ", ("Invalid HTML", "red")) + return False