From 56947296b57ae8114355ae20d546e153af201306 Mon Sep 17 00:00:00 2001 From: Peter Vacho Date: Sun, 24 Nov 2024 22:09:52 +0100 Subject: [PATCH] Handle parsing errors --- src/link_scraper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/link_scraper.py b/src/link_scraper.py index e6a8fb6..5b189b1 100644 --- a/src/link_scraper.py +++ b/src/link_scraper.py @@ -4,6 +4,7 @@ from urllib.parse import urljoin import httpx from bs4 import BeautifulSoup +from bs4.builder import ParserRejectedMarkup from rich.console import Console from rich.style import StyleType from rich.text import Text @@ -66,6 +67,9 @@ def standard_urlmap_exception_suppressor(exc: Exception, url: httpx.URL) -> bool print_exc("Got ", (exc.__class__.__qualname__, "red"), ", (", (str(exc), "orange"), ")") return True + if isinstance(exc, ParserRejectedMarkup): + print_exc("Parsing failure: ", ("Invalid HTML", "red")) + return False