Handle parsing errors

This commit is contained in:
Peter Vacho 2024-11-24 22:09:52 +01:00
parent 422b0d5880
commit 56947296b5
Signed by: school
GPG key ID: 8CFC3837052871B4

View file

@@ -4,6 +4,7 @@ from urllib.parse import urljoin
import httpx
from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup
from rich.console import Console
from rich.style import StyleType
from rich.text import Text
@@ -66,6 +67,9 @@ def standard_urlmap_exception_suppressor(exc: Exception, url: httpx.URL) -> bool
print_exc("Got ", (exc.__class__.__qualname__, "red"), ", (", (str(exc), "orange"), ")")
return True
if isinstance(exc, ParserRejectedMarkup):
print_exc("Parsing failure: ", ("Invalid HTML", "red"))
return False