Handle parsing errors

This commit is contained in:
Peter Vacho 2024-11-24 22:09:52 +01:00
parent 422b0d5880
commit 56947296b5
Signed by: school
GPG key ID: 8CFC3837052871B4

View file

@@ -4,6 +4,7 @@ from urllib.parse import urljoin
import httpx import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup
from rich.console import Console from rich.console import Console
from rich.style import StyleType from rich.style import StyleType
from rich.text import Text from rich.text import Text
@@ -66,6 +67,9 @@ def standard_urlmap_exception_suppressor(exc: Exception, url: httpx.URL) -> bool
print_exc("Got ", (exc.__class__.__qualname__, "red"), ", (", (str(exc), "orange"), ")") print_exc("Got ", (exc.__class__.__qualname__, "red"), ", (", (str(exc), "orange"), ")")
return True return True
if isinstance(exc, ParserRejectedMarkup):
print_exc("Parsing failure: ", ("Invalid HTML", "red"))
return False return False