diff --git a/src/__main__.py b/src/__main__.py
index 65c052c..88e4b1c 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -1,12 +1,12 @@
 import asyncio
 import re
-from pprint import pprint
 from time import perf_counter
 
 import httpx
 
 from src.link_scraper import get_urlmap, standard_urlmap_exception_suppressor
 from src.pagerank import pagerank, test_pagerank
+from src.visualization import display_top_urls
 
 URL = httpx.URL("https://ailab.fai.utb.cz")
 ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
@@ -37,14 +37,12 @@ async def main() -> None:
 
     start = perf_counter()
     ranking = pagerank(url_map)
-    sorted_urls = list(ranking.keys())
-    sorted_urls.sort(key=lambda url: ranking[url])
     took = perf_counter() - start
     print(f"Took: {round(took, 2)} seconds")
 
     print("Done")
 
-    pprint(sorted_urls)
+    display_top_urls(ranking)
 
 
 if __name__ == "__main__":
diff --git a/src/visualization.py b/src/visualization.py
new file mode 100644
index 0000000..7068b07
--- /dev/null
+++ b/src/visualization.py
@@ -0,0 +1,29 @@
+"""Terminal rendering helpers for PageRank results."""
+
+import heapq
+
+import httpx
+from rich.console import Console
+from rich.table import Table
+
+
+def display_top_urls(ranking: dict[httpx.URL, float], top_n: int = 50) -> None:
+    """Print a table of the highest-scoring URLs to the terminal.
+
+    :param ranking: Mapping of each URL to its PageRank score.
+    :param top_n: Maximum number of URLs to show; zero or negative shows none.
+    """
+    # nlargest(n, ...) matches sorted(..., reverse=True)[:n] for n >= 0, but
+    # yields nothing for a negative n instead of slicing from the end.
+    top_urls = heapq.nlargest(top_n, ranking.items(), key=lambda item: item[1])
+
+    # The title reports the rows actually shown, which may be fewer than top_n.
+    table = Table(title=f"Top {len(top_urls)} URLs by PageRank Score")
+    table.add_column("Rank", style="bold green", justify="right")
+    table.add_column("URL", style="cyan")
+    table.add_column("PageRank Score", style="magenta", justify="right")
+
+    for idx, (url, score) in enumerate(top_urls, start=1):
+        table.add_row(str(idx), str(url), f"{score:.18f}")
+
+    Console().print(table)