Print top 50 URLs & their scores

This commit is contained in:
Peter Vacho 2024-11-24 19:45:11 +01:00
parent e7f0b5ce4e
commit 726b60eb82
Signed by: school
GPG key ID: 8CFC3837052871B4
2 changed files with 30 additions and 4 deletions

View file

@ -1,12 +1,12 @@
import asyncio
import re
from pprint import pprint
from time import perf_counter
import httpx
from src.link_scraper import get_urlmap, standard_urlmap_exception_suppressor
from src.pagerank import pagerank, test_pagerank
from src.visualization import display_top_urls
# Site root as a parsed httpx.URL (presumably the crawl's starting point — confirm against main()).
URL = httpx.URL("https://ailab.fai.utb.cz")
# Matches "utb.cz", optionally preceded by any prefix ending in a dot (i.e. subdomains).
# NOTE(review): the pattern is unanchored, so strictness depends on whether callers
# use match/fullmatch/search — verify at the call site.
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
@ -37,14 +37,12 @@ async def main() -> None:
start = perf_counter()
ranking = pagerank(url_map)
sorted_urls = list(ranking.keys())
sorted_urls.sort(key=lambda url: ranking[url])
took = perf_counter() - start
print(f"Took: {round(took, 2)} seconds")
print("Done")
pprint(sorted_urls)
display_top_urls(ranking)
if __name__ == "__main__":

28
src/visualization.py Normal file
View file

@ -0,0 +1,28 @@
import httpx
from rich.console import Console
from rich.table import Table
def display_top_urls(ranking: dict[httpx.URL, float], top_n: int = 50) -> None:
    """Display a table of the top N URLs by rank score.

    Rows are ordered from the highest score to the lowest and numbered
    starting at 1. When ``ranking`` holds fewer than ``top_n`` entries, all of
    them are shown.

    :param ranking: Dictionary mapping each URL to its rank score.
    :param top_n: Number of top-ranked URLs to display. Negative values are
        treated as zero, which renders an empty table.
    """
    import heapq

    # A negative count used as a slice bound would drop entries from the low
    # end instead of limiting the output, so clamp it before use.
    top_n = max(top_n, 0)

    # Pick only the best `top_n` entries rather than sorting the whole ranking;
    # the result is ordered by score, highest first.
    top_entries = heapq.nlargest(top_n, ranking.items(), key=lambda item: item[1])

    # Initialize Rich table
    table = Table(title=f"Top {top_n} URLs by PageRank Score")
    table.add_column("Rank", style="bold green", justify="right")
    table.add_column("URL", style="cyan")
    table.add_column("PageRank Score", style="magenta", justify="right")

    # Add one row per URL; scores are printed with 18 decimal places.
    for idx, (url, score) in enumerate(top_entries, start=1):
        table.add_row(str(idx), str(url), f"{score:.18f}")

    # Render the table
    console = Console()
    console.print(table)