Print top 50 URLs & their scores
This commit is contained in:
parent
e7f0b5ce4e
commit
726b60eb82
|
@ -1,12 +1,12 @@
|
|||
import asyncio
|
||||
import re
|
||||
from pprint import pprint
|
||||
from time import perf_counter
|
||||
|
||||
import httpx
|
||||
|
||||
from src.link_scraper import get_urlmap, standard_urlmap_exception_suppressor
|
||||
from src.pagerank import pagerank, test_pagerank
|
||||
from src.visualization import display_top_urls
|
||||
|
||||
URL = httpx.URL("https://ailab.fai.utb.cz")
|
||||
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
|
||||
|
@ -37,14 +37,12 @@ async def main() -> None:
|
|||
start = perf_counter()
|
||||
|
||||
ranking = pagerank(url_map)
|
||||
sorted_urls = list(ranking.keys())
|
||||
sorted_urls.sort(key=lambda url: ranking[url])
|
||||
|
||||
took = perf_counter() - start
|
||||
print(f"Took: {round(took, 2)} seconds")
|
||||
print("Done")
|
||||
|
||||
pprint(sorted_urls)
|
||||
display_top_urls(ranking)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
28
src/visualization.py
Normal file
28
src/visualization.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
import httpx
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
|
||||
def display_top_urls(ranking: dict[httpx.URL, float], top_n: int = 50) -> None:
    """Display a table of the top N URLs by rank score.

    The table is rendered to the terminal through a Rich ``Console``; nothing
    is returned. If ``ranking`` holds fewer than ``top_n`` entries, all of
    them are shown.

    :param ranking: Dictionary mapping each URL to its rank score.
    :param top_n: Maximum number of top-ranked URLs to display (must be >= 0).
    :raises ValueError: If ``top_n`` is negative.
    """
    # Imported locally so this function does not rely on a module-level import
    # that the rest of the file does not otherwise need.
    import heapq

    # A negative count would previously turn into a "[:-k]" slice, silently
    # dropping the lowest-ranked URLs instead of selecting the highest ones.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Pick the highest-scored entries in descending order. nlargest is
    # equivalent to sorted(..., reverse=True)[:top_n] but avoids fully sorting
    # the ranking when only a small prefix is displayed.
    top_entries = heapq.nlargest(top_n, ranking.items(), key=lambda item: item[1])

    # Initialize Rich table
    table = Table(title=f"Top {top_n} URLs by PageRank Score")
    table.add_column("Rank", style="bold green", justify="right")
    table.add_column("URL", style="cyan")
    table.add_column("PageRank Score", style="magenta", justify="right")

    # Add one row per URL; positions are 1-based for human readers
    for idx, (url, score) in enumerate(top_entries, start=1):
        table.add_row(str(idx), str(url), f"{score:.18f}")

    # Render the table
    console = Console()
    console.print(table)
|
||||
|
Loading…
Reference in a new issue