Print top 50 URLs & their scores
This commit is contained in:
parent
e7f0b5ce4e
commit
726b60eb82
|
@ -1,12 +1,12 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import re
|
import re
|
||||||
from pprint import pprint
|
|
||||||
from time import perf_counter
|
from time import perf_counter
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from src.link_scraper import get_urlmap, standard_urlmap_exception_suppressor
|
from src.link_scraper import get_urlmap, standard_urlmap_exception_suppressor
|
||||||
from src.pagerank import pagerank, test_pagerank
|
from src.pagerank import pagerank, test_pagerank
|
||||||
|
from src.visualization import display_top_urls
|
||||||
|
|
||||||
URL = httpx.URL("https://ailab.fai.utb.cz")
|
URL = httpx.URL("https://ailab.fai.utb.cz")
|
||||||
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
|
ALLOWED_HOSTS_RE = re.compile(r"(?:.*\.)?utb\.cz")
|
||||||
|
@ -37,14 +37,12 @@ async def main() -> None:
|
||||||
start = perf_counter()
|
start = perf_counter()
|
||||||
|
|
||||||
ranking = pagerank(url_map)
|
ranking = pagerank(url_map)
|
||||||
sorted_urls = list(ranking.keys())
|
|
||||||
sorted_urls.sort(key=lambda url: ranking[url])
|
|
||||||
|
|
||||||
took = perf_counter() - start
|
took = perf_counter() - start
|
||||||
print(f"Took: {round(took, 2)} seconds")
|
print(f"Took: {round(took, 2)} seconds")
|
||||||
print("Done")
|
print("Done")
|
||||||
|
|
||||||
pprint(sorted_urls)
|
display_top_urls(ranking)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
28
src/visualization.py
Normal file
28
src/visualization.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
import httpx
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
|
||||||
|
def display_top_urls(ranking: dict[httpx.URL, float], top_n: int = 50) -> None:
    """Display a table of the top N URLs by rank score.

    URLs are listed from the highest score to the lowest. URLs with equal
    scores keep the order in which they appear in ``ranking``.

    :param ranking: Dictionary containing URL rankings.
    :param top_n: Maximum number of top-ranked URLs to display. When
        ``ranking`` holds fewer entries, all of them are shown.
    :raises ValueError: If ``top_n`` is negative.
    """
    # A negative value would turn the slice below into "everything except the
    # last |top_n| entries", which is never what the caller wants.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Sort URLs by their rank score in descending order
    sorted_urls = sorted(ranking.items(), key=lambda item: item[1], reverse=True)
    top_urls = sorted_urls[:top_n]

    # Initialize Rich table; the title reports how many rows are really shown,
    # which may be fewer than top_n for a small ranking.
    table = Table(title=f"Top {len(top_urls)} URLs by PageRank Score")
    table.add_column("Rank", style="bold green", justify="right")
    table.add_column("URL", style="cyan")
    table.add_column("PageRank Score", style="magenta", justify="right")

    # Add rows to the table (ranks are 1-based for human readers)
    for idx, (url, score) in enumerate(top_urls, start=1):
        table.add_row(str(idx), str(url), f"{score:.18f}")

    # Render the table
    console = Console()
    console.print(table)
|
||||||
|
|
Loading…
Reference in a new issue