From 422b0d5880b4db0a59eeb25635c3e2f670e59af9 Mon Sep 17 00:00:00 2001
From: Peter Vacho
Date: Sun, 24 Nov 2024 22:09:24 +0100
Subject: [PATCH] Improve comments in pagerank algo

---
 src/pagerank.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/pagerank.py b/src/pagerank.py
index 0741834..3a39037 100644
--- a/src/pagerank.py
+++ b/src/pagerank.py
@@ -15,12 +15,12 @@ def pagerank[T](
     :param tol: Tolerance for the difference between iterations (convergence threshold).
     :return: A dictionary where the key is a URL and the value is its rank.
     """
-    # Get unique pages
+    # Step 1: Identify all unique pages
     pages = list(link_map.keys() | {link for links in link_map.values() for link in links})
     n = len(pages)
     page_indices = {page: i for i, page in enumerate(pages)}  # Map pages to indices
 
-    # Build the adjacency matrix
+    # Step 2: Construct the adjacency matrix (m) that represents the links between pages
     m = np.zeros((n, n))
     for src, targets in link_map.items():
         if not targets:
@@ -31,23 +31,27 @@ def pagerank[T](
         for target in targets:
             m[page_indices[target], page_indices[src]] = 1 / len(targets)
 
-    # Create the E matrix
-    e = np.ones((n, n)) / n
+    # Step 3: Create the "E" matrix, which represents the random jump factor
+    # This represents the idea that a user can randomly jump to any page, with equal probability for each.
+    e = np.ones((n, n)) / n  # A matrix where each entry is 1/n (uniform probability for each page)
 
-    # Create the A matrix
+    # Step 4: Create the "A" matrix that combines the link structure (m) and the random jump factor (e)
+    # This matrix represents the full model combining the link structure and random jumps
     a = beta * m + (1 - beta) * e
 
-    # Initialize ranks (r(0))
-    ranks = np.ones(n) / n
+    # Step 5: Initialize the PageRank scores r(0).
+    ranks = np.ones(n) / n  # Start with a uniform rank for all pages
 
-    # Iteratively calculate PageRank
+    # Step 6: Iterate to update the PageRank scores
     for _ in range(max_iter):
-        new_ranks = a @ ranks  # r(t+1) = A . r(t)
+        new_ranks = a @ ranks  # Calculate new ranks based on the current ranks
         if np.linalg.norm(new_ranks - ranks, ord=1) < tol:  # Convergence check
+            # If the change is smaller than the tolerance, we stop
             break
         ranks = new_ranks
 
-    # Return ranks as {httpx.URL: rank}
+    # Step 7: Return the final PageRank scores
+    # Map the final ranks back to the original page names and return the result
     return {page: ranks[idx] for page, idx in page_indices.items()}