lib/graphs: introduce PageRank computation on digraphs
[nit.git] / lib / graphs / pagerank.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Add PageRank computation for vertices in Digraph.
16 module pagerank
17
18 import graphs::digraph
19
20 redef class Digraph[V]
21
22 # Compute PageRank for each vertex
23 #
24 # Details of the algorithm can be found in:
25 # > L. Page, S. Brin, R. Motwani, and T.Winograd.
26 # > **The pagerank citation ranking: Bringing order to the web.**
27 # > *Technical report, Stanford Digital Library Technologies Project, 1998*
28 #
29 # Example:
30 # ~~~
31 # var g = new HashDigraph[String]
32 # g.add_arc("A", "B")
33 # g.add_arc("A", "C")
34 # g.add_arc("B", "C")
35 # g.add_arc("C", "A")
36 # g.add_arc("D", "C")
37 #
38 # assert g.pagerank.join(", ", ":") == "A:1.488, B:0.782, C:1.575, D:0.15"
39 # ~~~
40 fun pagerank: PRMap[V] do
41 # `d` constant such as `initial_pagerank(node) == (1 - d) != 0`
42 var d = 0.85 # commonly-choosen value
43 # Init each node page rank with an initial_value
44 var values = new PRMap[V]
45 var vertices = self.vertices
46 for v in vertices do values[v] = 1.0 - d
47 # Compute page rank until convergence
48 var prev = new PRMap[V]
49 while not values.is_approx(prev, 0.001) do
50 prev = new PRMap[V].from(values)
51 for v in vertices do
52 var in_pr = 0.0
53 for o in predecessors(v) do
54 in_pr += values[o] / out_degree(o).to_f
55 end
56 values[v] = (1.0 - d) + d * in_pr
57 end
58 end
59 return values
60 end
61 end
62
63 # Map each Vertice of a Digraph to it's PageRank.
64 #
65 # See: `Digraph::pagerank`.
66 class PRMap[V]
67 super HashMap[V, Float]
68
69 # Init `self` by copying `other` values.
70 init from(other: PRMap[V]) do
71 init
72 for k, v in other do self[k] = v
73 end
74
75 # Is `self` approximately equal to another PRMap?
76 #
77 # `self` is approximately equal to `o` if `o` contains all the key from `self`
78 # with the same values.
79 #
80 # Values equality is based on `Float::is_approx` with `precision`.
81 fun is_approx(o: SELF, precision: Float): Bool do
82 for k1, v1 in self do
83 if not o.has_key(k1) then return false
84 if not v1.is_approx(o[k1], precision) then return false
85 end
86 return true
87 end
88 end