From: Jean Privat Date: Fri, 16 Oct 2015 12:19:39 +0000 (-0400) Subject: Merge: lib/graphs: introduce PageRank computation on digraphs X-Git-Tag: v0.7.9~36 X-Git-Url: http://nitlanguage.org?hp=6064cf2ff603453cb6a61adecfdda2e0c1f31cf6 Merge: lib/graphs: introduce PageRank computation on digraphs Implement famous Google's `PageRank` algorithm for directed graphs. Ping @ablondin Pull-Request: #1706 Reviewed-by: Jean Privat Reviewed-by: Alexandre Blondin Massé --- diff --git a/lib/graphs/digraph.nit b/lib/graphs/digraph.nit index 8f45495..bc9a8af 100644 --- a/lib/graphs/digraph.nit +++ b/lib/graphs/digraph.nit @@ -117,7 +117,7 @@ # g.add_arcs([[1,2],[2,1],[2,3],[3,4],[4,5],[5,3]]) # for component in g.strongly_connected_components.to_partitions # do -# print component +# print component # end # # Prints [1,2] and [3,4,5] # ~~~ @@ -291,7 +291,7 @@ interface Digraph[V: Object] # g.add_arc(0, 2) # g.add_arc(1, 2) # for arc in g.arcs_iterator do - # assert g.has_arc(arc[0], arc[1]) + # assert g.has_arc(arc[0], arc[1]) # end # ~~~ fun arcs_iterator: Iterator[Array[V]] do return new ArcsIterator[V](self) @@ -317,7 +317,7 @@ interface Digraph[V: Object] # g.add_arc(1, 3) # g.add_arc(2, 3) # for arc in g.incoming_arcs(3) do - # assert g.is_predecessor(arc[0], arc[1]) + # assert g.is_predecessor(arc[0], arc[1]) # end # ~~~ fun incoming_arcs(u: V): Collection[Array[V]] @@ -340,7 +340,7 @@ interface Digraph[V: Object] # g.add_arc(2, 3) # g.add_arc(1, 2) # for arc in g.outgoing_arcs(1) do - # assert g.is_successor(arc[1], arc[0]) + # assert g.is_successor(arc[1], arc[0]) # end # ~~~ fun outgoing_arcs(u: V): Collection[Array[V]] @@ -383,6 +383,15 @@ interface Digraph[V: Object] return s end + # Open Graphviz with `self.to_dot`. + # + # Mainly used for debugging. + fun show_dot do + var f = new ProcessWriter("dot", "-Txlib") + f.write to_dot + f.close + end + ## ------------ ## ## Neighborhood ## ## ------------ ## diff --git a/lib/graphs/pagerank.nit b/lib/graphs/pagerank.nit new file mode 100644 index 0000000..5177e5a --- /dev/null +++ b/lib/graphs/pagerank.nit @@ -0,0 +1,88 @@ +# This file is part of NIT (http://www.nitlanguage.org). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Add PageRank computation for vertices in Digraph. +module pagerank + +import graphs::digraph + +redef class Digraph[V] + + # Compute PageRank for each vertex + # + # Details of the algorithm can be found in: + # > L. Page, S. Brin, R. Motwani, and T.Winograd. + # > **The pagerank citation ranking: Bringing order to the web.** + # > *Technical report, Stanford Digital Library Technologies Project, 1998* + # + # Example: + # ~~~ + # var g = new HashDigraph[String] + # g.add_arc("A", "B") + # g.add_arc("A", "C") + # g.add_arc("B", "C") + # g.add_arc("C", "A") + # g.add_arc("D", "C") + # + # assert g.pagerank.join(", ", ":") == "A:1.488, B:0.782, C:1.575, D:0.15" + # ~~~ + fun pagerank: PRMap[V] do + # `d` constant such as `initial_pagerank(node) == (1 - d) != 0` + var d = 0.85 # commonly-choosen value + # Init each node page rank with an initial_value + var values = new PRMap[V] + var vertices = self.vertices + for v in vertices do values[v] = 1.0 - d + # Compute page rank until convergence + var prev = new PRMap[V] + while not values.is_approx(prev, 0.001) do + prev = new PRMap[V].from(values) + for v in vertices do + var in_pr = 0.0 + for o in predecessors(v) do + in_pr += values[o] / out_degree(o).to_f + end + values[v] = (1.0 - d) + d * in_pr + end + end + return values + end +end + +# Map each Vertice of a Digraph to it's PageRank. +# +# See: `Digraph::pagerank`. +class PRMap[V] + super HashMap[V, Float] + + # Init `self` by copying `other` values. + init from(other: PRMap[V]) do + init + for k, v in other do self[k] = v + end + + # Is `self` approximately equal to another PRMap? + # + # `self` is approximately equal to `o` if `o` contains all the key from `self` + # with the same values. + # + # Values equality is based on `Float::is_approx` with `precision`. + fun is_approx(o: SELF, precision: Float): Bool do + for k1, v1 in self do + if not o.has_key(k1) then return false + if not v1.is_approx(o[k1], precision) then return false + end + return true + end +end