neo4j/graph: Add Neo4j as a storage mechanism.
[nit.git] / lib / neo4j / graph / graph.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Provides an interface for services on Neo4j graphs.
12 module neo4j::graph::graph
13
14 import neo4j
15 import progression
16
# A Neo4j graph whose nodes are identified through a local scheme.
#
# The identification scheme is the one implemented by the
# `NeoNodeCollection` stored in `nodes`. Subclass `NeoNodeCollection` to
# define another scheme.
#
# To add ways to save or load a graph, subclass `GraphStore`. While encoding
# the relationships, a storing mechanism may rely on `nodes.id_of` to
# identify the nodes of the graph.
class NeoGraph
	# The collection holding every node of the graph.
	var nodes: NeoNodeCollection

	# The collection holding every relationship of the graph.
	#
	# Backed by an `Array[NeoEdge]` by default.
	var edges: SimpleCollection[NeoEdge] = new Array[NeoEdge]

	# Create a node, add it to the graph, then return it.
	#
	# The call is forwarded to `nodes.create_node`, so the returned node
	# already carries its local ID.
	#
	# SEE: `NeoNodeCollection.add`
	# SEE: `NeoNodeCollection.create_node`
	# SEE: `NeoNodeCollection.register`
	fun create_node: NeoNode do
		var created = nodes.create_node
		return created
	end
end
40
# All the nodes in a `NeoGraph`.
#
# An identification scheme can be defined through the `register` and `add`
# methods. The `id_property` attribute defines where the local ID (that is the
# ID managed by the collection) is stored in each node.
abstract class NeoNodeCollection
	super SimpleCollection[NeoNode]

	# The type of the local IDs.
	type ID_TYPE: Jsonable

	# The property of the nodes that holds the local ID.
	var id_property: String

	# Retrieve the node that has the specified local id.
	#
	# Abort (failed assertion) if no node has the specified local id.
	#
	# Note: The default implementation uses `get_or_null`.
	fun [](id: ID_TYPE): NeoNode do
		var n = get_or_null(id)
		assert n isa NeoNode
		return n
	end

	# Retrieve the node that has the specified local id, or return `null`.
	#
	# Note: The default implementation uses `iterator` and compares the
	# result of `id_of` for each node, so it is linear in the number of nodes.
	fun get_or_null(id: ID_TYPE): nullable NeoNode do
		for n in self do
			if id_of(n) == id then return n
		end
		return null
	end

	# Is there a node that has the specified local id?
	#
	# Note: The default implementation uses `get_or_null`.
	fun has_id(id: ID_TYPE): Bool do return get_or_null(id) isa NeoNode

	# Return the local ID of the node.
	#
	# The ID is read from the property named `id_property` and cast to
	# `ID_TYPE`.
	fun id_of(node: NeoNode): ID_TYPE do return node[id_property].as(ID_TYPE)

	# Set the local ID of the specified node.
	#
	# Just update the property at `id_property`. Do not check anything.
	protected fun id_of=(node: NeoNode, id: ID_TYPE) do
		node[id_property] = id
	end

	# Enlarge the collection to have at least the specified capacity.
	#
	# The capacity is specified in number of nodes. Used to minimize the
	# number of times the collection needs to be resized when adding nodes
	# in batches.
	#
	# Do nothing by default.
	fun enlarge(cap: Int) do end

	# Add the specified node to the graph and set its local ID.
	#
	# SEE: `add`
	# SEE: `create_node`
	fun register(node: NeoNode) is abstract

	# Add the specified node to the graph assuming that its local ID is already set.
	#
	# SEE: `create_node`
	# SEE: `register`
	redef fun add(node: NeoNode) is abstract

	# Add a new node to the graph and return it.
	#
	# Set the local ID of the node before returning it.
	#
	# SEE: `add`
	# SEE: `register`
	fun create_node: NeoNode do
		var node = new NeoNode
		register(node)
		return node
	end

	# Remove the node with the specified local ID.
	fun remove_at(id: ID_TYPE) is abstract

	# Remove the specified node.
	#
	# The local ID is used instead of `==` to seek the node.
	fun remove_node(node: NeoNode) do
		remove_at(id_of(node))
	end

	# Remove every node, one by one, through `remove_node`.
	redef fun clear do
		# Iterate over a snapshot: removing elements from the collection
		# while iterating over it directly could skip nodes or invalidate
		# the iterator, depending on the subclass.
		for node in self.to_a do remove_node(node)
	end

	# Remove the first node that is equal (`==`) to `node`, if any.
	redef fun remove(node: NeoNode) do
		for n in self do
			if node == n then
				# Safe without a snapshot: the iteration stops right
				# after the removal.
				remove_node(n)
				return
			end
		end
	end

	# Remove every node that is equal (`==`) to `node`.
	redef fun remove_all(node: NeoNode) do
		# Iterate over a snapshot for the same reason as in `clear`: the
		# collection is modified during the traversal.
		for n in self.to_a do
			if node == n then remove_node(n)
		end
	end

	# Optimize the collection, possibly by rewriting it.
	#
	# The local ID of the elements may be changed by this method.
	#
	# Do nothing by default.
	fun compact do end
end
156
# A means to save and load a Neo4j graph.
#
# Concrete storing mechanisms implement `isolated_save`, `load` and
# `save_part`.
abstract class GraphStore
	super Trackable

	# The graph that this store saves or loads.
	var graph: NeoGraph

	# Is it possible to save the graph without conflict?
	fun isolated_save: Bool is abstract

	# Load the graph, or only a part of it.
	#
	# The graph is not reset beforehand.
	fun load is abstract

	# Save the whole graph.
	#
	# Equivalent to calling `save_part` with all the nodes and all the
	# relationships of `graph`.
	fun save do
		var all_nodes = graph.nodes
		var all_edges = graph.edges
		save_part(all_nodes, all_edges)
	end

	# Save only the specified nodes and relationships.
	#
	# For each specified relationship, both ends are assumed to be either
	# already saved or specified in the same call to this method.
	fun save_part(nodes: Collection[NeoNode],
			edges: Collection[NeoEdge]) is abstract
end
182
# Save or load a graph using an actual Neo4j database.
class Neo4jGraphStore
	super GraphStore

	# The maximum number of entities saved in one request.
	#
	# Also defines the granularity of the reported progression.
	#
	# Must be strictly positive: `load` and `save_part` assert it.
	#
	# TODO Also honor this limit in `load`.
	var batch_max_size = 512 is writable

	# The Neo4j client to use.
	var client: Neo4jClient

	# The label to use to retrieve the nodes.
	var node_label: String

	# Amount of work done so far, as reported through `fire_progressed`.
	private var done_part = 0

	# Total amount of work, as reported through `fire_progressed`.
	private var total = 0

	# Does the database already contain at least one node with the specified label?
	#
	# Run a Cypher query counting the nodes that have `name` among their
	# labels. `name` is passed as a query parameter, not spliced in the
	# query string.
	fun has_node_label(name: String): Bool do
		var query = new CypherQuery.from_string(
				"match n where \{name\} in labels(n) return count(n)")
		query.params["name"] = name
		var data = client.cypher(query).as(JsonObject)["data"]
		# The count is read from the first cell of the first row.
		var result = data.as(JsonArray).first.as(JsonArray).first.as(Int)
		return result > 0
	end

	# The save is isolated when no node in the database has `node_label`.
	redef fun isolated_save do return not has_node_label(node_label)

	# Add to `graph` every node labelled `node_label` and its outgoing edges.
	#
	# Do not reset the graph. Progression is reported every
	# `batch_max_size` nodes.
	redef fun load do
		assert batch_max_size > 0
		fire_started
		var db_nodes = client.nodes_with_label(node_label)
		var nodes = graph.nodes
		var edges = graph.edges
		var i = 0

		# Measure the progression against the nodes fetched from the
		# database, not against what the graph contained before loading.
		# NOTE(review): the fetch is counted as one unit per node and the
		# loading of the edges as another one, hence `* 2` -- confirm.
		total = db_nodes.length * 2
		done_part = db_nodes.length
		fire_progressed(done_part, total)
		for node in db_nodes do
			nodes.add(node)
			edges.add_all(node.out_edges)
			i += 1
			if i >= batch_max_size then
				done_part += i
				fire_progressed(done_part, total)
				# Start counting the next group of nodes. Without this
				# reset, every following node would be reported as a
				# whole batch.
				i = 0
			end
		end
		# Account for the last, incomplete group.
		done_part += i
		fire_done
	end

	# Save the specified nodes, then the specified edges.
	#
	# The nodes are saved first so both ends of each edge exist when the
	# edges are saved.
	redef fun save_part(nodes, edges) do
		assert batch_max_size > 0
		fire_started
		total = nodes.length + edges.length
		done_part = 0

		save_entities(nodes)
		save_entities(edges)
		fire_done
	end

	# Save the specified entities.
	#
	# The entities are sent in batches of at most `batch_max_size`
	# entities. The progression is reported after each executed batch.
	private fun save_entities(neo_entities: Collection[NeoEntity]) do
		var batch = new NeoBatch(client)
		var batch_length = 0

		for nentity in neo_entities do
			batch.save_entity(nentity)
			batch_length += 1
			if batch_length >= batch_max_size then
				do_batch(batch)
				done_part += batch_length
				fire_progressed(done_part, total)
				batch = new NeoBatch(client)
				batch_length = 0
			end
		end
		# Flush the remaining entities, if any. Executing an empty batch
		# would only cost a useless request.
		if batch_length > 0 then
			do_batch(batch)
			done_part += batch_length
			fire_progressed(done_part, total)
		end
	end

	# Execute `batch` and check for errors.
	#
	# Abort if `batch.execute` returns errors, after writing them to the
	# standard error stream.
	private fun do_batch(batch: NeoBatch) do
		var errors = batch.execute
		assert errors.is_empty else
			for e in errors do sys.stderr.write("{e}\n")
		end
	end
end