78700d0ecce310831a920ff79d577d8af4270203
[nit.git] / src / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage

	# Catalog metadata attached to this package
	#
	# The value is a `MPackageMetadata` constructed on `self`.
	var metadata: MPackageMetadata = new MPackageMetadata(self)
end
60
# The metadata extracted from a MPackage
#
# Textual values are read lazily from the `ini` of the package.
# The lists of people and the commit dates start empty and are meant to be
# filled from the outside.
class MPackageMetadata

	# The mpackage this metadata belongs to
	var mpackage: MPackage

	# Return the associated metadata from the `ini`, if any
	#
	# Returns `null` when the package has no `ini`;
	# otherwise returns whatever `ini[key]` gives.
	fun metadata(key: String): nullable String do
		var ini = mpackage.ini
		if ini == null then return null
		return ini[key]
	end

	# Read `key` as a comma-separated list
	#
	# Each item is trimmed and empty items are dropped.
	# The result is empty when `key` has no value.
	private fun metadata_list(key: String): Array[String]
	do
		var res = new Array[String]
		var string = metadata(key)
		if string == null then return res
		for raw in string.split(",") do
			var item = raw.trim
			if item.is_empty then continue
			res.add item
		end
		return res
	end

	# The consolidated list of tags
	#
	# Made of the items of the `package.tags` key, followed by `tryit` and
	# `apk` when the corresponding upstream keys are set.
	# A package that ends up without any tag gets the single tag `none`.
	var tags: Array[String] is lazy do
		# A missing `package.tags` key only means "no explicit tag":
		# the implicit tags and the `none` fallback below still apply.
		var res = metadata_list("package.tags")
		if tryit != null then res.add "tryit"
		if apk != null then res.add "apk"
		if res.is_empty then res.add "none"
		return res
	end

	# The list of all maintainers
	var maintainers = new Array[Person]

	# The list of contributors
	var contributors = new Array[Person]

	# The date of the most recent commit
	var last_date: nullable String = null

	# The date of the oldest commit
	var first_date: nullable String = null

	# Key: `package.maintainer`
	var maintainer: nullable String is lazy do return metadata("package.maintainer")

	# Key: `package.more_contributors`
	#
	# The value is a comma-separated list; items are trimmed and empty items dropped.
	var more_contributors: Array[String] is lazy do return metadata_list("package.more_contributors")

	# Key: `package.license`
	var license: nullable String is lazy do return metadata("package.license")

	# Key: `upstream.tryit`
	var tryit: nullable String is lazy do return metadata("upstream.tryit")

	# Key: `upstream.apk`
	var apk: nullable String is lazy do return metadata("upstream.apk")

	# Key: `upstream.homepage`
	var homepage: nullable String is lazy do return metadata("upstream.homepage")

	# Key: `upstream.browse`
	var browse: nullable String is lazy do return metadata("upstream.browse")

	# Package git clone address (key: `upstream.git`)
	var git: nullable String is lazy do return metadata("upstream.git")

	# Package issue tracker (key: `upstream.issues`)
	var issues: nullable String is lazy do return metadata("upstream.issues")
end
139
redef class Int
	# Returns `log(self+1)`.
	#
	# Used to turn raw package counts into score contributions.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
144
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Return a full-featured link to a person
	#
	# The result is the HTML-escaped name, preceded by a gravatar image when
	# an email is known, and wrapped in a link when a page is known.
	fun to_html: String
	do
		var res = ""
		var e = name.html_escape
		var page = self.page
		if page != null then
			res += "<a href=\"{page.html_escape}\">"
		end
		var email = self.email
		if email != null then
			# Gravatar identifies an avatar by the hash of the trimmed and
			# lowercased address, so normalize before hashing.
			var md5 = email.trim.to_lower.md5.to_lower
			res += "<img src=\"https://secure.gravatar.com/avatar/{md5}?size=20&amp;default=retro\">&nbsp;"
		end
		res += e
		if page != null then res += "</a>"
		return res
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# Note that the page is only looked for after a `<email>` part:
	# without one, the whole input is kept as the name.
	init parse(person: String)
	do
		var name = person
		var email = null
		var page = null
		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"
		do
			# Each `break` leaves the block with what was recognized so far.
			var sp1 = person.split_once_on("<")
			if sp1.length < 2 then
				break
			end
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length < 2 then
				break
			end
			name = sp1.first.trim
			email = sp2.first.trim
			var sp3 = sp2.last.split_once_on("(")
			if sp3.length < 2 then
				break
			end
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length < 2 then
				break
			end
			page = sp4.first.trim
		end

		init(name, email, page)
	end
end
256
257
# The main class of the catalog generator that has the knowledge
#
# It holds the indexes (by tag, category, person) and the per-package
# counters that are filled by `package_page` and `git_info`.
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	var deps = new POSet[MPackage]

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people
	#
	# Keys are textual representations of persons (see `register_contrib`).
	var persons = new HashMap[String, Person]

	# Scan, register and add a contributor to a package
	#
	# `person` is looked up in `persons` as is, then through its parsed and
	# re-serialized form, so that equivalent spellings share a single `Person`.
	# The returned person is recorded as a contributor of `mpackage`, once.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			var new_p = new Person.parse(person)
			# Maybe, we already have this person in fact?
			p = persons.get_or_null(new_p.to_s)
			if p == null then
				p = new_p
				persons[p.to_s] = p
			end
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.metadata.contributors.add p
		end
		return p
	end

	# Compute information for a package
	#
	# Fills the indexes and the counters of `self` for `mpackage`, then stores
	# its score. The score starts from the current `score[mpackage]` and is
	# increased according to the documentation, the metadata, the dependencies,
	# the contributors and the size of the package.
	fun package_page(mpackage: MPackage)
	do
		var score = score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end

		var metadata = mpackage.metadata

		var tryit = metadata.tryit
		if tryit != null then
			score += 1.0
		end
		var apk = metadata.apk
		if apk != null then
			score += 1.0
		end
		var homepage = metadata.homepage
		if homepage != null then
			score += 5.0
		end
		var maintainer = metadata.maintainer
		if maintainer != null then
			score += 5.0
			# The maintainer is also registered as a contributor.
			var person = register_contrib(maintainer, mpackage)
			mpackage.metadata.maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = metadata.license
		if license != null then
			score += 5.0
		end
		var browse = metadata.browse
		if browse != null then
			score += 5.0
		end
		var tags = metadata.tags
		for tag in tags do
			tag2proj[tag].add mpackage
		end
		if tags.not_empty then
			# The first tag is used as the category of the package.
			var cat = tags.first
			cat2proj[cat].add mpackage
			score += tags.length.score
		end
		if deps.has(mpackage) then
			score += deps[mpackage].greaters.length.score
			score += deps[mpackage].direct_greaters.length.score
			score += deps[mpackage].smallers.length.score
			score += deps[mpackage].direct_smallers.length.score
		end

		var contributors = mpackage.metadata.contributors
		var more_contributors = metadata.more_contributors
		for c in more_contributors do
			register_contrib(c, mpackage)
		end
		# `contributors` is the list updated by `register_contrib` just above.
		score += contributors.length.to_f
		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		# `entity_score` sums the weight of every entity,
		# `doc_score` sums the weight of the documented ones.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			var gs = 1.0
			entity_score += gs
			if g.mdoc != null then doc_score += gs
			for m in g.mmodules do
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						# Level 2 is counted as an error, anything else as a warning.
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				var ms = gs
				if m.is_test then ms /= 100.0
				entity_score += ms
				# Entities of an undocumented module weigh ten times less.
				if m.mdoc != null then doc_score += ms else ms /= 10.0
				for cd in m.mclassdefs do
					var cs = ms * 0.2
					if not cd.is_intro then cs /= 100.0
					# NOTE(review): this reads as `not (visibility <= private_visibility)`,
					# so it lowers the weight of the entities that are *more* visible
					# than private. This looks inverted -- confirm against `MVisibility`.
					if not cd.mclass.visibility <= private_visibility then cs /= 100.0
					entity_score += cs
					if cd.mdoc != null then doc_score += cs
					mclasses += 1
					for pd in cd.mpropdefs do
						var ps = ms * 0.1
						if not pd.is_intro then ps /= 100.0
						# NOTE(review): same visibility test as for classes above -- confirm.
						if not pd.mproperty.visibility <= private_visibility then ps /= 100.0
						entity_score += ps
						if pd.mdoc != null then doc_score += ps
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		if loc > 0 then
			self.warnings_per_kloc[mpackage] = warnings * 1000 / loc
		end
		# Without any entity the ratio is `0.0/0.0`: use 0 instead of
		# converting a NaN to an integer.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score
		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Does nothing when the package has no `ini`, no root group or no file path.
	# Otherwise it records the number of commits, the dates of the first and
	# last commits, and registers the commit authors as contributors.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		var root = mpackage.root
		if root == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = root.filepath
		if dirpath == null then return

		# Collect commits info
		# One line per commit, formatted as `date;Name <email>`.
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first valid line gives `last_date`; `first_date` is overwritten
			# by each valid line, so it ends up with the date of the last line.
			if mpackage.metadata.last_date == null then mpackage.metadata.last_date = s.first
			mpackage.metadata.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		# Register the authors following the reverse of the counter's sort order.
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end

	end
end
514
# Run `git` with the arguments `command` and return what was read from the process
#
# The process is closed and waited for before returning.
fun git_run(command: String...): String
do
	# print "git {command.join(" ")}"
	var process = new ProcessReader("git", command...)
	var output = process.read_all
	process.close
	process.wait
	return output
end