catalog: Update to use the package graph
[nit.git] / src / catalog / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage

	# Catalog-oriented metadata attached to this package
	#
	# The `MPackageMetadata` object is created together with the package
	# and keeps a reference back to it.
	var metadata = new MPackageMetadata(self)
end
60
# The metadata extracted from a MPackage
#
# Values keyed by an `ini` entry are read lazily from the `ini` of the package.
# People and dates are plain attributes that are filled from the outside.
class MPackageMetadata

	# The mpackage this metadata belongs to
	var mpackage: MPackage

	# Return the associated metadata from the `ini`, if any
	#
	# `null` is returned when the package has no `ini`.
	fun metadata(key: String): nullable String do
		var ini = mpackage.ini
		if ini == null then return null
		return ini[key]
	end

	# Split the comma-separated value of `key` into trimmed, non-empty items
	#
	# An empty array is returned when the key has no value.
	private fun metadata_list(key: String): Array[String]
	do
		var res = new Array[String]
		var string = metadata(key)
		if string == null then return res
		for item in string.split(",") do
			item = item.trim
			if item.is_empty then continue
			res.add item
		end
		return res
	end

	# The consolidated list of tags
	#
	# It contains, in order:
	#
	# * the tags listed in `package.tags`
	# * `tryit` if `tryit` is set
	# * `apk` if `apk` is set
	# * `none` if nothing else was collected
	#
	# The implicit tags and the `none` fallback are applied even when
	# the `package.tags` key is missing.
	var tags: Array[String] is lazy do
		var tags = metadata_list("package.tags")
		if tryit != null then tags.add "tryit"
		if apk != null then tags.add "apk"
		if tags.is_empty then tags.add "none"
		return tags
	end

	# The list of all maintainers
	var maintainers = new Array[Person]

	# The list of contributors
	var contributors = new Array[Person]

	# The date of the most recent commit
	var last_date: nullable String = null

	# The date of the oldest commit
	var first_date: nullable String = null

	# Key: `package.maintainer`
	var maintainer: nullable String is lazy do return metadata("package.maintainer")

	# Key: `package.more_contributors`
	#
	# The comma-separated value is split into trimmed, non-empty entries.
	var more_contributors: Array[String] is lazy do
		return metadata_list("package.more_contributors")
	end

	# Key: `package.license`
	var license: nullable String is lazy do return metadata("package.license")

	# Key: `upstream.tryit`
	var tryit: nullable String is lazy do return metadata("upstream.tryit")

	# Key: `upstream.apk`
	var apk: nullable String is lazy do return metadata("upstream.apk")

	# Key: `upstream.homepage`
	var homepage: nullable String is lazy do return metadata("upstream.homepage")

	# Key: `upstream.browse`
	var browse: nullable String is lazy do return metadata("upstream.browse")

	# Package git clone address
	#
	# Key: `upstream.git`
	var git: nullable String is lazy do return metadata("upstream.git")

	# Package issue tracker
	#
	# Key: `upstream.issues`
	var issues: nullable String is lazy do return metadata("upstream.issues")
end
139
redef class Int
	# Logarithmic weight of `self`, that is `log(self+1)`
	#
	# Used to compute the score of packages.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
144
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Gravatar id
	#
	# It is the MD5 of the email once trimmed and lowercased, so the same
	# address written with a different case gives the same id.
	# `null` if there is no email.
	var gravatar: nullable String is lazy do
		var email = self.email
		if email == null then return null
		# The address is normalized *before* hashing;
		# lowercasing only the digest would not merge case variants.
		return email.trim.to_lower.md5.to_lower
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# A page can be given without an email, as produced by `to_s`.
	#
	# ~~~
	# var jd3 = new Person.parse("John Doe (http://example.com/~jdoe)")
	# assert jd3.name == "John Doe"
	# assert jd3.email == null
	# assert jd3.page == "http://example.com/~jdoe"
	# ~~~
	init parse(person: String)
	do
		var name = person
		var email: nullable String = null
		var page: nullable String = null
		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"

		# What remains to scan for the `(page)` part
		var rest = person

		# Optional `<email>` part: both delimiters are required
		var sp1 = person.split_once_on("<")
		if sp1.length == 2 then
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length == 2 then
				name = sp1.first.trim
				email = sp2.first.trim
				rest = sp2.last
			end
		end

		# Optional `(page)` part: both delimiters are required
		var sp3 = rest.split_once_on("(")
		if sp3.length == 2 then
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length == 2 then
				if email != null then
					page = sp4.first.trim
				else if sp4.last.trim.is_empty then
					# Without an email, the parenthesis must close the
					# string, so that a name like "John (Johnny) Doe"
					# is kept untouched.
					name = sp3.first.trim
					page = sp4.first.trim
				end
			end
		end

		init(name, email, page)
	end
end
244
245
# The main class of the catalog generator that has the knowledge
#
# It indexes packages by name, tag, category and people, and stores
# per-package counters (size, warnings, documentation, commits and score).
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# List of all packages by their names
	var mpackages = new HashMap[String, MPackage]

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	fun deps: HashDigraph[MPackage] do return modelbuilder.model.mpackage_importation_graph

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people by their git string
	var persons = new HashMap[String, Person]

	# Map person short names to person objects
	var name2person = new HashMap[String, Person]

	# Package statistics cache
	var mpackages_stats = new HashMap[MPackage, MPackageStats]

	# Scan, register and add a contributor to a package
	#
	# `person` is the textual form accepted by `Person::parse`.
	# The returned person is shared: a string that resolves to an already
	# known person gives back that same object.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			var new_p = new Person.parse(person)
			# Maybe, we already have this person in fact?
			p = persons.get_or_null(new_p.to_s)
			if p == null then
				p = new_p
				persons[p.to_s] = p
			end
		end
		# Link the person and the package, once
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.metadata.contributors.add p
		end
		name2person[p.name] = p
		return p
	end

	# Compute information for a package
	#
	# The package is registered in the indexes (`mpackages`, `tag2proj`,
	# `cat2proj`, `maint2proj`) and its counters are filled.
	# The score starts from the current value in `score` and is increased
	# according to the available metadata, the dependencies, the people
	# and the size of the package.
	fun package_page(mpackage: MPackage)
	do
		mpackages[mpackage.full_name] = mpackage

		var score = self.score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end
		var metadata = mpackage.metadata

		# Each available piece of metadata is worth some points
		var tryit = metadata.tryit
		if tryit != null then
			score += 1.0
		end
		var apk = metadata.apk
		if apk != null then
			score += 1.0
		end
		var homepage = metadata.homepage
		if homepage != null then
			score += 5.0
		end
		var maintainer = metadata.maintainer
		if maintainer != null then
			score += 5.0
			var person = register_contrib(maintainer, mpackage)
			# Same guard as the indexes: never list a maintainer twice
			var maintainers = mpackage.metadata.maintainers
			if not maintainers.has(person) then maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = metadata.license
		if license != null then
			score += 5.0
		end
		var browse = metadata.browse
		if browse != null then
			score += 5.0
		end

		# Tags; the first one is used as the category
		var tags = metadata.tags
		for tag in tags do
			tag2proj[tag].add mpackage
		end
		if tags.not_empty then
			var cat = tags.first
			cat2proj[cat].add mpackage
			score += tags.length.score
		end

		# Direct and indirect dependencies, in both directions
		if deps.has_vertex(mpackage) then
			score += deps.predecessors(mpackage).length.score
			score += deps.get_all_predecessors(mpackage).length.score
			score += deps.successors(mpackage).length.score
			score += deps.get_all_successors(mpackage).length.score
		end

		# People; `contributors` is the live array that `register_contrib` fills
		var contributors = mpackage.metadata.contributors
		var more_contributors = metadata.more_contributors
		for c in more_contributors do
			register_contrib(c, mpackage)
		end
		score += contributors.length.to_f

		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		# `entity_score` is the total weight of the entities,
		# `doc_score` the weight of those that have a `mdoc`.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			var gs = 1.0
			entity_score += gs
			if g.mdoc != null then doc_score += gs
			for m in g.mmodules do
				# Messages attached to the source file:
				# level 2 is counted as an error, anything else as a warning
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				# Lines of code, when the module has an AST
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				var ms = gs
				if m.is_test then ms /= 100.0
				entity_score += ms
				# The content of an undocumented module weighs ten times less
				if m.mdoc != null then doc_score += ms else ms /= 10.0
				for cd in m.mclassdefs do
					var cs = ms * 0.2
					if not cd.is_intro then cs /= 100.0
					# NOTE(review): as written, the penalty applies when the
					# visibility is NOT `<= private_visibility`; confirm that
					# this is the intended direction.
					if not cd.mclass.visibility <= private_visibility then cs /= 100.0
					entity_score += cs
					if cd.mdoc != null then doc_score += cs
					mclasses += 1
					for pd in cd.mpropdefs do
						var ps = ms * 0.1
						if not pd.is_intro then ps /= 100.0
						# NOTE(review): same remark as for the classes above.
						if not pd.mproperty.visibility <= private_visibility then ps /= 100.0
						entity_score += ps
						if pd.mdoc != null then doc_score += ps
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		if loc > 0 then
			self.warnings_per_kloc[mpackage] = warnings * 1000 / loc
		end
		# A package without any group has no entity to document:
		# keep 0 instead of dividing by zero.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score
		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Nothing is done when the package has no `ini`, no root group or
	# no file path. Otherwise the number of commits, the dates of the
	# first and last listed commits, and the contributors are collected.
	fun git_info(mpackage: MPackage)
	do
		if mpackage.ini == null then return

		var root = mpackage.root
		if root == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = root.filepath
		if dirpath == null then return

		# Collect commits info, one `date;name <email>` line per commit
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit:
			# the first valid line gives `last_date`,
			# `first_date` is overwritten until the last valid line.
			if mpackage.metadata.last_date == null then mpackage.metadata.last_date = s.first
			mpackage.metadata.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		# Register in the reverse order of `Counter::sort`
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end
	end

	# Compose package stats
	#
	# The counters of `mpackage` are copied in a new `MPackageStats`
	# that is stored in `mpackages_stats` and returned.
	fun mpackage_stats(mpackage: MPackage): MPackageStats do
		var stats = new MPackageStats
		stats.mmodules = mmodules[mpackage]
		stats.mclasses = mclasses[mpackage]
		stats.mmethods = mmethods[mpackage]
		stats.loc = loc[mpackage]
		stats.errors = errors[mpackage]
		stats.warnings = warnings[mpackage]
		stats.warnings_per_kloc = warnings_per_kloc[mpackage]
		stats.documentation_score = documentation_score[mpackage]
		stats.commits = commits[mpackage]
		stats.score = score[mpackage]

		mpackages_stats[mpackage] = stats
		return stats
	end

	# Compose catalog stats
	#
	# The value is lazy: it is computed on the first access from the
	# indexes and counters as they are at that time.
	var catalog_stats: CatalogStats is lazy do
		var stats = new CatalogStats
		stats.packages = mpackages.length
		stats.maintainers = maint2proj.length
		stats.contributors = contrib2proj.length
		stats.tags = tag2proj.length
		stats.modules = mmodules.sum
		stats.classes = mclasses.sum
		stats.methods = mmethods.sum
		stats.loc = loc.sum
		return stats
	end
end
544
# Global figures about a whole catalog
#
# All the counts start at 0.
class CatalogStats

	# How many packages
	var packages = 0

	# How many maintainers
	var maintainers = 0

	# How many contributors
	var contributors = 0

	# How many tags
	var tags = 0

	# How many modules
	var modules = 0

	# How many classes
	var classes = 0

	# How many methods
	var methods = 0

	# How many lines of code
	var loc = 0

	# The stats as a map, each count is stored under the name of its attribute
	fun to_map: Map[String, Int]
	do
		var res = new HashMap[String, Int]
		res["packages"] = packages
		res["maintainers"] = maintainers
		res["contributors"] = contributors
		res["tags"] = tags
		res["modules"] = modules
		res["classes"] = classes
		res["methods"] = methods
		res["loc"] = loc
		return res
	end
end
586
# Figures about a single package, as shown in the catalog
#
# All the values start at 0.
class MPackageStats

	# How many modules
	var mmodules = 0

	# How many classes
	var mclasses = 0

	# How many methods
	var mmethods = 0

	# How many lines of code
	var loc = 0

	# How many errors
	var errors = 0

	# How many warnings and advices
	var warnings = 0

	# How many warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = 0

	# Documentation score (between 0 and 100)
	var documentation_score = 0

	# How many commits
	var commits = 0

	# Score of the package
	#
	# The score is loosely computed using other metrics
	var score = 0
end
622
# Sort the mpackages by their score
#
# The best scores come first.
# Packages without an entry in `Catalog::mpackages_stats` come last.
class CatalogScoreSorter
	super Comparator

	# Catalog used to access scores
	var catalog: Catalog

	redef type COMPARED: MPackage

	# Compare `a` and `b` by decreasing score
	#
	# Two packages that both lack stats are considered equal, so that
	# `compare(a, b)` and `compare(b, a)` never both claim "greater".
	redef fun compare(a, b) do
		var stats = catalog.mpackages_stats
		var has_a = stats.has_key(a)
		var has_b = stats.has_key(b)
		if not has_a and not has_b then return 0
		if not has_a then return 1
		if not has_b then return -1
		# Operands are swapped to get the highest score first
		return stats[b].score <=> stats[a].score
	end
end
640
# Sort tags alphabetically
#
# The order is the one of `<=>` on strings.
class CatalogTagsSorter
	super Comparator

	redef type COMPARED: String

	redef fun compare(a, b)
	do
		return a <=> b
	end
end
649
# Run `git` with `command` as arguments and return what was read from it
#
# The process is closed and waited for before returning.
fun git_run(command: String...): String
do
	# print "git {command.join(" ")}"
	var git = new ProcessReader("git", command...)
	var output = git.read_all
	git.close
	git.wait
	return output
end