Merge: lib/config: fix doc
[nit.git] / src / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage
	# Catalog metadata attached to this package
	#
	# A `MPackageMetadata` built around `self`.
	var metadata: MPackageMetadata = new MPackageMetadata(self)
end
60
# The metadata extracted from a MPackage
#
# Most values are read on demand from the `ini` file of the package.
# `maintainers`, `contributors`, `last_date` and `first_date` start empty
# and are filled by `Catalog`.
class MPackageMetadata

	# The mpackage this metadata belongs to
	var mpackage: MPackage

	# Return the associated metadata from the `ini`, if any
	#
	# Returns `null` when the package has no `ini` file.
	fun metadata(key: String): nullable String do
		var ini = mpackage.ini
		if ini == null then return null
		return ini[key]
	end

	# Read the comma-separated metadata `key` as a list of items
	#
	# Each item is trimmed and empty items are dropped.
	# A missing key gives an empty array.
	private fun metadata_list(key: String): Array[String] do
		var res = new Array[String]
		var string = metadata(key)
		if string == null then return res
		for item in string.split(",") do
			var trimmed = item.trim
			if trimmed.is_empty then continue
			res.add trimmed
		end
		return res
	end

	# The consolidated list of tags
	#
	# Made of the items of `package.tags`, then `tryit` and `apk` when the
	# corresponding upstream keys are set.
	# If nothing was collected, the single tag `none` is used, so the
	# result is never empty.
	var tags: Array[String] is lazy do
		# A missing `package.tags` key must not skip the synthetic tags below.
		var tags = metadata_list("package.tags")
		if tryit != null then tags.add "tryit"
		if apk != null then tags.add "apk"
		if tags.is_empty then tags.add "none"
		return tags
	end

	# The list of all maintainers
	var maintainers = new Array[Person]

	# The list of contributors
	var contributors = new Array[Person]

	# The date of the most recent commit
	var last_date: nullable String = null

	# The date of the oldest commit
	var first_date: nullable String = null

	# Key: `package.maintainer`
	var maintainer: nullable String is lazy do return metadata("package.maintainer")

	# Key: `package.more_contributors`
	#
	# The comma-separated value is split into trimmed, non-empty items.
	var more_contributors: Array[String] is lazy do
		return metadata_list("package.more_contributors")
	end

	# Key: `package.license`
	var license: nullable String is lazy do return metadata("package.license")

	# Key: `upstream.tryit`
	var tryit: nullable String is lazy do return metadata("upstream.tryit")

	# Key: `upstream.apk`
	var apk: nullable String is lazy do return metadata("upstream.apk")

	# Key: `upstream.homepage`
	var homepage: nullable String is lazy do return metadata("upstream.homepage")

	# Key: `upstream.browse`
	var browse: nullable String is lazy do return metadata("upstream.browse")

	# Package git clone address
	#
	# Key: `upstream.git`
	var git: nullable String is lazy do return metadata("upstream.git")

	# Package issue tracker
	#
	# Key: `upstream.issues`
	var issues: nullable String is lazy do return metadata("upstream.issues")
end
139
redef class Int
	# The logarithm of `self + 1`, as a `Float`
	#
	# Used to compute the score of packages.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
144
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Gravatar id
	#
	# The MD5 of the trimmed and lowercased `email`, or `null` if there is no email.
	#
	# ~~~
	# var a = new Person("John Doe", " John.Doe@Example.com ", null)
	# var b = new Person("John Doe", "john.doe@example.com", null)
	# assert a.gravatar == b.gravatar
	# ~~~
	var gravatar: nullable String is lazy do
		var email = self.email
		if email == null then return null
		# Gravatar hashes the normalized address, so normalize before hashing.
		return email.trim.to_lower.md5.to_lower
	end

	# Return a full-featured link to a person
	#
	# The result is the escaped name, preceded by the gravatar image when
	# there is an email, and wrapped in a link to `page` when there is one.
	fun to_html: String
	do
		var res = ""
		var e = name.html_escape
		var page = self.page
		if page != null then
			res += "<a href=\"{page.html_escape}\">"
		end
		var gravatar = self.gravatar
		if gravatar != null then
			res += "<img src=\"https://secure.gravatar.com/avatar/{gravatar}?size=20&amp;default=retro\">&nbsp;"
		end
		res += e
		if page != null then res += "</a>"
		return res
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	#
	# This also holds when there is a page but no email.
	#
	# ~~~
	# var jd3 = new Person("John Doe", null, "http://example.com/~jdoe")
	# assert jd3.to_s == "John Doe (http://example.com/~jdoe)"
	# var jd4 = new Person.parse(jd3.to_s)
	# assert jd4.to_s == jd3.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	#
	# var jd3 = new Person.parse("John Doe (http://example.com/~jdoe)")
	# assert jd3.name == "John Doe"
	# assert jd3.email == null
	# assert jd3.page == "http://example.com/~jdoe"
	# ~~~
	#
	# A string with an unclosed `<` or `(` is kept as is in the name.
	init parse(person: String)
	do
		var name = person
		var email: nullable String = null
		var page: nullable String = null

		# The split is done manually rather than with a regular expression
		# (the regex-based version was reported as broken).

		# Text in which the optional `(page)` part is searched.
		# Set to `null` when the email part is malformed: nothing more is extracted.
		var rest: nullable String = person

		var sp1 = person.split_once_on("<")
		if sp1.length == 2 then
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length == 2 then
				name = sp1.first.trim
				email = sp2.first.trim
				rest = sp2.last
			else
				rest = null
			end
		end

		if rest != null then
			var sp3 = rest.split_once_on("(")
			if sp3.length == 2 then
				var sp4 = sp3.last.split_once_on(")")
				if sp4.length == 2 then
					# Without email, the name is what precedes the page.
					if email == null then name = sp3.first.trim
					page = sp4.first.trim
				end
			end
		end

		init(name, email, page)
	end
end
262
263
# The main class of the catalog generator that has the knowledge
#
# It gathers, per package, the metadata, the people, the metrics and a
# score, and keeps them in the maps and counters below.
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# List of all packages by their names
	var mpackages = new HashMap[String, MPackage]

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	var deps = new POSet[MPackage]

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people by their git string
	var persons = new HashMap[String, Person]

	# Map person short names to person objects
	var name2person = new HashMap[String, Person]

	# Package statistics cache
	var mpackages_stats = new HashMap[MPackage, MPackageStats]

	# Scan, register and add a contributor to a package
	#
	# `person` is the textual form accepted by `Person::parse`.
	# The known `Person` is reused when there is one, looked up first by the
	# raw string and then by the normalized `to_s` of the parsed person.
	# The person is added once to the contributors of `mpackage` and returned.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			var new_p = new Person.parse(person)
			# Maybe, we already have this person in fact?
			p = persons.get_or_null(new_p.to_s)
			if p == null then
				p = new_p
				persons[p.to_s] = p
			end
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.metadata.contributors.add p
		end
		name2person[p.name] = p
		return p
	end

	# Compute information for a package
	#
	# Registers `mpackage` in `mpackages`, indexes it by tag, category and
	# maintainer, registers its additional contributors, counts its modules,
	# classes, methods, lines of code, errors and warnings, then stores its
	# documentation score and its global score.
	fun package_page(mpackage: MPackage)
	do
		mpackages[mpackage.full_name] = mpackage

		# Start from the score already recorded for the package, if any.
		var score = score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end
		var metadata = mpackage.metadata

		var tryit = metadata.tryit
		if tryit != null then
			score += 1.0
		end
		var apk = metadata.apk
		if apk != null then
			score += 1.0
		end
		var homepage = metadata.homepage
		if homepage != null then
			score += 5.0
		end
		var maintainer = metadata.maintainer
		if maintainer != null then
			score += 5.0
			var person = register_contrib(maintainer, mpackage)
			# Same guard as for `maint2proj`: do not list a maintainer twice.
			var maintainers = metadata.maintainers
			if not maintainers.has(person) then maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = metadata.license
		if license != null then
			score += 5.0
		end
		var browse = metadata.browse
		if browse != null then
			score += 5.0
		end
		var tags = metadata.tags
		for tag in tags do
			tag2proj[tag].add mpackage
		end
		if tags.not_empty then
			# The first tag is used as the category of the package.
			var cat = tags.first
			cat2proj[cat].add mpackage
			score += tags.length.score
		end
		if deps.has(mpackage) then
			score += deps[mpackage].greaters.length.score
			score += deps[mpackage].direct_greaters.length.score
			score += deps[mpackage].smallers.length.score
			score += deps[mpackage].direct_smallers.length.score
		end

		var contributors = mpackage.metadata.contributors
		var more_contributors = metadata.more_contributors
		for c in more_contributors do
			register_contrib(c, mpackage)
		end
		score += contributors.length.to_f
		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		# `entity_score` is the total weight of the entities,
		# `doc_score` is the weight of the documented ones.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			var gs = 1.0
			entity_score += gs
			if g.mdoc != null then doc_score += gs
			for m in g.mmodules do
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						# Messages of level 2 are counted as errors, the others as warnings.
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				var ms = gs
				if m.is_test then ms /= 100.0
				entity_score += ms
				# Entities of an undocumented module weigh ten times less.
				if m.mdoc != null then doc_score += ms else ms /= 10.0
				for cd in m.mclassdefs do
					var cs = ms * 0.2
					if not cd.is_intro then cs /= 100.0
					# NOTE(review): if `not` applies to the whole comparison, this
					# discounts the classes that are NOT private. Confirm the
					# intended direction.
					if not cd.mclass.visibility <= private_visibility then cs /= 100.0
					entity_score += cs
					if cd.mdoc != null then doc_score += cs
					mclasses += 1
					for pd in cd.mpropdefs do
						var ps = ms * 0.1
						if not pd.is_intro then ps /= 100.0
						# NOTE(review): same question as for classes above.
						if not pd.mproperty.visibility <= private_visibility then ps /= 100.0
						entity_score += ps
						if pd.mdoc != null then doc_score += ps
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		if loc > 0 then
			self.warnings_per_kloc[mpackage] = warnings * 1000 / loc
		end
		# Without any entity there is nothing to rate: avoid dividing by zero.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score
		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Nothing is done for packages without `ini`, root group or file path.
	# Otherwise the log of the package directory is read to count the
	# commits, get the dates of the first and last commits, and register
	# the authors as contributors.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		var root = mpackage.root
		if root == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = root.filepath
		if dirpath == null then return

		# Collect commits info
		# Each output line is `date;name <email>`.
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first line read gives the last date; the first date is
			# overwritten until the last line read.
			if mpackage.metadata.last_date == null then mpackage.metadata.last_date = s.first
			mpackage.metadata.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end
	end

	# Compose package stats
	#
	# The values are copied from the counters of `self`.
	# The result is also cached in `mpackages_stats`.
	fun mpackage_stats(mpackage: MPackage): MPackageStats do
		var stats = new MPackageStats
		stats.mmodules = mmodules[mpackage]
		stats.mclasses = mclasses[mpackage]
		stats.mmethods = mmethods[mpackage]
		stats.loc = loc[mpackage]
		stats.errors = errors[mpackage]
		stats.warnings = warnings[mpackage]
		stats.warnings_per_kloc = warnings_per_kloc[mpackage]
		stats.documentation_score = documentation_score[mpackage]
		stats.commits = commits[mpackage]
		stats.score = score[mpackage]

		mpackages_stats[mpackage] = stats
		return stats
	end

	# Compose catalog stats
	#
	# Lazy: the totals are computed at the first access only.
	var catalog_stats: CatalogStats is lazy do
		var stats = new CatalogStats
		stats.packages = mpackages.length
		stats.maintainers = maint2proj.length
		stats.contributors = contrib2proj.length
		stats.tags = tag2proj.length
		stats.modules = mmodules.sum
		stats.classes = mclasses.sum
		stats.methods = mmethods.sum
		stats.loc = loc.sum
		return stats
	end
end
562
# Catalog statistics
#
# A plain record of totals; every counter starts at 0.
class CatalogStats

	# Number of packages
	var packages: Int = 0

	# Number of maintainers
	var maintainers: Int = 0

	# Number of contributors
	var contributors: Int = 0

	# Number of tags
	var tags: Int = 0

	# Number of modules
	var modules: Int = 0

	# Number of classes
	var classes: Int = 0

	# Number of methods
	var methods: Int = 0

	# Number of lines of code
	var loc: Int = 0
end
590
# MPackage statistics for the catalog
#
# A plain record of per-package metrics; every value starts at 0.
class MPackageStats

	# Number of modules
	var mmodules: Int = 0

	# Number of classes
	var mclasses: Int = 0

	# Number of methods
	var mmethods: Int = 0

	# Number of lines of code
	var loc: Int = 0

	# Number of errors
	var errors: Int = 0

	# Number of warnings and advices
	var warnings: Int = 0

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc: Int = 0

	# Documentation score (between 0 and 100)
	var documentation_score: Int = 0

	# Number of commits by package
	var commits: Int = 0

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score: Int = 0
end
626
# Sort the mpackages by their score
#
# Packages are ordered by decreasing score.
# Packages without cached statistics in the catalog are put last.
class CatalogScoreSorter
	super Comparator

	# Catalog used to access scores
	var catalog: Catalog

	redef type COMPARED: MPackage

	# Compare `a` and `b` on the score found in `catalog.mpackages_stats`
	#
	# Two packages that both lack statistics are considered equal, so that
	# `compare(a, b)` and `compare(b, a)` stay consistent.
	redef fun compare(a, b) do
		var stats = catalog.mpackages_stats
		var astats = stats.get_or_null(a)
		var bstats = stats.get_or_null(b)
		if astats == null then
			if bstats == null then return 0
			return 1
		end
		if bstats == null then return -1
		# Reversed operands: the highest score comes first.
		return bstats.score <=> astats.score
	end
end
644
# Sort tags alphabetically
class CatalogTagsSorter
	super Comparator

	redef type COMPARED: String

	# Delegate to the comparison of the strings themselves
	redef fun compare(a, b)
	do
		var order = a <=> b
		return order
	end
end
653
# Execute a git command and return the result
#
# `command` holds the arguments passed to the `git` program.
# Everything read from the process is returned once it has terminated.
fun git_run(command: String...): String
do
	var git = new ProcessReader("git", command...)
	var output = git.read_all
	git.close
	git.wait
	return output
end