catalog: add documentation score
[nit.git] / src / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage
	# Look up `key` in the `ini` of the package.
	#
	# Returns `null` when the package has no `ini`.
	# Otherwise returns what the `ini` associates to `key`.
	fun metadata(key: String): nullable String
	do
		var config = ini
		if config != null then return config[key]
		return null
	end

	# All the tags attached to the package, once consolidated
	var tags = new Array[String]

	# The persons that maintain the package
	var maintainers = new Array[Person]

	# The persons that contributed to the package
	var contributors = new Array[Person]

	# Date of the most recent commit, `null` while unknown
	var last_date: nullable String = null

	# Date of the oldest commit, `null` while unknown
	var first_date: nullable String = null
end
79
redef class Int
	# The logarithm of `self+1`.
	#
	# Used to turn raw counts into contributions to the score of packages.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
84
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Return a full-featured link to a person
	#
	# The result is made of:
	#
	# * the HTML-escaped `name`;
	# * a small gravatar image before the name, if `email` is set;
	# * a link to `page` around both, if `page` is set.
	fun to_html: String
	do
		var res = ""
		var e = name.html_escape
		var page = self.page
		if page != null then
			res += "<a href=\"{page.html_escape}\">"
		end
		var email = self.email
		if email != null then
			# Gravatar identifies an account by the MD5 of the trimmed and
			# lower-cased address, so normalize the email *before* hashing.
			# The final `to_lower` only normalizes the case of the hex digest.
			var md5 = email.trim.to_lower.md5.to_lower
			res += "<img src=\"https://secure.gravatar.com/avatar/{md5}?size=20&amp;default=retro\">&nbsp;"
		end
		res += e
		if page != null then res += "</a>"
		return res
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	#
	# Note that `parse` only looks for a page after an email,
	# so a person with a page but no email is not read back as such.
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# When no complete `<email>` part is found, the whole `person` string
	# is kept untouched as the name and neither email nor page are set.
	init parse(person: String)
	do
		var name = person
		var email = null
		var page = null
		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"
		do
			# Each `break` leaves the block with what was recognized so far.
			var sp1 = person.split_once_on("<")
			if sp1.length < 2 then
				break
			end
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length < 2 then
				break
			end
			name = sp1.first.trim
			email = sp2.first.trim
			# The page is only looked for after the closing `>` of the email
			var sp3 = sp2.last.split_once_on("(")
			if sp3.length < 2 then
				break
			end
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length < 2 then
				break
			end
			page = sp4.first.trim
		end

		init(name, email, page)
	end
end
196
197
# The main class of the catalog generator that has the knowledge
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	var deps = new POSet[MPackage]

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people
	var persons = new HashMap[String, Person]

	# Scan, register and add a contributor to a package
	#
	# `person` is the textual representation of the contributor.
	# It is parsed with `Person.parse` the first time it is seen, then the
	# same `Person` is reused for each identical string.
	#
	# The person is added once to the contributors of `mpackage`,
	# and is returned.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			p = new Person.parse(person)
			persons[person] = p
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.contributors.add p
		end
		return p
	end

	# Compute information for a package
	#
	# Fill the per-package counters of `self` (`mmodules`, `mclasses`,
	# `mmethods`, `loc`, `errors`, `warnings`, `documentation_score`),
	# register the tags, category, maintainer and additional contributors
	# of `mpackage`, then update `score` with an ad hoc sum of all of this.
	fun package_page(mpackage: MPackage)
	do
		# Start from the score already recorded for the package
		var score = score[mpackage].to_f

		# Having a documentation is a big bonus, its length is a small one
		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end


		var tryit = mpackage.metadata("upstream.tryit")
		if tryit != null then
			score += 1.0
		end
		var apk = mpackage.metadata("upstream.apk")
		if apk != null then
			score += 1.0
		end

		var homepage = mpackage.metadata("upstream.homepage")
		if homepage != null then
			score += 5.0
		end
		var maintainer = mpackage.metadata("package.maintainer")
		if maintainer != null then
			score += 5.0
			# The maintainer is also registered as a contributor
			var person = register_contrib(maintainer, mpackage)
			mpackage.maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = mpackage.metadata("package.license")
		if license != null then
			score += 5.0
		end

		var browse = mpackage.metadata("upstream.browse")
		if browse != null then
			score += 5.0
		end

		# Consolidate the tags: those of the `ini`, then the implicit ones
		var tags = mpackage.metadata("package.tags")
		var ts = mpackage.tags
		if tags != null then
			for t in tags.split(",") do
				t = t.trim
				if t == "" then continue
				ts.add t
			end
		end
		if ts.is_empty then ts.add "none"
		if tryit != null then ts.add "tryit"
		if apk != null then ts.add "apk"
		for t in ts do
			tag2proj[t].add mpackage
		end
		# The first tag is used as the category
		var cat = ts.first
		cat2proj[cat].add mpackage
		score += ts.length.score

		if deps.has(mpackage) then
			score += deps[mpackage].greaters.length.score
			score += deps[mpackage].direct_greaters.length.score
			score += deps[mpackage].smallers.length.score
			score += deps[mpackage].direct_smallers.length.score
		end

		var contributors = mpackage.contributors
		var more_contributors = mpackage.metadata("package.more_contributors")
		if more_contributors != null then
			for c in more_contributors.split(",") do
				c = c.trim
				# Skip empty entries (e.g. a trailing comma), as done for tags,
				# so that no nameless person is registered and scored.
				if c == "" then continue
				register_contrib(c, mpackage)
			end
		end
		score += contributors.length.to_f

		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			entity_score += 1.0
			if g.mdoc != null then doc_score += 1.0
			for m in g.mmodules do
				# Messages of level 2 are counted as errors,
				# all the others as warnings.
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				entity_score += 1.0
				if m.mdoc != null then doc_score += 1.0
				for cd in m.mclassdefs do
					var s = 0.2
					if not cd.is_intro then s /= 10.0
					# NOTE(review): this seems to read as
					# `not (visibility <= private_visibility)`, thus to devalue
					# the entities that are *more* visible than private.
					# Confirm this is the intended weighting.
					if not cd.mclass.visibility <= private_visibility then s /= 10.0
					entity_score += s
					if cd.mdoc != null then doc_score += s
					mclasses += 1
					for pd in cd.mpropdefs do
						s = 0.1
						if not pd.is_intro then s /= 10.0
						# NOTE(review): same visibility question as for classes.
						if not pd.mproperty.visibility <= private_visibility then s /= 10.0
						entity_score += s
						if pd.mdoc != null then doc_score += s
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		# A package without any group has no entity to document:
		# avoid the 0.0/0.0 division and use 0 as the documentation score.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score

		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Nothing is done if the package has no `ini` or if the root of the
	# package has no filepath.
	#
	# Otherwise, `git log` is run on the directory of the package to set:
	#
	# * the number of `commits` of the package;
	# * its `first_date` and `last_date`;
	# * its contributors, registered from the most to the least active one.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = mpackage.root.filepath
		if dirpath == null then return

		# Collect commits info
		# Each line of the output is `date;name <email>`
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first line seen gives the last date; each line overrides
			# the first date, so the final line seen wins.
			if mpackage.last_date == null then mpackage.last_date = s.first
			mpackage.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end

	end
end
453
# Run `git` with the arguments `command` and return all of its output
#
# The process is closed and waited for before returning.
fun git_run(command: String...): String
do
	# To debug, print the command line: "git {command.join(" ")}"
	var git = new ProcessReader("git", command...)
	var output = git.read_all
	git.close
	git.wait
	return output
end