nit: Added link to `CONTRIBUTING.md` from the README
[nit.git] / src / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage
	# Look up `key` in the `ini` of the package.
	#
	# Returns `null` when the package has no `ini`;
	# otherwise returns what the `ini` gives for `key`.
	fun metadata(key: String): nullable String
	do
		var config = self.ini
		if config != null then return config[key]
		return null
	end

	# Tags gathered for the package
	var tags = new Array[String]

	# People registered as maintainers of the package
	var maintainers = new Array[Person]

	# People registered as contributors of the package
	var contributors = new Array[Person]

	# Date of the most recent commit, `null` until it is collected
	var last_date: nullable String = null

	# Date of the oldest commit, `null` until it is collected
	var first_date: nullable String = null
end
79
redef class Int
	# Returns `log(self+1)`.
	#
	# Used to turn raw counts into a contribution to the score of a package.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
84
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Return a full-featured link to a person
	#
	# The result is the HTML-escaped name, preceded by a gravatar image when
	# an email is known, and wrapped in a link when a page is known.
	#
	# Gravatar identifies an avatar by the hash of the trimmed and lower-cased
	# address, so the way the email is written does not change the result.
	#
	# ~~~
	# var a = new Person("John Doe", " John.Doe@Example.COM ", null)
	# var b = new Person("John Doe", "john.doe@example.com", null)
	# assert a.to_html == b.to_html
	# ~~~
	fun to_html: String
	do
		var res = ""
		var e = name.html_escape
		var page = self.page
		if page != null then
			res += "<a href=\"{page.html_escape}\">"
		end
		var email = self.email
		if email != null then
			# Normalize the address *before* hashing, as Gravatar requires.
			# The final `to_lower` keeps the digest itself in lower case.
			var md5 = email.trim.to_lower.md5.to_lower
			res += "<img src=\"https://secure.gravatar.com/avatar/{md5}?size=20&amp;default=retro\">&nbsp;"
		end
		res += e
		if page != null then res += "</a>"
		return res
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# Note that the page is only looked for after a well-formed `<email>`.
	# Without one, the whole input is kept as the name.
	init parse(person: String)
	do
		var name = person
		var email = null
		var page = null
		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"
		do
			# Each `break` leaves the block and keeps what was parsed so far.
			var sp1 = person.split_once_on("<")
			if sp1.length < 2 then
				break
			end
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length < 2 then
				break
			end
			# Name and email are only updated once both `<` and `>` are found.
			name = sp1.first.trim
			email = sp2.first.trim
			var sp3 = sp2.last.split_once_on("(")
			if sp3.length < 2 then
				break
			end
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length < 2 then
				break
			end
			page = sp4.first.trim
		end

		init(name, email, page)
	end
end
196
197
# The main class of the catalog generator that has the knowledge
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	var deps = new POSet[MPackage]

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people
	var persons = new HashMap[String, Person]

	# Scan, register and add a contributor to a package
	#
	# `person` is the textual representation accepted by `Person::parse`.
	# The returned `Person` is shared: a person already known in `persons`
	# is reused instead of being created again.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			var new_p = new Person.parse(person)
			# Maybe, we already have this person in fact?
			# (`persons` is keyed by the normalized `to_s`, not by the raw input)
			p = persons.get_or_null(new_p.to_s)
			if p == null then
				p = new_p
				persons[p.to_s] = p
			end
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.contributors.add p
		end
		return p
	end

	# Compute information for a package
	#
	# Reads the metadata and the model of `mpackage`, then:
	#
	# * registers its maintainer, extra contributors, tags and category;
	# * fills the per-package counters (`mmodules`, `mclasses`, `mmethods`,
	#   `loc`, `errors`, `warnings`, `warnings_per_kloc`, `documentation_score`);
	# * adds the result to the current `score` of the package.
	fun package_page(mpackage: MPackage)
	do
		# Start from the score already recorded for the package
		var score = score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end


		var tryit = mpackage.metadata("upstream.tryit")
		if tryit != null then
			score += 1.0
		end
		var apk = mpackage.metadata("upstream.apk")
		if apk != null then
			score += 1.0
		end

		var homepage = mpackage.metadata("upstream.homepage")
		if homepage != null then
			score += 5.0
		end
		var maintainer = mpackage.metadata("package.maintainer")
		if maintainer != null then
			score += 5.0
			# The maintainer is also registered as a contributor
			var person = register_contrib(maintainer, mpackage)
			mpackage.maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = mpackage.metadata("package.license")
		if license != null then
			score += 5.0
		end

		var browse = mpackage.metadata("upstream.browse")
		if browse != null then
			score += 5.0
		end

		var tags = mpackage.metadata("package.tags")
		var ts = mpackage.tags
		if tags != null then
			for t in tags.split(",") do
				t = t.trim
				if t == "" then continue
				ts.add t
			end
		end
		if ts.is_empty then ts.add "none"
		if tryit != null then ts.add "tryit"
		if apk != null then ts.add "apk"
		for t in ts do
			tag2proj[t].add mpackage
		end
		# The first tag is used as the category of the package
		var cat = ts.first
		cat2proj[cat].add mpackage
		score += ts.length.score

		if deps.has(mpackage) then
			score += deps[mpackage].greaters.length.score
			score += deps[mpackage].direct_greaters.length.score
			score += deps[mpackage].smallers.length.score
			score += deps[mpackage].direct_smallers.length.score
		end

		var contributors = mpackage.contributors
		var more_contributors = mpackage.metadata("package.more_contributors")
		if more_contributors != null then
			for c in more_contributors.split(",") do
				c = c.trim
				# Skip blank entries (e.g. a trailing comma), as done for tags,
				# so that no contributor with an empty name is registered.
				if c == "" then continue
				register_contrib(c, mpackage)
			end
		end
		score += contributors.length.to_f

		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		# `entity_score` is the total weight of the entities,
		# `doc_score` is the weight of those that have a documentation.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			entity_score += 1.0
			if g.mdoc != null then doc_score += 1.0
			for m in g.mmodules do
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						# NOTE(review): level 2 is counted as an error and any
						# other level as a warning; confirm the meaning of 2.
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				entity_score += 1.0
				if m.mdoc != null then doc_score += 1.0
				for cd in m.mclassdefs do
					var s = 0.2
					if not cd.is_intro then s /= 10.0
					# NOTE(review): as written, the weight is reduced when the
					# visibility is NOT `<= private_visibility`; confirm that
					# this is the intended direction.
					if not cd.mclass.visibility <= private_visibility then s /= 10.0
					entity_score += s
					if cd.mdoc != null then doc_score += s
					mclasses += 1
					for pd in cd.mpropdefs do
						s = 0.1
						if not pd.is_intro then s /= 10.0
						if not pd.mproperty.visibility <= private_visibility then s /= 10.0
						entity_score += s
						if pd.mdoc != null then doc_score += s
						# Every property definition weighs in the documentation,
						# but only method definitions are counted as methods.
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		if loc > 0 then
			self.warnings_per_kloc[mpackage] = warnings * 1000 / loc
		end
		# A package without any group has no entity to document:
		# avoid the 0.0/0.0 division and report a score of 0.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score

		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Records the number of commits, the dates of the first and last commits,
	# and registers the authors of the commits as contributors.
	# Nothing is done when the package has no `ini` or no known directory.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = mpackage.root.filepath
		if dirpath == null then return

		# Collect commits info
		# Each output line is formatted as `date;name <email>`
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		# Drop the empty element that follows the final newline
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first line read gives `last_date`;
			# `first_date` is overwritten until the last line read.
			if mpackage.last_date == null then mpackage.last_date = s.first
			mpackage.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		# Register in the reverse order of `Counter::sort`
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end

	end
end
464
# Run `git` with the arguments `command` and return all that is read from the process
fun git_run(command: String...): String
do
	# print "git {command.join(" ")}"
	var git = new ProcessReader("git", command...)
	var output = git.read_all
	# Release the stream, then wait for the process to finish
	git.close
	git.wait
	return output
end