nitcatalog: add a new class Person
[nit.git] / src / catalog.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Basic catalog generator for Nit packages
16 #
17 # See: <http://nitlanguage.org/catalog/>
18 #
19 # The tool scans packages and generates the HTML files of a catalog.
20 #
21 # ## Features
22 #
23 # * [X] scan packages and their `.ini`
24 # * [X] generate lists of packages
25 # * [X] generate a page per package with the readme and most metadata
26 # * [ ] link/include/be included in the documentation
27 # * [ ] propose `related packages`
28 # * [X] show directory content (a la nitls)
29 # * [X] gather git information from the working directory
30 # * [ ] gather git information from the repository
31 # * [ ] gather package information from github
32 # * [ ] gather people information from github
33 # * [X] reify people
34 # * [X] separate information gathering from rendering
35 # * [ ] move up information gathering in (existing or new) service modules
36 # * [X] add command line options
37 # * [ ] harden HTML (escaping, path injection, etc)
38 # * [ ] nitcorn server with RESTful API
39 #
40 # ## Issues and limitations
41 #
42 # The tool works like the other tools and expects to find valid Nit source code in the directories
43 #
44 # * cruft and temporary files will be collected
45 # * missing source file (e.g. not yet generated by nitcc) will make information
46 # incomplete (e.g. invalid module thus partial dependency and metrics)
47 #
48 # How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
49 module catalog
50
51 import md5 # To get gravatar images
52 import counter # For statistics
53 import modelize # To process and count classes and methods
54
redef class MPackage
	# Look up `key` in the `ini` of the package.
	#
	# Returns `null` when the package has no `ini`;
	# otherwise returns what the `ini` holds for `key`.
	fun metadata(key: String): nullable String
	do
		var ini = self.ini
		if ini != null then return ini[key]
		return null
	end

	# Tags consolidated for this package
	var tags = new Array[String]

	# People registered as maintainers of this package
	var maintainers = new Array[Person]

	# People registered as contributors of this package
	var contributors = new Array[Person]

	# Date of the most recent commit, `null` until known
	var last_date: nullable String = null

	# Date of the oldest commit, `null` until known
	var first_date: nullable String = null
end
79
redef class Int
	# Logarithmic score of `self`, i.e. `log(self+1)`.
	#
	# Used to dampen raw counts when computing the score of packages.
	fun score: Float
	do
		var shifted = self + 1
		return shifted.to_f.log
	end
end
84
# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Return a full-featured link to a person
	#
	# The result is the HTML-escaped name, preceded by a gravatar image
	# when an email is known, and wrapped in a link when a page is known.
	#
	# The gravatar is independent of the case and of the surrounding
	# spaces of the email.
	#
	# ~~~
	# var a = new Person("John Doe", " John.Doe@Example.com ", null)
	# var b = new Person("John Doe", "john.doe@example.com", null)
	# assert a.to_html == b.to_html
	# ~~~
	fun to_html: String
	do
		var res = ""
		var e = name.html_escape
		var page = self.page
		if page != null then
			res += "<a href=\"{page.html_escape}\">"
		end
		var email = self.email
		if email != null then
			# Gravatar hashes the trimmed and lowercased address.
			# The digest itself is lowercased as well for the URL.
			var digest = email.trim.to_lower.md5.to_lower
			res += "<img src=\"https://secure.gravatar.com/avatar/{digest}?size=20&amp;default=retro\">&nbsp;"
		end
		res += e
		if page != null then res += "</a>"
		return res
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# A page can be given without an email, so that the result of `to_s`
	# can always be parsed back.
	#
	# ~~~
	# var jd3 = new Person.parse("John Doe (http://example.com/~jdoe)")
	# assert jd3.name == "John Doe"
	# assert jd3.email == null
	# assert jd3.page == "http://example.com/~jdoe"
	# ~~~
	init parse(person: String)
	do
		var name = person
		var email: nullable String = null
		var page: nullable String = null
		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"

		# Where the optional `(page)` part is looked for.
		# By default the whole input; narrowed to what follows `>` once
		# a well-formed `<email>` part is found.
		var rest = person
		var has_email = false

		# Optional `<email>` part: needs both delimiters to be accepted
		var sp1 = person.split_once_on("<")
		if sp1.length >= 2 then
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length >= 2 then
				name = sp1.first.trim
				email = sp2.first.trim
				rest = sp2.last
				has_email = true
			end
		end

		# Optional `(page)` part: needs both delimiters to be accepted
		var sp3 = rest.split_once_on("(")
		if sp3.length >= 2 then
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length >= 2 then
				page = sp4.first.trim
				# Without an email, the name is what precedes the page
				if not has_email then name = sp3.first.trim
			end
		end

		init(name, email, page)
	end
end
196
197
# The main class of the catalog generator that has the knowledge
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	var deps = new POSet[MPackage]

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people
	#
	# The key is the textual representation given to `register_contrib`.
	var persons = new HashMap[String, Person]

	# Scan, register and add a contributor to a package
	#
	# `person` is parsed with `Person.parse` the first time it is seen;
	# later calls with the same string reuse the same `Person`.
	# The person is added once to the contributors of `mpackage`.
	#
	# Returns the `Person` associated to `person`.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			p = new Person.parse(person)
			persons[person] = p
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.contributors.add p
		end
		return p
	end

	# Compute information for a package
	#
	# Reads the metadata of `mpackage`, registers its maintainer,
	# contributors, tags and category, counts its modules, classes,
	# methods and lines of code, then stores the resulting score.
	fun package_page(mpackage: MPackage)
	do
		# Start from the score already stored for the package
		var score = score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end


		var tryit = mpackage.metadata("upstream.tryit")
		if tryit != null then
			score += 1.0
		end
		var apk = mpackage.metadata("upstream.apk")
		if apk != null then
			score += 1.0
		end

		var homepage = mpackage.metadata("upstream.homepage")
		if homepage != null then
			score += 5.0
		end
		var maintainer = mpackage.metadata("package.maintainer")
		if maintainer != null then
			score += 5.0
			# A maintainer is also registered as a contributor
			var person = register_contrib(maintainer, mpackage)
			mpackage.maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end
		var license = mpackage.metadata("package.license")
		if license != null then
			score += 5.0
		end

		var browse = mpackage.metadata("upstream.browse")
		if browse != null then
			score += 5.0
		end

		var tags = mpackage.metadata("package.tags")
		var ts = mpackage.tags
		if tags != null then
			for t in tags.split(",") do
				t = t.trim
				if t == "" then continue
				ts.add t
			end
		end
		if ts.is_empty then ts.add "none"
		if tryit != null then ts.add "tryit"
		if apk != null then ts.add "apk"
		for t in ts do
			tag2proj[t].add mpackage
		end
		# The first tag is used as the category
		var cat = ts.first
		cat2proj[cat].add mpackage
		score += ts.length.score

		if deps.has(mpackage) then
			score += deps[mpackage].greaters.length.score
			score += deps[mpackage].direct_greaters.length.score
			score += deps[mpackage].smallers.length.score
			score += deps[mpackage].direct_smallers.length.score
		end

		var contributors = mpackage.contributors
		var more_contributors = mpackage.metadata("package.more_contributors")
		if more_contributors != null then
			for c in more_contributors.split(",") do
				c = c.trim
				# Skip empty entries (e.g. a trailing comma), like for tags,
				# so that no nameless person is registered.
				if c == "" then continue
				register_contrib(c, mpackage)
			end
		end
		score += contributors.length.to_f

		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			for m in g.mmodules do
				# Lines are counted only for modules with a known source file
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				for cd in m.mclassdefs do
					mclasses += 1
					for pd in cd.mpropdefs do
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc

		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Does nothing if the package has no `ini` or no root file path.
	# Otherwise records the number of commits, the dates of the first
	# and last commits, and registers the authors as contributors.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = mpackage.root.filepath
		if dirpath == null then return

		# Collect commits info
		# Each output line is `date;name <email>`
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		# Drop the empty element that follows the final newline
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first valid line gives the last date;
			# the final valid line ends up as the first date.
			if mpackage.last_date == null then mpackage.last_date = s.first
			mpackage.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		# Register the authors, iterating the sorted counter in reverse
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end

	end
end
410
# Run `git` with the arguments `command` and return what it printed.
#
# The whole output of the process is read, then the reader is closed
# and the process is waited for before returning.
fun git_run(command: String...): String
do
	# print "git {command.join(" ")}"
	var git = new ProcessReader("git", command...)
	var output = git.read_all
	git.close
	git.wait
	return output
end