# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Basic catalog generator for Nit packages
#
# See: <http://nitlanguage.org/catalog/>
#
# The tool scans packages and generates the HTML files of a catalog.
#
# ## Features
#
# * [X] scan packages and their `.ini`
# * [X] generate lists of packages
# * [X] generate a page per package with the readme and most metadata
# * [ ] link/include/be included in the documentation
# * [ ] propose `related packages`
# * [X] show directory content (a la nitls)
# * [X] gather git information from the working directory
# * [ ] gather git information from the repository
# * [ ] gather package information from github
# * [ ] gather people information from github
# * [X] reify people
# * [X] separate information gathering from rendering
# * [ ] move up information gathering in (existing or new) service modules
# * [X] add command line options
# * [ ] harden HTML (escaping, path injection, etc)
# * [ ] nitcorn server with RESTful API
#
# ## Issues and limitations
#
# The tool works like the other tools and expects to find valid Nit source code in the directories
#
# * cruft and temporary files will be collected
# * missing source file (e.g. not yet generated by nitcc) will make information
#   incomplete (e.g. invalid module thus partial dependency and metrics)
#
# How to use the tool as the basis of a Nit code archive on the web usable with a package manager is not clear.
module catalog

import md5 # To get gravatar images
import counter # For statistics
import modelize # To process and count classes and methods

redef class MPackage
	# Metadata related to this package
	var metadata = new MPackageMetadata(self)
end

# The metadata extracted from a MPackage
class MPackageMetadata

	# The mpackage this metadata belongs to
	var mpackage: MPackage

	# Return the associated metadata from the `ini`, if any
	#
	# Returns `null` when the package has no `ini` at all.
	fun metadata(key: String): nullable String do
		var ini = mpackage.ini
		if ini == null then return null
		return ini[key]
	end

	# The consolidated list of tags
	#
	# Built from the comma-separated `package.tags` key; blank entries are dropped.
	# The synthetic tags `tryit`, `apk` and `none` are only considered when the
	# `package.tags` key exists: without it, the list is returned empty.
	var tags: Array[String] is lazy do
		var tags = new Array[String]
		var string = metadata("package.tags")
		if string == null then return tags
		for tag in string.split(",") do
			tag = tag.trim
			if tag.is_empty then continue
			tags.add tag
		end
		if tryit != null then tags.add "tryit"
		if apk != null then tags.add "apk"
		if tags.is_empty then tags.add "none"
		return tags
	end

	# The list of all maintainers
	var maintainers = new Array[Person]

	# The list of contributors
	var contributors = new Array[Person]

	# The date of the most recent commit
	var last_date: nullable String = null

	# The date of the oldest commit
	var first_date: nullable String = null

	# Key: `package.maintainer`
	var maintainer: nullable String is lazy do return metadata("package.maintainer")

	# Key: `package.more_contributors`
	#
	# The comma-separated value is split and trimmed; blank entries are dropped.
	var more_contributors: Array[String] is lazy do
		var res = new Array[String]
		var string = metadata("package.more_contributors")
		if string == null then return res
		for c in string.split(",") do
			c = c.trim
			if c.is_empty then continue
			res.add c
		end
		return res
	end

	# Key: `package.license`
	var license: nullable String is lazy do return metadata("package.license")

	# Key: `upstream.tryit`
	var tryit: nullable String is lazy do return metadata("upstream.tryit")

	# Key: `upstream.apk`
	var apk: nullable String is lazy do return metadata("upstream.apk")

	# Key: `upstream.homepage`
	var homepage: nullable String is lazy do return metadata("upstream.homepage")

	# Key: `upstream.browse`
	var browse: nullable String is lazy do return metadata("upstream.browse")

	# Package git clone address
	#
	# Key: `upstream.git`
	var git: nullable String is lazy do return metadata("upstream.git")

	# Package issue tracker
	#
	# Key: `upstream.issues`
	var issues: nullable String is lazy do return metadata("upstream.issues")
end

redef class Int
	# Returns `log(self+1)`. Used to compute score of packages
	fun score: Float do return (self+1).to_f.log
end

# A contributor/author/etc.
#
# It comes from git or the metadata
#
# TODO get more things from github by using the email as a key
# "https://api.github.com/search/users?q={email}+in:email"
class Person
	# The name. Eg "John Doe"
	var name: String is writable

	# The email, Eg "john.doe@example.com"
	var email: nullable String is writable

	# Some homepage. Eg "http://example.com/~jdoe"
	var page: nullable String is writable

	# Gravatar id
	#
	# It is the lowercased MD5 of `email`, or `null` when there is no email.
	var gravatar: nullable String is lazy do
		var email = self.email
		if email == null then return null
		return email.md5.to_lower
	end

	# The standard representation of a person.
	#
	# ~~~
	# var jd = new Person("John Doe", "john.doe@example.com", "http://example.com/~jdoe")
	# assert jd.to_s == "John Doe <john.doe@example.com> (http://example.com/~jdoe)"
	# ~~~
	#
	# It can be used as the input of `parse`.
	#
	# ~~~
	# var jd2 = new Person.parse(jd.to_s)
	# assert jd2.to_s == jd.to_s
	# ~~~
	redef fun to_s
	do
		var res = name
		var email = self.email
		if email != null then res += " <{email}>"
		var page = self.page
		if page != null then res += " ({page})"
		return res
	end

	# Create a new person from its standard textual representation.
	#
	# ~~~
	# var jd = new Person.parse("John Doe <john.doe@example.com> (http://example.com/~jdoe)")
	# assert jd.name == "John Doe"
	# assert jd.email == "john.doe@example.com"
	# assert jd.page == "http://example.com/~jdoe"
	# ~~~
	#
	# Emails and page are optional.
	#
	# ~~~
	# var jd2 = new Person.parse("John Doe")
	# assert jd2.name == "John Doe"
	# assert jd2.email == null
	# assert jd2.page == null
	# ~~~
	#
	# A page without email, as produced by `to_s`, is also accepted.
	#
	# ~~~
	# var jd3 = new Person.parse("John Doe (http://example.com/~jdoe)")
	# assert jd3.name == "John Doe"
	# assert jd3.email == null
	# assert jd3.page == "http://example.com/~jdoe"
	# ~~~
	init parse(person: String)
	do
		# By default the whole input is the name.
		var name = person
		var email: nullable String = null
		var page: nullable String = null

		# Regular expressions are broken, need to investigate.
		# So split manually.
		#
		#var re = "([^<(]*?)(<([^>]*?)>)?(\\((.*)\\))?".to_re
		#var m = (person+" ").search(re)
		#print "{person}: `{m or else "?"}` `{m[1] or else "?"}` `{m[3] or else "?"}` `{m[5] or else "?"}`"
		do
			var sp1 = person.split_once_on("<")
			if sp1.length < 2 then
				# No email part. Only a trailing `(...)` is taken as the page,
				# so parentheses inside a name are left alone.
				var trimmed = person.trim
				if not trimmed.has_suffix(")") then break
				var sp5 = trimmed.split_once_on("(")
				if sp5.length < 2 then break
				var bare_name = sp5.first.trim
				if bare_name.is_empty then break
				var sp6 = sp5.last.split_once_on(")")
				if sp6.length < 2 then break
				name = bare_name
				page = sp6.first.trim
				break
			end

			# An unterminated `<` is not an email: keep the whole input as the name.
			var sp2 = sp1.last.split_once_on(">")
			if sp2.length < 2 then break

			name = sp1.first.trim
			email = sp2.first.trim

			# The page, if any, follows the email.
			var sp3 = sp2.last.split_once_on("(")
			if sp3.length < 2 then break
			var sp4 = sp3.last.split_once_on(")")
			if sp4.length < 2 then break
			page = sp4.first.trim
		end

		init(name, email, page)
	end
end

# The main class of the catalog generator that has the knowledge
class Catalog

	# The modelbuilder
	# used to access the files and count source lines of code
	var modelbuilder: ModelBuilder

	# List of all packages by their names
	var mpackages = new HashMap[String, MPackage]

	# Packages by tag
	var tag2proj = new MultiHashMap[String, MPackage]

	# Packages by category
	var cat2proj = new MultiHashMap[String, MPackage]

	# Packages by maintainer
	var maint2proj = new MultiHashMap[Person, MPackage]

	# Packages by contributors
	var contrib2proj = new MultiHashMap[Person, MPackage]

	# Dependency between packages
	fun deps: HashDigraph[MPackage] do return modelbuilder.model.mpackage_importation_graph

	# Number of modules by package
	var mmodules = new Counter[MPackage]

	# Number of classes by package
	var mclasses = new Counter[MPackage]

	# Number of methods by package
	var mmethods = new Counter[MPackage]

	# Number of line of code by package
	var loc = new Counter[MPackage]

	# Number of errors
	var errors = new Counter[MPackage]

	# Number of warnings and advices
	var warnings = new Counter[MPackage]

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = new Counter[MPackage]

	# Documentation score (between 0 and 100)
	var documentation_score = new Counter[MPackage]

	# Number of commits by package
	var commits = new Counter[MPackage]

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = new Counter[MPackage]

	# List of known people by their git string
	var persons = new HashMap[String, Person]

	# Map person short names to person objects
	var name2person = new HashMap[String, Person]

	# Package statistics cache
	var mpackages_stats = new HashMap[MPackage, MPackageStats]

	# Scan, register and add a contributor to a package
	#
	# `person` is the textual form accepted by `Person.parse`.
	# The returned `Person` is the shared instance stored in `persons`.
	fun register_contrib(person: String, mpackage: MPackage): Person
	do
		var p = persons.get_or_null(person)
		if p == null then
			var new_p = new Person.parse(person)
			# Maybe, we already have this person in fact?
			p = persons.get_or_null(new_p.to_s)
			if p == null then
				p = new_p
				persons[p.to_s] = p
			end
		end
		var projs = contrib2proj[p]
		if not projs.has(mpackage) then
			projs.add mpackage
			mpackage.metadata.contributors.add p
		end
		name2person[p.name] = p
		return p
	end

	# Compute information for a package
	#
	# Registers `mpackage` in the indexes (`mpackages`, `tag2proj`, `cat2proj`,
	# `maint2proj`), fills the per-package counters and updates `score`.
	fun package_page(mpackage: MPackage)
	do
		mpackages[mpackage.full_name] = mpackage

		# Start from the score already accumulated for this package, if any.
		var score = score[mpackage].to_f

		var mdoc = mpackage.mdoc_or_fallback
		if mdoc != null then
			score += 100.0
			score += mdoc.content.length.score
		end

		var metadata = mpackage.metadata

		if metadata.tryit != null then score += 1.0
		if metadata.apk != null then score += 1.0
		if metadata.homepage != null then score += 5.0

		var maintainer = metadata.maintainer
		if maintainer != null then
			score += 5.0
			var person = register_contrib(maintainer, mpackage)
			# Guarded like `maint2proj` below, so no maintainer is listed twice.
			var maintainers = metadata.maintainers
			if not maintainers.has(person) then maintainers.add person
			var projs = maint2proj[person]
			if not projs.has(mpackage) then projs.add mpackage
		end

		if metadata.license != null then score += 5.0
		if metadata.browse != null then score += 5.0

		var tags = metadata.tags
		for tag in tags do
			tag2proj[tag].add mpackage
		end
		if tags.not_empty then
			# The first tag is used as the category.
			var cat = tags.first
			cat2proj[cat].add mpackage
			score += tags.length.score
		end

		if deps.has_vertex(mpackage) then
			score += deps.predecessors(mpackage).length.score
			score += deps.get_all_predecessors(mpackage).length.score
			score += deps.successors(mpackage).length.score
			score += deps.get_all_successors(mpackage).length.score
		end

		# `contributors` is the live list: registering more people below grows it.
		var contributors = metadata.contributors
		for c in metadata.more_contributors do
			register_contrib(c, mpackage)
		end
		score += contributors.length.to_f

		var mmodules = 0
		var mclasses = 0
		var mmethods = 0
		var loc = 0
		var errors = 0
		var warnings = 0
		# The documentation value of each entity is ad hoc.
		var entity_score = 0.0
		var doc_score = 0.0
		for g in mpackage.mgroups do
			mmodules += g.mmodules.length
			var gs = 1.0
			entity_score += gs
			if g.mdoc != null then doc_score += gs
			for m in g.mmodules do
				var source = m.location.file
				if source != null then
					for msg in source.messages do
						# Level 2 is counted as an error, anything else as a warning.
						if msg.level == 2 then
							errors += 1
						else
							warnings += 1
						end
					end
				end
				var am = modelbuilder.mmodule2node(m)
				if am != null then
					var file = am.location.file
					if file != null then
						loc += file.line_starts.length - 1
					end
				end
				var ms = gs
				if m.is_test then ms /= 100.0
				entity_score += ms
				# Entities of an undocumented module weigh ten times less.
				if m.mdoc != null then doc_score += ms else ms /= 10.0
				for cd in m.mclassdefs do
					var cs = ms * 0.2
					if not cd.is_intro then cs /= 100.0
					# NOTE(review): the penalty applies when the visibility is NOT
					# `<= private_visibility`; confirm this is the intended direction.
					if not cd.mclass.visibility <= private_visibility then cs /= 100.0
					entity_score += cs
					if cd.mdoc != null then doc_score += cs
					mclasses += 1
					for pd in cd.mpropdefs do
						var ps = ms * 0.1
						if not pd.is_intro then ps /= 100.0
						# NOTE(review): same visibility test as for classes above.
						if not pd.mproperty.visibility <= private_visibility then ps /= 100.0
						entity_score += ps
						if pd.mdoc != null then doc_score += ps
						if not pd isa MMethodDef then continue
						mmethods += 1
					end
				end
			end
		end
		self.mmodules[mpackage] = mmodules
		self.mclasses[mpackage] = mclasses
		self.mmethods[mpackage] = mmethods
		self.loc[mpackage] = loc
		self.errors[mpackage] = errors
		self.warnings[mpackage] = warnings
		if loc > 0 then
			self.warnings_per_kloc[mpackage] = warnings * 1000 / loc
		end

		# Without any entity the ratio is 0.0/0.0: keep the score at 0 instead.
		var documentation_score = 0
		if entity_score > 0.0 then
			documentation_score = (100.0 * doc_score / entity_score).to_i
		end
		self.documentation_score[mpackage] = documentation_score
		#score += mmodules.score
		score += mclasses.score
		score += mmethods.score
		score += loc.score
		score += documentation_score.score

		self.score[mpackage] = score.to_i
	end

	# Collect more information on a package using the `git` tool.
	#
	# Does nothing when the package has no `ini`, no root group or no file path.
	fun git_info(mpackage: MPackage)
	do
		var ini = mpackage.ini
		if ini == null then return

		var root = mpackage.root
		if root == null then return

		# TODO use real git info
		#var repo = ini.get_or_null("upstream.git")
		#var branch = ini.get_or_null("upstream.git.branch")
		#var directory = ini.get_or_null("upstream.git.directory")

		var dirpath = root.filepath
		if dirpath == null then return

		# Collect commits info
		# Each output line is `date;Name <email>`.
		var res = git_run("log", "--no-merges", "--follow", "--pretty=tformat:%ad;%aN <%aE>", "--", dirpath)
		var contributors = new Counter[String]
		var commits = res.split("\n")
		# Drop the empty element left by the trailing newline.
		if commits.not_empty and commits.last == "" then commits.pop
		self.commits[mpackage] = commits.length
		for l in commits do
			var s = l.split_once_on(';')
			if s.length != 2 or s.last == "" then continue

			# Collect date of last and first commit
			# The first valid line gives `last_date`; `first_date` is overwritten
			# on every line, so it ends up holding the date of the last line read.
			if mpackage.metadata.last_date == null then mpackage.metadata.last_date = s.first
			mpackage.metadata.first_date = s.first

			# Count contributors
			contributors.inc(s.last)
		end
		for c in contributors.sort.reverse_iterator do
			register_contrib(c, mpackage)
		end
	end

	# Compose package stats
	#
	# The result is built from the counters and stored in `mpackages_stats`.
	fun mpackage_stats(mpackage: MPackage): MPackageStats
	do
		var stats = new MPackageStats
		stats.mmodules = mmodules[mpackage]
		stats.mclasses = mclasses[mpackage]
		stats.mmethods = mmethods[mpackage]
		stats.loc = loc[mpackage]
		stats.errors = errors[mpackage]
		stats.warnings = warnings[mpackage]
		stats.warnings_per_kloc = warnings_per_kloc[mpackage]
		stats.documentation_score = documentation_score[mpackage]
		stats.commits = commits[mpackage]
		stats.score = score[mpackage]

		mpackages_stats[mpackage] = stats
		return stats
	end

	# Compose catalog stats
	#
	# Lazy: the values are those of the first access.
	var catalog_stats: CatalogStats is lazy do
		var stats = new CatalogStats
		stats.packages = mpackages.length
		stats.maintainers = maint2proj.length
		stats.contributors = contrib2proj.length
		stats.tags = tag2proj.length
		stats.modules = mmodules.sum
		stats.classes = mclasses.sum
		stats.methods = mmethods.sum
		stats.loc = loc.sum
		return stats
	end
end

# Catalog statistics
class CatalogStats

	# Number of packages
	var packages = 0

	# Number of maintainers
	var maintainers = 0

	# Number of contributors
	var contributors = 0

	# Number of tags
	var tags = 0

	# Number of modules
	var modules = 0

	# Number of classes
	var classes = 0

	# Number of methods
	var methods = 0

	# Number of line of codes
	var loc = 0

	# Return the stats as a Map associating each stat key to its value
	fun to_map: Map[String, Int] do
		var map = new HashMap[String, Int]
		map["packages"] = packages
		map["maintainers"] = maintainers
		map["contributors"] = contributors
		map["tags"] = tags
		map["modules"] = modules
		map["classes"] = classes
		map["methods"] = methods
		map["loc"] = loc
		return map
	end
end

# MPackage statistics for the catalog
class MPackageStats

	# Number of modules
	var mmodules = 0

	# Number of classes
	var mclasses = 0

	# Number of methods
	var mmethods = 0

	# Number of lines of code
	var loc = 0

	# Number of errors
	var errors = 0

	# Number of warnings and advices
	var warnings = 0

	# Number of warnings per 1000 lines of code (w/kloc)
	var warnings_per_kloc = 0

	# Documentation score (between 0 and 100)
	var documentation_score = 0

	# Number of commits by package
	var commits = 0

	# Score by package
	#
	# The score is loosely computed using other metrics
	var score = 0
end

# Sort the mpackages by their score
#
# Higher scores come first; packages without stats come last.
class CatalogScoreSorter
	super Comparator

	# Catalog used to access scores
	var catalog: Catalog

	redef type COMPARED: MPackage

	redef fun compare(a, b) do
		var stats = catalog.mpackages_stats
		var has_a = stats.has_key(a)
		var has_b = stats.has_key(b)
		# Two packages without stats are equivalent, so the order stays consistent.
		if not has_a and not has_b then return 0
		if not has_a then return 1
		if not has_b then return -1
		# Operands are swapped to sort by decreasing score.
		return stats[b].score <=> stats[a].score
	end
end

# Sort tags alphabetically
class CatalogTagsSorter
	super Comparator

	redef type COMPARED: String

	redef fun compare(a, b) do return a <=> b
end

# Execute a git command and return the result
#
# `command` holds the arguments passed to the `git` executable.
# The returned string is everything read from the process.
fun git_run(command: String...): String
do
	# print "git {command.join(" ")}"

	var p = new ProcessReader("git", command...)
	var res = p.read_all
	p.close
	p.wait
	return res
end