Collect common metrics about README files

Also works with generic Markdown files.

Introduced classes

class MarkdownMetrics

nitc :: MarkdownMetrics

A Markdown decorator that collects metrics about a Readme content
class ReadmeMetric

nitc :: ReadmeMetric

Readme metrics associated to a Package
class ReadmeMetrics

nitc :: ReadmeMetrics

All metrics about the readmes
private class ReadmeMetricsPhase

nitc :: ReadmeMetricsPhase

Extract metrics about README files

Redefined classes

redef class ToolContext

nitc :: readme_metrics $ ToolContext

Global context for tools

All class definitions

class MarkdownMetrics

nitc $ MarkdownMetrics

A Markdown decorator that collects metrics about a Readme content
class ReadmeMetric

nitc $ ReadmeMetric

Readme metrics associated to a Package
class ReadmeMetrics

nitc $ ReadmeMetrics

All metrics about the readmes
private class ReadmeMetricsPhase

nitc $ ReadmeMetricsPhase

Extract metrics about README files
redef class ToolContext

nitc :: readme_metrics $ ToolContext

Global context for tools
package_diagram nitc::readme_metrics readme_metrics nitc::metrics_base metrics_base nitc::readme_metrics->nitc::metrics_base nitc::model_collect model_collect nitc::readme_metrics->nitc::model_collect markdown2 markdown2 nitc::readme_metrics->markdown2 nitc::modelbuilder modelbuilder nitc::metrics_base->nitc::modelbuilder csv csv nitc::metrics_base->csv counter counter nitc::metrics_base->counter nitc::model_filters model_filters nitc::model_collect->nitc::model_filters core core markdown2->core config config markdown2->config json json markdown2->json template template markdown2->template ...nitc::modelbuilder ... ...nitc::modelbuilder->nitc::modelbuilder ...csv ... ...csv->csv ...counter ... ...counter->counter ...nitc::model_filters ... ...nitc::model_filters->nitc::model_filters ...core ... ...core->core ...config ... ...config->config ...json ... ...json->json ...template ... ...template->template nitc::metrics metrics nitc::metrics->nitc::readme_metrics nitc::nitmetrics nitmetrics nitc::nitmetrics->nitc::metrics nitc::api_metrics api_metrics nitc::api_metrics->nitc::metrics nitc::nitmetrics... ... nitc::nitmetrics...->nitc::nitmetrics nitc::api_metrics... ... nitc::api_metrics...->nitc::api_metrics

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module annotation

nitc :: annotation

Management and utilities on annotations
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module caching

serialization :: caching

Services for caching serialization engines
module circular_array

core :: circular_array

Efficient data structure to access both ends of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module defines several collection classes.
module console

console :: console

Defines some ANSI Terminal Control Escape Sequences.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module counter

counter :: counter

Simple numerical statistical analysis and presentation
module csv

csv :: csv

CSV document handling.
module digraph

graph :: digraph

Implementation of directed graphs, also called digraphs.
module engine_tools

serialization :: engine_tools

Advanced services for serialization engines
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module ini

ini :: ini

Read and write INI configuration files
module inspect

serialization :: inspect

Refine Serializable::inspect to show more useful information
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module lexer

nitc :: lexer

Lexer and its tokens.
module lexer_work

nitc :: lexer_work

Internal algorithm and data structures for the Nit lexer
module list

core :: list

This module handles doubly linked lists
module literal

nitc :: literal

Parsing of literal values in the abstract syntax tree.
module loader

nitc :: loader

Loading of Nit source files
module location

nitc :: location

Nit source-file and locations in source-file
module markdown_ast

markdown2 :: markdown_ast

Markdown AST representation
module markdown_block_parsing

markdown2 :: markdown_block_parsing

Markdown blocks parsing
module markdown_github

markdown2 :: markdown_github

Markdown Github mode
module markdown_html_rendering

markdown2 :: markdown_html_rendering

HTML rendering of Markdown documents
module markdown_inline_parsing

markdown2 :: markdown_inline_parsing

Parser for inline markdown
module markdown_rendering

markdown2 :: markdown_rendering

Markdown document rendering
module math

core :: math

Mathematical operations
module mdoc

nitc :: mdoc

Documentation of model entities
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module mmodule

nitc :: mmodule

modules and module hierarchies in the metamodel
module mmodule_data

nitc :: mmodule_data

Define and retrieve data in modules
module model

nitc :: model

Classes, types and properties
module model_base

nitc :: model_base

The abstract concept of model and related common things
module model_examples

nitc :: model_examples

Examples for Model entities
module modelbuilder_base

nitc :: modelbuilder_base

Load nit source files and build the associated model
module modelize_class

nitc :: modelize_class

Analysis and verification of class definitions to instantiate model elements
module modelize_property

nitc :: modelize_property

Analysis and verification of property definitions to instantiate model elements
module more_collections

more_collections :: more_collections

Highly specific, but useful, collections-related classes.
module mpackage

nitc :: mpackage

Modelisation of a Nit package
module native

core :: native

Native structures for text and bytes
module nitpm_shared

nitc :: nitpm_shared

Services related to the Nit package manager
module numeric

core :: numeric

Advanced services for Numeric types
module opts

opts :: opts

Management of options on the command line
module ordered_tree

ordered_tree :: ordered_tree

Manipulation and presentation of ordered trees.
module parse_annotations

nitc :: parse_annotations

Simple annotation parsing
module parser

nitc :: parser

Parser.
module parser_nodes

nitc :: parser_nodes

AST nodes of the Nit language
module parser_prod

nitc :: parser_prod

Production AST nodes full definition.
module parser_work

nitc :: parser_work

Internal algorithm and data structures for the Nit parser
module phase

nitc :: phase

Phases of the processing of nit programs
module poset

poset :: poset

Preorder sets and partial order sets (i.e. hierarchies)
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization

serialization :: serialization

General serialization services
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module tables

nitc :: tables

Module that interfaces the parsing tables.
module template

template :: template

Basic template system
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module toolcontext

nitc :: toolcontext

Common command-line tool infrastructure that handles options and error messages
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O
module version

nitc :: version

This file was generated by git-gen-version.sh

Parents

module metrics_base

nitc :: metrics_base

Helpers for various statistics tools.
module model_collect

nitc :: model_collect

Collect things from the model.

Children

module metrics

nitc :: metrics

Various statistics about Nit models and programs

Descendants

module a_star-m

a_star-m

module api

nitc :: api

Components required to build a web server about the nit model.
module nitmetrics

nitc :: nitmetrics

A program that collects various metrics on nit programs and libraries
module nitweb

nitc :: nitweb

Runs a webserver based on nitcorn that renders things from the model.
# Collect common metrics about README files
#
# Also works with generic Markdown files.
module readme_metrics

import metrics_base
import model::model_collect
import markdown2

# Adds the README metrics phase attribute to `ToolContext`
redef class ToolContext

	# README related metrics phase
	#
	# Built as part of the attribute's default value, so a `ReadmeMetricsPhase`
	# is created for every `ToolContext`.
	# NOTE(review): the second argument is `null` - presumably "no dependency on
	# other phases"; confirm against the `Phase` constructor.
	var readme_metrics_phase: Phase = new ReadmeMetricsPhase(self, null)
end

# Phase that extracts metrics about the README files of the model packages
#
# Does nothing unless the `readme` or the `all` metrics option is set.
# The metrics are printed on the console and, when the `csv` option is set,
# also written to `readme.csv` in the output directory.
private class ReadmeMetricsPhase
	super Phase

	redef fun process_mainmodule(mainmodule, given_mmodules) do
		var enabled = toolcontext.opt_readme.value or toolcontext.opt_all.value
		if not enabled then return

		print toolcontext.format_h1("\n# ReadMe metrics")

		# Collect and display the metrics of every package known by the model
		var mpackages = toolcontext.modelbuilder.model.mpackages
		var readmes = new ReadmeMetrics
		readmes.collect_metrics(mpackages)
		readmes.to_console(toolcontext)

		# CSV export is optional
		if not toolcontext.opt_csv.value then return
		var out_dir = toolcontext.opt_dir.value or else "metrics"
		readmes.to_csv.write_to_file("{out_dir}/readme.csv")
	end
end

# A Markdown visitor that collects metrics about a Readme content
#
# Every visited node is counted by class name; headings are additionally
# counted by level.
class MarkdownMetrics
	super MdVisitor

	# Number of visited nodes, by node class name
	var nodes_counter = new Counter[String]

	# Number of visited headings, by heading level
	var headings_counter = new Counter[Int]

	redef fun visit(node) do
		var kind = node.class_name
		nodes_counter.inc kind
		if node isa MdHeading then headings_counter.inc node.level
		# Continue with the children of `node`
		node.visit_all self
	end
end

# All metrics about the readmes
#
# Maps each `MPackage` to the `ReadmeMetric` collected for it.
class ReadmeMetrics
	super HashMap[MPackage, ReadmeMetric]

	# Collect all metric names from submetrics
	#
	# The returned set always starts with `"MPackage"`, the label of the column
	# holding the package name, followed by every metric name found in at
	# least one package.
	fun metrics_names: ArraySet[String] do
		var names = new ArraySet[String]
		names.add "MPackage"
		for metric in values do
			names.add_all metric.keys
		end
		return names
	end

	# Render `self` as a CsvDocument
	#
	# The header is `metrics_names`; there is one record per package.
	fun to_csv: CsvDocument do
		# Compute the column names only once: each call to `metrics_names`
		# walks the metrics of every package.
		var names = metrics_names

		var doc = new CsvDocument
		doc.header = names.to_a
		for metric in values do
			doc.records.add metric.to_csv_record(names)
		end
		return doc
	end

	# Print `self` into stdout
	#
	# Packages without a Readme file are skipped.
	fun to_console(toolcontext: ToolContext) do
		for metric in values do
			if not metric.has_readme then continue
			metric.to_console(toolcontext)
		end
	end

	# Collect metrics for all `mpackages`
	#
	# Any metric previously stored for one of the `mpackages` is replaced.
	fun collect_metrics(mpackages: Collection[MPackage]) do
		for mpackage in mpackages do
			var metric = new ReadmeMetric(mpackage)
			metric.collect_metrics
			self[mpackage] = metric
		end
	end
end

# Readme metrics associated to a Package
#
# Maps each metric name to its integer value for `mpackage`.
class ReadmeMetric
	super HashMap[String, Int]

	# Package this Readme is about
	var mpackage: MPackage

	# Render `self` as a CsvDocument record
	#
	# The record starts with the full name of `mpackage`.
	# The first element of `keys` is the label of that column, so it is skipped.
	# Each remaining key contributes its value, or `0` if the metric is absent.
	fun to_csv_record(keys: ArraySet[String]): Array[String] do
		var record = new Array[String]
		record.add mpackage.full_name
		var is_label = true
		for key in keys do
			if is_label then
				# Skip the label of the package name column
				is_label = false
				continue
			end
			record.add value_or_zero(key).to_s
		end
		return record
	end

	# Return the value associated with `key` or `0`.
	fun value_or_zero(key: String): Int do
		return if self.has_key(key) then self[key] else 0
	end

	# Print `self` on stdout
	#
	# The title shows the path of the Readme file, or `no readme` when the
	# package has no Readme file.
	fun to_console(toolcontext: ToolContext) do
		# `readme_path` is non-null as soon as the package has a source location,
		# so the existence of the file must be checked too.
		var shown = "no readme"
		var path = readme_path
		if path != null and has_readme then shown = path
		print toolcontext.format_h2("\n ## package {mpackage} ({shown})")
		for key, value in self do
			print "  * {key} {value}"
		end
	end

	# Collect metrics about `mpackage`
	#
	# Sets `has_package` and `has_readme` to `0` or `1`.
	# When a Readme exists, also stores its number of lines (`md_lines`),
	# the count of each Markdown node kind and the count of each heading
	# level (`HL <level>`).
	# Collection stops with a warning when the package has no directory or
	# no Readme file.
	fun collect_metrics do
		if not has_package_dir then
			print "Warning: no source file for `{mpackage}`"
			self["has_package"] = 0
			return
		end
		self["has_package"] = 1

		if not has_readme then
			print "Warning: no readme file for `{mpackage}`"
			self["has_readme"] = 0
			return
		end
		self["has_readme"] = 1

		var lines = md_lines
		self["md_lines"] = lines.length

		# Parse the Readme and count its Markdown nodes
		var parser = new MdParser
		var node = parser.parse(lines.join("\n"))
		var v = new MarkdownMetrics
		v.enter_visit(node)
		for md_node, value in v.nodes_counter do
			self[md_node] = value
		end
		for level, value in v.headings_counter do
			self["HL {level}"] = value
		end
	end

	# Path to the package
	var package_path: nullable SourceFile is lazy do return mpackage.location.file

	# Is `mpackage` in its own directory?
	#
	# A package whose source path ends with `.nit` is a single-file package.
	var has_package_dir: Bool is lazy do
		var path = package_path
		if path == null then return false
		return not path.filename.has_suffix(".nit")
	end

	# Return the path to the `mpackage` Readme file
	#
	# The file is not guaranteed to exist, see `has_readme`.
	# Returns `null` if the package has no source location.
	var readme_path: nullable String is lazy do
		var package_path = self.package_path
		if package_path == null then return null
		return package_path.filename / "README.md"
	end

	# Does `mpackage` have a Readme file?
	var has_readme: Bool is lazy do
		var readme_path = self.readme_path
		if readme_path == null then return false
		return readme_path.file_exists
	end

	# Read markdown lines
	#
	# Returns an empty array if the Readme does not exist.
	var md_lines: Array[String] is lazy do
		var path = readme_path
		if path == null then return new Array[String]
		# Do not try to open a file that does not exist
		if not has_readme then return new Array[String]
		return path.to_path.read_lines
	end
end
src/metrics/readme_metrics.nit:15,1--205,3