nitcc: generate more intermediate automaton
[nit.git] / contrib / nitcc / src / nitcc.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # nitcc, a parser and lexer generator for Nit
16 module nitcc
17
18 import nitcc_semantic
19
# Load the grammar file
#
# The first command-line argument is either the path of the grammar file
# or `-`, in which case the grammar is read from the standard input.

if args.is_empty then
	print "usage: nitcc <file> | -"
	exit 1
end
var fi = args.first

var text
if fi != "-" then
	var f = new FileReader.open(fi)
	# `FileReader.open` does not abort when the file cannot be opened:
	# it only records the problem in `last_error`.
	# Report it here instead of silently going on with an empty text.
	var open_error = f.last_error
	if open_error != null then
		print "Error: cannot open '{fi}': {open_error}"
		exit 1
	end
	text = f.read_all
	f.close
else
	text = stdin.read_all
end
36
# Parse the grammar file
#
# The whole text is lexed first, then the resulting tokens are handed to the parser.

var lexer = new Lexer_nitcc(text)
var lexed_tokens = lexer.lex

var parser = new Parser_nitcc
parser.tokens.add_all(lexed_tokens)

var node = parser.parse

# Anything that is not a production node is expected to be an error node.
if not node isa NProd then
	assert node isa NError
	var pos = node.position.as(not null)
	print "{pos} Syntax Error: {node.message}"
	exit 1
	# Unreachable after `exit`; states explicitly that this branch never falls through.
	abort
end
53
# The grammar name is the text of the `Nid` found at index 1 of the `Ngrammar` root child.
# It is used as the prefix of every generated file.
var name = node.children.first.as(Ngrammar).children[1].as(Nid).text

# Dump the syntax tree of the grammar file in dot format.
print "Grammar {name} (see {name}.gram.dot)"
node.to_dot("{name}.gram.dot")
58
# Semantic analysis
#
# A `CollectNameVisitor` is run on the syntax tree; the grammar it built is then retrieved.

var v2 = new CollectNameVisitor
v2.start node
var gram = v2.gram

# A grammar without any production cannot be processed further.
if gram.prods.is_empty then
	print "Error: grammar with no production"
	exit 1
end
69
# Generate the LR automaton

var lr = gram.lr0

# Collect the production of every conflicting item found in a state of the automaton.
var conflicts = new ArraySet[Production]
for state in lr.states do
	for item in state.conflicting_items do
		conflicts.add(item.alt.prod)
	end
end

# Conflicts are only reported here; the execution goes on.
if not conflicts.is_empty then print "Error: there is conflicts"

# Disabled code: the `if false` guard means this loop never runs.
# It would inline the conflicting productions and rebuild the automaton.
# NOTE(review): `conflicts` is not recomputed in the loop body; confirm termination before enabling.
if false then
	loop
		if conflicts.is_empty then break
		print "Inline {conflicts.join(" ")}"
		gram.inline(conflicts)
		lr = gram.lr0
	end
end
89
# Output concrete grammar and LR automaton

# Total number of alternatives over all the productions.
var alt_count = 0
for prod in gram.prods do
	alt_count += prod.alts.length
end
print "Concrete grammar: {gram.prods.length} productions, {alt_count} alternatives (see {name}.concrete_grammar.out)"

# Textual dump of the grammar, preceded by a one-line header.
var grammar_dump = gram.pretty
var f = new FileWriter.open("{name}.concrete_grammar.out")
f.write("// Concrete grammar of {name}\n")
f.write(grammar_dump)
f.close

# The automaton is dumped twice: in dot format and as text.
print "LR automaton: {lr.states.length} states (see {name}.lr.dot and {name}.lr.out)"
lr.to_dot("{name}.lr.dot")
var automaton_dump = lr.pretty
f = new FileWriter.open("{name}.lr.out")
f.write("// LR automaton of {name}\n")
f.write(automaton_dump)
f.close
109
# NFA and DFA
#
# Each intermediate automaton is written in its own dot file.

# NFA collected by the visitor.
var nfa = v2.nfa
print "NFA automaton: {nfa.states.length} states (see {name}.nfa.dot)"
nfa.to_dot.write_to_file("{name}.nfa.dot")

# Same NFA, converted with `to_nfa_noe`.
var epsilon_free = nfa.to_nfa_noe
epsilon_free.to_dot.write_to_file("{name}.nfanoe.dot")
print "NFA automaton without epsilon: {epsilon_free.states.length} states (see {name}.nfanoe.dot)"

# DFA built from the original NFA, before minimization.
var raw_dfa = nfa.to_dfa
raw_dfa.to_dot.write_to_file("{name}.dfanomin.dot")
print "DFA automaton (non minimal): {raw_dfa.states.length} states (see {name}.dfanomin.dot)"

# Final DFA: minimized, then processed by `solve_token_inclusion`.
var dfa = raw_dfa.to_minimal_dfa
dfa.solve_token_inclusion

print "DFA automaton: {dfa.states.length} states (see {name}.dfa.dot)"
dfa.to_dot.write_to_file("{name}.dfa.dot")
129
# Sanity checks on the tokens; the first detected error stops the program.

# A tag on the start state is reported as an empty token.
if dfa.tags.has_key(dfa.start) then
	var empty_tokens = dfa.tags[dfa.start]
	print "Error: Empty tokens {empty_tokens.join(" ")}"
	exit 1
end

# A state tagged with more than one token is a conflict.
for state, candidates in dfa.tags do
	if candidates.length > 1 then
		print "Error: Conflicting tokens: {candidates.join(" ")}"
		exit 1
	end
end

# Every token of the grammar, except `Eof`, must have a non-empty entry in `retrotags`.
for token in gram.tokens do
	if token.name == "Eof" then continue
	var matched = dfa.retrotags.has_key(token) and not dfa.retrotags[token].is_empty
	if not matched then
		print "Error: Token {token} matches nothing"
		exit 1
	end
end
145
# Generate Nit code
#
# Three files are produced: the lexer, the parser and a standalone test program.

var lexer_file = "{name}_lexer.nit"
var parser_file = "{name}_parser.nit"
var tester_file = "{name}_test_parser.nit"

print "Generate {lexer_file} {parser_file} {tester_file}"
dfa.gen_to_nit(lexer_file, name, "{name}_parser")
lr.gen_to_nit(parser_file, name)

# The test program is a fixed template where only the language name varies.
f = new FileWriter.open(tester_file)
f.write("""# Generated by nitcc for the language {{{name}}}

# Standalone parser tester for the language {{{name}}}
module {{{name}}}_test_parser is generated
import nitcc_runtime
import {{{name}}}_lexer
import {{{name}}}_parser

# Class to test the parser for the language {{{name}}}
class TestParser_{{{name}}}
super TestParser
redef fun name do return \"{{{name}}}\"
redef fun new_lexer(text) do return new Lexer_{{{name}}}(text)
redef fun new_parser do return new Parser_{{{name}}}
end
var t = new TestParser_{{{name}}}
t.main
""")
f.close