nitcc: introduce nitcc
[nit.git] / contrib / nitcc / nitcc.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # nitcc, a parser and lexer generator for Nit
16 module nitcc
17
18 import nitcc_semantic
19
# Load the grammar file
#
# The grammar text comes from the file named by the first command-line
# argument, or from the standard input when that argument is `-`.

if args.is_empty then
	print "usage: nitcc <file> | -"
	exit(1)
end
var grammar_path = args.first

var text
if grammar_path == "-" then
	# `-` selects the standard input instead of a named file
	text = stdin.read_all
else
	var input = new IFStream.open(grammar_path)
	text = input.read_all
	input.close
end
36
# Parse the grammar file
#
# The whole text is lexed first, then all the tokens are handed to the parser.

var lexer = new MyLexer(text)
var tokens = lexer.lex

var parser = new MyParser
parser.tokens.add_all(tokens)

var node = parser.parse

# Anything other than a production node is printed as-is and the tool stops.
if not node isa NProd then
	print node
	exit 1
	# NOTE(review): `abort` is presumably kept after `exit` so that the type
	# checker sees this branch as terminated and treats `node` as a `NProd`
	# in the rest of the script -- confirm before removing it.
	abort
end

# The grammar name is the text of the `Nid` at index 1 of the `Ngrammar` node
var name = node.children.first.as(Ngrammar).children[1].as(Nid).text

# Fixed: the message used to end with a doubled closing parenthesis.
print "Grammar {name} (see {name}.gram.dot)"
node.to_dot("{name}.gram.dot")
57
# Semantic analysis
#
# A `CollectNameVisitor` is run over the parsed tree; the grammar it
# collected is then read back from its `gram` attribute.

var v2 = new CollectNameVisitor
v2.start node
var gram = v2.gram

# Stop right away when the grammar has no production at all.
if gram.prods.is_empty then
	print "Error: grammar with no production"
	exit 1
end
68
# Generate the LR automaton

var lr = gram.lr0

# Collect the productions involved in a conflict: for a given state and
# token, either more than one guarded reduction, or a guarded reduction
# together with a guarded shift.
var conflicts = new ArraySet[Production]
for state in lr.states do
	for tok, items in state.guarded_reduce do
		# Single reduction and no shift on this token: nothing to record
		if items.length <= 1 and not state.guarded_shift.has_key(tok) then continue
		for item in items do conflicts.add(item.alt.prod)
	end
end

# Conflicts are only reported; the script carries on afterwards.
if not conflicts.is_empty then
	print "Error: there is conflicts"
end

# Disabled (`if false`): inline the conflicting productions and rebuild
# the automaton.
# NOTE(review): `conflicts` is never recomputed inside the loop, so it
# looks like this would not terminate if enabled -- confirm before use.
if false then
	loop
		if conflicts.is_empty then break
		print "Inline {conflicts.join(" ")}"
		gram.inline(conflicts)
		lr = gram.lr0
	end
end
88
# Output concrete grammar and LR automaton

# Total number of alternatives, summed over every production
var alt_count = 0
for production in gram.prods do
	alt_count += production.alts.length
end
print "Concrete grammar: {gram.prods.length} productions, {alt_count} alternatives (see {name}.concrete_grammar.txt)"

# Dump of the grammar, preceded by a one-line header
var grammar_dump = gram.pretty
var f = new OFStream.open("{name}.concrete_grammar.txt")
f.write("// Concrete grammar of {name}\n")
f.write(grammar_dump)
f.close

print "LR automaton: {lr.states.length} states (see {name}.lr.dot and {name}.lr.txt)"
lr.to_dot("{name}.lr.dot")

# Dump of the automaton, preceded by a one-line header; `f` is reused
var automaton_dump = lr.pretty
f = new OFStream.open("{name}.lr.txt")
f.write("// LR automaton of {name}\n")
f.write(automaton_dump)
f.close
108
# NFA and DFA

# The NFA is taken from the visitor used for the semantic analysis
var nfa = v2.nfa
print "NFA automaton: {nfa.states.length} states (see {name}.nfa.dot)"
nfa.to_dot("{name}.nfa.dot")

var dfa = nfa.to_dfa

# Tags attached to the start state are reported as empty tokens
if dfa.tags.has_key(dfa.start) then
	var empty_tokens = dfa.tags[dfa.start]
	print "ERROR: Empty tokens {empty_tokens.join(" ")}"
end

dfa.solve_token_inclusion

# After the inclusion step, report every state still tagged by several tokens
for state, tokens in dfa.tags do
	if tokens.length > 1 then
		print "ERROR: Conflicting tokens: {tokens.join(" ")}"
	end
end

print "DFA automaton: {dfa.states.length} states (see {name}.dfa.dot)"
dfa.to_dot("{name}.dfa.dot")
126
# Generate Nit code
#
# Three files are produced: the lexer (from the DFA), the parser (from the
# LR automaton) and a small test program that imports both.

print "Generate {name}_lexer.nit {name}_parser.nit {name}_test_parser.nit"
dfa.gen_to_nit("{name}_lexer.nit", "{name}_parser")
lr.gen_to_nit("{name}_parser.nit")

f = new OFStream.open("{name}_test_parser.nit")
# Source of the test program; `{{{name}}}` is replaced by the grammar name
var test_source = """# Generated by nitcc for the language {{{name}}}
import nitcc_runtime
import {{{name}}}_lexer
import {{{name}}}_parser
class MyTest
super TestParser
redef fun name do return \"{{{name}}}\"
redef fun new_lexer(text) do return new MyLexer(text)
redef fun new_parser do return new MyParser
end
var t = new MyTest
t.main
"""
f.write(test_source)
f.close