Merge: Reworked crypto.nit to introduce basic XOR attacks
[nit.git] / contrib / benitlux / src / server / benitlux_daily.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Daily program to fetch and parse the Web site, update the database and email subscribers
18 module benitlux_daily
19
20 import curl
21 import sendmail
22 import opts
23
24 import benitlux_model
25 import benitlux_db
26
redef class Text
	# Return a `String` without any HTML tags (such as `<br />`) from `self`
	#
	# Everything from a `<` up to the next `>` (both included) is dropped.
	# The text before, between and after the tags is kept untouched.
	#
	#     assert "<b>Hello</b> world".strip_tags == "Hello world"
	#     assert "a<br />b".strip_tags == "ab"
	#     assert "no tags".strip_tags == "no tags"
	#
	# Aborts on an unclosed tag, that is a `<` with no `>` after it.
	fun strip_tags: String
	do
		var str = to_s
		var new_str = ""

		# Index of the first char not yet copied or skipped
		var from = 0
		loop
			var at = str.index_of_from('<', from)
			if at == -1 then break

			# Keep the text between the previous tag and this one
			new_str += str.substring(from, at-from)

			# Skip up to the end of the tag
			at = str.index_of_from('>', at)
			assert at != -1

			from = at+1
		end

		# Keep the text after the last tag (the whole text if there is no tag)
		new_str += str.substring_from(from)

		return new_str
	end

	# Return an `Array` of the non-empty lines in `self`
	#
	# Each line is trimmed, then the lines left empty or holding only
	# the `&nbsp;` HTML entity are discarded.
	#
	#     assert ["a", "asdf", "", " ", "&nbsp;", "b"].join("\n").to_clean_lines == ["a", "asdf", "b"]
	fun to_clean_lines: Array[String]
	do
		var new_lines = new Array[String]

		for line in split("\n") do
			line = line.trim

			# remove empty lines
			if line.is_empty or line == "&nbsp;" then continue

			new_lines.add line.to_s
		end

		return new_lines
	end
end
71
72 # Main logic of the program to be executed daily
class Benitlux
	# The street on which is the Benelux
	var street: String

	# The url of this precise Benelux
	var url = "www.brasseriebenelux.com/{street}" is lazy

	# Path to the database
	var db_path = "benitlux_{street}.db" is lazy

	# Where to save the sample email
	var sample_email_path = "benitlux_{street}.email" is lazy

	# Execute the main program logic
	#
	# Download and parse the Web page, update the database at `db_path`,
	# write the generated email to `sample_email_path` and, if `send_emails`,
	# mail it to the subscribers read from the database.
	fun run(send_emails: Bool)
	do
		# Get the web page
		var body = download_html_page

		if opts.verbose.value > 1 then
			print " # Body"
			print body
		end

		# Parse the Web page and get the available beers
		var beers = parse_beers_from_html(body)

		if opts.verbose.value > 0 then
			print " # Beers"
			print beers
		end

		var db = new BenitluxDB.open(db_path)

		# Update the database with the beers of the day
		db.insert_beers_of_the_day beers

		# Query the beer-related events of today
		var beer_events = db.beer_events_today

		if beer_events == null then
			print_error "Failed to read beer events from the DB"
			db.close
			return
		end

		# Generate the email title and content, store them in attributes
		generate_email beer_events

		# Save as sample email to file
		var f = new FileWriter.open(sample_email_path)
		f.write email_title + "\n"
		for line in email_content do f.write line + "\n"
		f.close

		# Send the emails if desired
		if send_emails then
			var subs = db.subscribers
			if opts.verbose.value > 0 then
				print " # Subscribers"
				print subs
			end
			send_emails_to subs
		end

		db.close
	end

	# Fetch the Web page at `url`
	#
	# Return the body of the response on success.
	# On a failed request, report the error on stderr and exit with status 1.
	fun download_html_page: String
	do
		var request = new CurlHTTPRequest(url)
		var response = request.execute

		if response isa CurlResponseSuccess then
			var body = response.body_str
			return body
		else if response isa CurlResponseFailed then
			# Errors go to stderr, as for the DB failure in `run`
			print_error "Failed downloading URL '{url}' with: {response.error_msg} ({response.error_code})"
			exit 1
		end

		# Neither a success nor a failure response
		abort
	end

	# Extract the beers of the day information from the HTML in `body`
	#
	# Only the section between the `Bières` header and the closing `div`s is
	# read. Within it, each clean line holding a `"- "` separator gives a
	# `Beer`, built from the trimmed text before and after the separator.
	#
	# Aborts if the expected section delimiters are not found in `body`.
	fun parse_beers_from_html(body: String): HashSet[Beer]
	do
		# Parts of the HTML page expected to encapsulate the interesting section
		var header = "<h1>Bières<br /></h1>"
		var ender = "</div></div></div>"

		# Print the page before aborting to help diagnose a layout change
		var match = body.search(header)
		assert match != null else print body
		var start = match.after

		match = body.search_from(ender, start)
		assert match != null
		var finish = match.from

		var of_interest = body.substring(start, finish-start)
		var lines = of_interest.strip_tags.to_clean_lines

		if opts.verbose.value > 0 then
			print " # Lines"
			print lines
		end

		var beers = new HashSet[Beer]
		for line in lines do
			var parts = line.split("- ")
			if parts.length >= 2 then
				# Let the DB set the id, use 0 temporary
				beers.add new Beer(0, parts[0].trim, parts[1].trim)
			end
		end
		return beers
	end

	# Content lines of the email
	#
	# Set by `generate_email`.
	var email_content: Array[String] is noautoinit

	# Title of the email
	#
	# Set by `generate_email`.
	var email_title: String is noautoinit

	# Generate email and fill the attributes `email_content` and `email_title`
	fun generate_email(beer_events: BeerEvents)
	do
		email_title = "Benitlux {street.capitalized}{beer_events.to_email_title}"
		email_content = beer_events.to_email_content
	end

	# Send the email to all the addresses in `subs`
	#
	# One HTML mail is sent per address, each with its own unsubscribe link.
	# `generate_email` must have been called before.
	fun send_emails_to(subs: Array[String])
	do
		# The body is the same for every subscriber, build it only once
		var html_lines = email_content.join("<br />\n")

		for email in subs do
			var unsub_link = "http://benitlux.xymus.net/?unsub=&email={email}"
			var content = """
{{{html_lines}}}
<br /><br />
To unsubscribe, go to <a href="{{{unsub_link}}}">{{{unsub_link}}}</a>
"""

			var mail = new Mail("Benitlux <benitlux@xymus.net>", email_title, content)
			mail.to.add email
			mail.header["Content-Type"] = "text/html; charset=\"UTF-8\""
			# RFC 2369 requires the URL to be enclosed in angle brackets
			mail.header["List-Unsubscribe"] = "<{unsub_link}>"
			mail.header["Precedence"] = "bulk"
			mail.encrypt_with_base64

			mail.send
		end
	end
end
226
redef class OptionContext
	# Flag `-e` / `--email`: mail the subscribers of the mailing list
	var send_emails = new OptionBool("Send emails to subscribers", "-e", "--email")

	# Counter `-v`: the more it is repeated, the more debug messages are shown
	var verbose = new OptionCount("Display extra debug messages", "-v")

	# Flag `-h` / `--help`: show the usage message
	var help = new OptionBool("Print this help message", "-h", "--help")

	# Register the three options declared above
	redef init
	do
		add_option(send_emails, verbose, help)
	end
end
239
redef class Sys
	# Context holding the command line options of the program
	var opts: OptionContext = new OptionContext
end
244
# Avoid executing when running tests
if "NIT_TESTING".environ == "true" then exit 0

opts.parse args

# Invalid command line: report the errors on stderr, show the usage and fail
if not opts.errors.is_empty then
	print_error opts.errors.join("\n")
	print "Usage: benitlux_daily [Options]"
	opts.usage
	exit 1
end

# Help explicitly requested: show the usage only, this is not a failure
if opts.help.value then
	print "Usage: benitlux_daily [Options]"
	opts.usage
	exit 0
end

# The parsing logic for the wellington location is active (to gather data)
# but the Web interface does not allow subscribing to its mailing list.
#
# TODO revamp mailing list Web interface
for street in ["sherbrooke", "wellington"] do
	var ben = new Benitlux(street)
	ben.run(opts.send_emails.value)
end