This PR adds a few features to rss_downloader to support more RSS feeds. It can be configured to decompress the feed and to customize the tag used to find the download link. The output has been improved, both for normal behavior and for errors.
As a bonus, add the program I used to debug the RSS feeds as an example for the `dom` module.
Pull-Request: #1590
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Alexandre Terrasa <alexandre@moz-code.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
# XML tag used for pattern recognition
fun tag_title: String do return "title"
+ # XML tag of the link to act upon
+ fun tag_link: String do return "link"
+
+ # Are the feeds at `rss_source_urls` compressed?
+ var compressed: nullable Bool
+
# Action to apply on each selected RSS element
fun act_on(element: Element)
do
var elements = new HashSet[Element]
for rss_url in config.rss_source_urls do
var rss = rss_url.fetch_rss_content
+ if config.compressed == true then rss = rss.gunzip
elements.add_all rss.to_rss_elements
end
if sys.verbose then
print "\n# {matches.length} matching elements:"
- print matches.join("\n")
+ print "* " + matches.join("\n* ")
print "\n# Downloading..."
end
# Do not download a file that is not unique according to `unique_id`
if not element.is_unique_exception(config) then
# We make some exceptions
- if sys.verbose then print "File in log, skipping {element}"
+ if sys.verbose then print "- Skipping {element}"
continue
end
end
# Download element
- if sys.verbose then print "Acting on {element}"
+ if sys.verbose then print "+ Acting on {element}"
tool_config.act_on element
fun to_rss_elements: Array[Element]
do
var xml = to_xml
+ if xml isa XMLError then
+ print_error "RSS Parse Error: {xml.message}:{xml.location or else "null"}"
+ return new Array[Element]
+ end
var items = xml["rss"].first["channel"].first["item"]
var elements = new Array[Element]
for item in items do
var title = item[tool_config.tag_title].first.as(XMLStartTag).data
- var link = item["link"].first.as(XMLStartTag).data
+ var link = item[tool_config.tag_link].first.as(XMLStartTag).data
elements.add new Element(title, link)
end
if sys.verbose then
print "# Found elements:"
- print elements.join("\n")
+ print "* " + elements.join("\n* ")
end
return elements
end
+
+ # Expand the Lempel-Ziv encoded `self`
+ fun gunzip: String
+ do
+ var proc = new ProcessDuplex("gunzip", new Array[String]...)
+ var res = proc.write_and_read(self)
+ assert proc.status == 0 else print_error "gunzip failed: {proc.last_error or else "Unknown"}"
+ return res
+ end
end
# Implement this method in your module to configure this tool
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT. This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. You can modify it is you want, provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You are allowed to redistribute it and sell it, alone or is a part of
+# another product.
+
+# Simple XML validity checker using the `dom` module
+module checker
+
+import dom
+
+# Check arguments
+if args.length != 1 then
+ print_error "Usage: checker xml_file"
+ exit 2
+end
+
+var path = args.first
+if not path.file_exists then
+ print_error "Path '{path}' does not exist"
+ exit 3
+end
+
+# Read file
+var content = path.to_path.read_all
+
+# Parse XML
+var xml = content.to_xml
+
+# Check for errors
+if xml isa XMLError then
+ print_error "XML file at '{path}' is invalid:"
+ print_error xml.message
+ var loc = xml.location
+ if loc != null then print_error loc
+ exit 1
+else
+ print "XML file at '{path}' is valid"
+end
redef fun pipeflags do return 3
- redef fun execute
+ redef fun execute do super
+
+ # Write `input` to process and return its output
+ #
+ # Writing and reading are processed line by line,
+ # reading only when something is available.
+ #
+ # ~~~
+ # var proc = new ProcessDuplex("tr", "[:lower:]", "[:upper:]")
+ # assert proc.write_and_read("""
+ # Alice
+ # Bob
+ # """) == """
+ # ALICE
+ # BOB
+ # """
+ # ~~~
+ fun write_and_read(input: Text): String
do
- super
+ var read = new Buffer #new Array[String]
+
+ # Main loop, read and write line by line
+ var prev = 0
+ for delimiter in input.search_all('\n') do
+ write input.substring(prev, delimiter.after-prev)
+ prev = delimiter.after
+
+ while stream_in.poll_in do
+ read.append stream_in.read_line
+ end
+ end
+
+ # Write the last line
+ write input.substring_from(prev)
+ stream_out.close
+
+ # Read the rest, may be everything for some programs
+ read.append stream_in.read_all
+ stream_in.close
+
+ # Clean up
+ wait
+ return read.to_s
end
end
end_reached = true
end
end
+
+ redef fun poll_in
+ do
+ var res = native_poll_in(fd)
+ if res == -1 then
+ last_error = new IOError(errno.to_s)
+ return false
+ else return res > 0
+ end
+
+ private fun native_poll_in(fd: Int): Int `{
+ struct pollfd fds = {fd, POLLIN, 0};
+ return poll(&fds, 1, 0);
+ `}
end
# `Stream` that can write to a File
path = "/dev/stdin"
prepare_buffer(1)
end
-
- redef fun poll_in `{
- struct pollfd fd = {0, POLLIN, 0};
- int res = poll(&fd, 1, 0);
- if (res == -1) {
- perror("Error poll stdin");
- exit(EXIT_FAILURE);
- }
- return res > 0;
- `}
end
# Standard output stream.
--- /dev/null
+Usage: checker xml_file