vsm :: FileIndex :: accept_file
Is `path` accepted depending on `whitelist_exts` and `blacklist_exts`?
vsm :: FileIndex :: blacklist_exts=
File extensions black list
vsm :: FileIndex :: defaultinit
vsm :: FileIndex :: index_file
Index a file from its `path`.
vsm :: FileIndex :: index_files
Index multiple files
vsm :: FileIndex :: parse_file
Parse the `file` content as a Vector
vsm :: FileIndex :: whitelist_exts=
File extensions white list
vsm :: FileIndex :: accept_file
Is `path` accepted depending on `whitelist_exts` and `blacklist_exts`?
vsm :: FileIndex :: blacklist_exts=
File extensions black list
core :: Object :: class_factory
Implementation used by `get_class` to create the specific class.
vsm :: FileIndex :: defaultinit
core :: Object :: defaultinit
vsm :: VSMIndex :: defaultinit
vsm :: StringIndex :: defaultinit
vsm :: VSMIndex :: index_document
Index a document
vsm :: FileIndex :: index_file
Index a file from its `path`.
vsm :: FileIndex :: index_files
Index multiple files
vsm :: StringIndex :: index_string
Index a new Document from `title`, `uri` and string `string`.
vsm :: VSMIndex :: inverse_doc_frequency
Inverse document frequency
vsm :: VSMIndex :: inverse_doc_frequency=
Inverse document frequency
vsm :: VSMIndex :: inversed_index
Inversed index
vsm :: VSMIndex :: inversed_index=
Inversed index
core :: Object :: is_same_instance
Return true if `self` and `other` are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Is `self` the same as `other` in a serialization context?
core :: Object :: is_same_type
Return true if `self` and `other` have the same dynamic type.
vsm :: StringIndex :: match_string
Match the `query` string against all indexed documents
vsm :: VSMIndex :: match_vector
Match `query` vector to all index document vectors
core :: Object :: output_class_name
Display class name on stdout (debug only).
vsm :: FileIndex :: parse_file
Parse the `file` content as a Vector
vsm :: StringIndex :: parse_string
Parse the `string` as a Vector
vsm :: VSMIndex :: terms_doc_count
Count for all terms in all indexed documents
vsm :: VSMIndex :: terms_doc_count=
Count for all terms in all indexed documents
vsm :: FileIndex :: whitelist_exts=
File extensions white list
# A VSMIndex to index files
#
# Files are filtered by extension through `accept_file` (see `whitelist_exts`
# and `blacklist_exts`) before their content is parsed and indexed.
class FileIndex
	super StringIndex

	# Index a file from its `path`.
	#
	# Return the created document or null if `path` is not accepted by `accept_file`.
	#
	# The file content is parsed with `parse_file`, and `path` is given as both
	# of the first two arguments of the created `Document`.
	# `auto_update` is forwarded unchanged to `index_document`.
	#
	# See `index_document`.
	fun index_file(path: String, auto_update: nullable Bool): nullable DOC do
		if not accept_file(path) then return null
		var vector = parse_file(path)
		var doc = new Document(path, path, vector)
		index_document(doc, auto_update)
		return doc
	end

	# Index multiple files
	#
	# The recursive method `index_dir` will be called for each directory found
	# in `paths`.
	#
	# Every entry is indexed with `auto_update` set to `false`; `update_index`
	# is called once, after all `paths` were visited, and only if
	# `auto_update` is `true`.
	#
	# See `index_file`
	fun index_files(paths: Collection[String], auto_update: nullable Bool) do
		for path in paths do
			if path.to_path.is_dir then
				index_dir(path, false)
			else
				index_file(path, false)
			end
		end
		if auto_update != null and auto_update then update_index
	end

	# Index all files in `dir` recursively
	#
	# Do nothing if `dir` is not a directory.
	#
	# Sub-directories and files are indexed with `auto_update` set to `false`;
	# `update_index` is called once at the end, and only if `auto_update`
	# is `true`.
	#
	# See `index_file`.
	fun index_dir(dir: String, auto_update: nullable Bool) do
		if not dir.to_path.is_dir then return
		for file in dir.files do
			var path = dir / file
			if path.to_path.is_dir then
				index_dir(path, false)
			else
				index_file(path, false)
			end
		end
		if auto_update != null and auto_update then update_index
	end

	# Is `path` accepted depending on `whitelist_exts` and `blacklist_exts`?
	#
	# The extension of `path` is lowercased before being looked up in both
	# lists, so the lists are expected to contain lowercase extensions.
	#
	# * A path without extension is accepted only if `whitelist_exts` is empty.
	# * A path whose extension is in `blacklist_exts` is always rejected.
	# * Otherwise the path is accepted if `whitelist_exts` is empty or
	#   contains the extension.
	fun accept_file(path: String): Bool do
		var ext = path.file_extension
		# No extension: nothing can match a non-empty white list.
		if ext == null then return whitelist_exts.is_empty
		var lower = ext.to_lower
		# The black list prevails over the white list.
		if blacklist_exts.has(lower) then return false
		# A whitelisted extension must be accepted; falling back to
		# `whitelist_exts.is_empty` here would reject every whitelisted file.
		return whitelist_exts.is_empty or whitelist_exts.has(lower)
	end

	# Parse the `file` content as a Vector
	#
	# The whole content of `file` is read and given to `parse_string`.
	#
	# See `parse_string`.
	fun parse_file(file: String): Vector do
		return parse_string(file.to_path.read_all)
	end

	# File extensions white list
	#
	# If not empty, only files with these extensions will be indexed.
	#
	# If an extension is in both `whitelist_exts` and `blacklist_exts`, the
	# blacklist will prevail and the file will be ignored.
	var whitelist_exts = new Array[String] is writable

	# File extensions black list
	#
	# Files with these extensions will not be indexed.
	var blacklist_exts = new Array[String] is writable
end
lib/vsm/vsm.nit:255,1--335,3