n-dimensions vectors are used to represent a text document or an object.
vsm :: Vector :: cosine_similarity
Cosine similarity of `self` and `other`.
vsm :: Vector :: defaultinit
serialization :: Serializable :: accept_json_serializer
Refinable service to customize the serialization of this class to JSON
serialization :: Serializable :: accept_msgpack_attribute_counter
Hook to customize the behavior of the `AttributeCounter`
serialization :: Serializable :: accept_msgpack_serializer
Hook to customize the serialization of this class to MessagePack
serialization :: Serializable :: add_to_bundle
Called by `[]=` to dynamically choose the appropriate method according
core :: Object :: class_factory
Implementation used by `get_class` to create the specific class.
serialization :: Serializable :: core_serialize_to
Actual serialization of `self` to `serializer`
vsm :: Vector :: cosine_similarity
Cosine similarity of `self` and `other`.
core :: HashMap :: defaultinit
vsm :: Vector :: defaultinit
core :: Map :: defaultinit
core :: MapRead :: defaultinit
core :: Object :: defaultinit
core :: MapRead :: filter_keys
Return all elements of `keys` that have a value.
serialization :: Serializable :: from_deserializer
Create an instance of this class from the `deserializer`
core :: MapRead :: get_or_default
Get the item at `key` or return `default` if not in map
core :: MapRead :: get_or_null
Get the item at `key` or null if `key` is not in the map.
core :: Object :: is_same_instance
Return true if `self` and `other` are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Is `self` the same as `other` in a serialization context?
core :: Object :: is_same_type
Return true if `self` and `other` have the same dynamic type.
core :: MapRead :: keys_sorted_by_values
Return an array of all keys sorted with their values using `comparator`.
core :: MapRead :: lookup_all_values
Search all the values in `pe.greaters`.
core :: MapRead :: lookup_values
Combine the values in `pe.greaters` from the smallest elements that have a value.
serialization :: Serializable :: msgpack_extra_array_items
Hook to request a larger than usual metadata array
core :: Object :: output_class_name
Display class name on stdout (debug only).
core :: MapRead :: provide_default_value
Called by the underlying implementation of `[]` to provide a default value when a `key` has no value
serialization :: Serializable :: serialize_msgpack
Serialize `self` to MessagePack bytes
serialization :: Serializable :: serialize_to
Serialize `self` to `serializer`
serialization :: Serializable :: serialize_to_json
Serialize `self` to JSON
core :: MapRead :: to_map_comparator
A comparator that compares things with their values in self.
serialization :: Serializable :: to_pretty_json
Serialize `self` to plain pretty JSON
core :: MapRead :: values_sorted_by_key
Return an array of all values sorted with their keys using `comparator`.
# A n-dimensions vector
#
# *n-dimensions* vectors are used to represent a text document or an object.
#
# Each key is a term (one dimension of the vector) and the associated `Float`
# is the value of the vector along that dimension.
# Reading a term that was never set yields `0.0` instead of aborting.
class Vector
	super HashMap[nullable Object, Float]

	# Cosine similarity of `self` and `other`.
	#
	# Computed as the dot product of the two vectors divided by the product of
	# their norms.
	#
	# For vectors that only hold non-negative values, the result is in the
	# range `[0.0 .. 1.0]` where 0.0 means that the two vectors are orthogonal
	# and 1.0 means that they have the same direction.
	#
	# Returns 0.0 when the ratio is not a number, as it is the case when one of
	# the vectors is empty or has a norm of 0.0.
	#
	# ~~~
	# var v1 = new Vector
	# v1["x"] = 1.0
	# v1["y"] = 2.0
	# v1["z"] = 3.0
	#
	# var v2 = new Vector
	# v2["x"] = 1.0
	# v2["y"] = 2.0
	# v2["z"] = 3.0
	#
	# var v3 = new Vector
	# v3["a"] = 1.0
	# v3["b"] = 2.0
	# v3["c"] = 3.0
	#
	# print v1.cosine_similarity(v2)
	# assert v1.cosine_similarity(v2) == 1.0
	# print v1.cosine_similarity(v3)
	# assert v1.cosine_similarity(v3) == 0.0
	#
	# var empty = new Vector
	# assert v1.cosine_similarity(empty) == 0.0
	# ~~~
	fun cosine_similarity(other: SELF): Float do
		# Dot product of the two vectors.
		#
		# A term that is missing from `self` counts as `0.0` and so adds nothing
		# to the sum: visiting the terms of `self` is enough, there is no need
		# to collect the union of both key sets first.
		var dot = 0.0
		for term, value in self do
			dot += value * other.get_or_default(term, 0.0)
		end

		var cos = dot / (self.norm * other.norm)
		# `0.0 / 0.0` (at least one null norm) gives NaN: report no similarity.
		if cos.is_nan then return 0.0
		return cos
	end

	# Value associated to the term `k`, or `0.0` if `k` is not in the vector.
	redef fun [](k) do
		if not has_key(k) then return 0.0
		return super
	end

	# Increment value for `obj` term
	#
	# If the term isn't already in the vector, the new value is 1.0.
	fun inc(obj: nullable Object) do
		# `self[obj]` reads `0.0` for an unknown term (see the redefinition of
		# `[]`), so a single statement covers both the new and the known term.
		self[obj] += 1.0
	end

	# The norm of the vector.
	#
	# `||x|| = (x1 ** 2 ... + xn ** 2).sqrt`
	#
	# An empty vector has a norm of 0.0.
	#
	# ~~~
	# var v = new Vector
	# v["x"] = 1.0
	# v["y"] = 1.0
	# v["z"] = 1.0
	# v["t"] = 1.0
	# assert v.norm.is_approx(2.0, 0.001)
	#
	# v["x"] = 1.0
	# v["y"] = 2.0
	# v["z"] = 3.0
	# v["t"] = 0.0
	# assert v.norm.is_approx(3.742, 0.001)
	# ~~~
	fun norm: Float do
		var sum = 0.0
		for v in self.values do sum += v.pow(2.0)
		return sum.sqrt
	end

	# Terms and values formatted as `[term:value, term:value]`.
	redef fun to_s do
		return "[{join(", ", ":")}]"
	end
end
lib/vsm/vsm.nit:27,1--118,3