A n-dimensions vector

n-dimensions vectors are used to represent a text document or an object.

Introduced properties

fun cosine_similarity(other: SELF): Float

vsm :: Vector :: cosine_similarity

Cosine similarity of self and other.
fun inc(obj: nullable Object)

vsm :: Vector :: inc

Increment value for obj term
fun norm: Float

vsm :: Vector :: norm

The norm of the vector.

Redefined properties

redef type SELF: Vector

vsm $ Vector :: SELF

Type of this instance, automatically specialized in every class
redef fun [](k: nullable Object): Float

vsm $ Vector :: []

Get the item at key
redef fun to_s: String

vsm $ Vector :: to_s

User readable representation of self.

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
abstract fun [](key: nullable Object): V

core :: MapRead :: []

Get the item at key
abstract fun []=(key: K, value: V)

core :: Map :: []=

Set the value at key.
protected fun accept_json_serializer(v: JsonSerializer)

serialization :: Serializable :: accept_json_serializer

Refinable service to customize the serialization of this class to JSON
protected fun accept_msgpack_attribute_counter(v: AttributeCounter)

serialization :: Serializable :: accept_msgpack_attribute_counter

Hook to customize the behavior of the AttributeCounter
protected fun accept_msgpack_serializer(v: MsgPackSerializer)

serialization :: Serializable :: accept_msgpack_serializer

Hook to customize the serialization of this class to MessagePack
fun add_all(map: MapRead[K, V])

core :: Map :: add_all

Add each (key,value) of map into self.
protected fun add_to_bundle(bundle: NativeBundle, key: JavaString)

serialization :: Serializable :: add_to_bundle

Called by []= to dynamically choose the appropriate method according
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
abstract fun clear

core :: Map :: clear

Remove all items
fun core_serialize_to(serializer: Serializer)

serialization :: Serializable :: core_serialize_to

Actual serialization of self to serializer
fun cosine_similarity(other: SELF): Float

vsm :: Vector :: cosine_similarity

Cosine similarity of self and other.
fun filter_keys(keys: Collection[nullable Object]): Array[K]

core :: MapRead :: filter_keys

Return all elements of keys that have a value.
init from(coll: Map[K, V])

core :: HashMap :: from

Build a list filled with the items of coll.
init from_deserializer(deserializer: Deserializer)

serialization :: Serializable :: from_deserializer

Create an instance of this class from the deserializer
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun get_or_default(key: nullable Object, default: V): V

core :: MapRead :: get_or_default

Get the item at key or return default if not in map
fun get_or_null(key: nullable Object): nullable V

core :: MapRead :: get_or_null

Get the item at key or null if key is not in the map.
fun has_key(key: nullable Object): Bool

core :: MapRead :: has_key

Is there an item associated with key?
fun hash: Int

core :: Object :: hash

The hash code of the object.
fun inc(obj: nullable Object)

vsm :: Vector :: inc

Increment value for obj term
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
abstract fun is_empty: Bool

core :: MapRead :: is_empty

Is there no item in the collection?
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
abstract fun iterator: MapIterator[K, V]

core :: MapRead :: iterator

Get a new iterator on the map.
abstract fun join(sep: String, couple_sep: String): String

core :: Map :: join

Concatenate couples of key value.
abstract fun keys: Collection[K]

core :: MapRead :: keys

Return the point of view of self on the keys only.
protected fun keys=(keys: RemovableCollection[K])

core :: HashMap :: keys=

fun keys_sorted_by_values(comparator: Comparator): Array[K]

core :: MapRead :: keys_sorted_by_values

Return an array of all keys sorted with their values using comparator.
abstract fun length: Int

core :: MapRead :: length

Number of items in the collection.
fun lookup_all_values(pe: POSetElement[K]): Set[V]

core :: MapRead :: lookup_all_values

Search all the values in pe.greaters.
fun lookup_values(pe: POSetElement[K]): Set[V]

core :: MapRead :: lookup_values

Combine the values in pe.greaters from the most smaller elements that have a value.
protected fun msgpack_extra_array_items: Int

serialization :: Serializable :: msgpack_extra_array_items

Hook to request a larger than usual metadata array
init new: Map[K, V]

core :: Map :: new

Get a HashMap[K, V] as default implementation
fun norm: Float

vsm :: Vector :: norm

The norm of the vector.
fun not_empty: Bool

core :: MapRead :: not_empty

Alias for not is_empty.
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
protected fun provide_default_value(key: nullable Object): V

core :: MapRead :: provide_default_value

Called by the underling implementation of [] to provide a default value when a key has no value
fun recover_with(map: MapRead[K, V])

core :: Map :: recover_with

Alias for add_all
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value use for serialization
fun serialize_msgpack(plain: nullable Bool): Bytes

serialization :: Serializable :: serialize_msgpack

Serialize self to MessagePack bytes
fun serialize_to(serializer: Serializer)

serialization :: Serializable :: serialize_to

Serialize self to serializer
fun serialize_to_json(plain: nullable Bool, pretty: nullable Bool): String

serialization :: Serializable :: serialize_to_json

Serialize self to JSON
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
fun to_json: String

serialization :: Serializable :: to_json

Serialize self to plain JSON
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_map_comparator(comparator: Comparator): MapComparator[K, V]

core :: MapRead :: to_map_comparator

A comparator that compares things with their values in self.
fun to_pretty_json: String

serialization :: Serializable :: to_pretty_json

Serialize self to plain pretty JSON
fun to_s: String

core :: Object :: to_s

User readable representation of self.
abstract fun values: Collection[V]

core :: MapRead :: values

Return the point of view of self on the values only.
protected fun values=(values: RemovableCollection[V])

core :: HashMap :: values=

fun values_sorted_by_key(comparator: Comparator): Array[V]

core :: MapRead :: values_sorted_by_key

Return an array of all values sorted with their keys using comparator.
package_diagram vsm::Vector Vector core::HashMap HashMap vsm::Vector->core::HashMap core::Map Map core::HashMap->core::Map ...core::Map ... ...core::Map->core::Map

Ancestors

interface Map[K: nullable Object, V: nullable Object]

core :: Map

Maps are associative collections: key -> item.
interface MapRead[K: nullable Object, V: nullable Object]

core :: MapRead

MapRead are abstract associative collections: key -> item.
interface Object

core :: Object

The root of the class hierarchy.
interface Serializable

serialization :: Serializable

Instances of this class can be passed to Serializer::serialize

Parents

class HashMap[K: nullable Object, V: nullable Object]

core :: HashMap

A Map implemented with a hash table.

Class definitions

vsm $ Vector
# A n-dimensions vector
#
# *n-dimensions* vectors are used to represent a text document or an object.
class Vector
	super HashMap[nullable Object, Float]

	# Cosine similarity of `self` and `other`.
	#
	# Gives the proximity in the range `[0.0 .. 1.0]` where 0.0 means that the
	# two vectors are orthogonal and 1.0 means that they are identical.
	#
	# ~~~
	# var v1 = new Vector
	# v1["x"] = 1.0
	# v1["y"] = 2.0
	# v1["z"] = 3.0
	#
	# var v2 = new Vector
	# v2["x"] = 1.0
	# v2["y"] = 2.0
	# v2["z"] = 3.0
	#
	# var v3 = new Vector
	# v3["a"] = 1.0
	# v3["b"] = 2.0
	# v3["c"] = 3.0
	#
	# print v1.cosine_similarity(v2)
	# assert v1.cosine_similarity(v2) == 1.0
	# print v1.cosine_similarity(v3)
	# assert v1.cosine_similarity(v3) == 0.0
	# ~~~
	fun cosine_similarity(other: SELF): Float do
		# Collect terms
		var terms = new HashSet[nullable Object]
		for k in self.keys do terms.add k
		for k in other.keys do terms.add k

		# Get dot product of two vectors
		var dot = 0.0
		for term in terms do
			dot += self.get_or_default(term, 0.0) * other.get_or_default(term, 0.0)
		end
		var cos = dot.to_f / (self.norm * other.norm)
		if cos.is_nan then return 0.0
		return cos
	end

	redef fun [](k) do
		if not has_key(k) then return 0.0
		return super
	end

	# Increment value for `obj` term
	#
	# If the term isn't already in the vector, the new value is 1.0.
	fun inc(obj: nullable Object) do
		if has_key(obj) then
			self[obj] += 1.0
		else
			self[obj] = 1.0
		end
	end

	# The norm of the vector.
	#
	# `||x|| = (x1 ** 2 ... + xn ** 2).sqrt`
	#
	# ~~~
	# var v = new Vector
	# v["x"] = 1.0
	# v["y"] = 1.0
	# v["z"] = 1.0
	# v["t"] = 1.0
	# assert v.norm.is_approx(2.0, 0.001)
	#
	# v["x"] = 1.0
	# v["y"] = 2.0
	# v["z"] = 3.0
	# v["t"] = 0.0
	# assert v.norm.is_approx(3.742, 0.001)
	# ~~~
	fun norm: Float do
		var sum = 0.0
		for v in self.values do sum += v.pow(2.0)
		return sum.to_f.sqrt
	end

	redef fun to_s do
		return "[{join(", ", ":")}]"
	end
end
lib/vsm/vsm.nit:27,1--118,3