lib/crapto: Introduce 2 new attacks on XOR ciphers
[nit.git] / lib / crapto / xor.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Cryptographic attacks and utilities for XOR-based algorithms.
16 module xor
17
18 import combinations
19 import crypto
20 import english_utils
21
22 redef class SingleByteXorCipher
23 # Tries to find key using frequency analysis on all possible plaintexts.
24 # Populates `self.key`
25 fun find_key do
26
27 # Accumulate best result
28 var max = 0.0
29 var best = 0.to_b
30
31 # Iterate on possible values for a byte
32 var xor_b = new Bytes.with_capacity(1)
33 for b in [0 .. 255] do
34 # Need `Bytes` to pass to xor
35 xor_b[0] = b.to_b
36
37 # Xor and evaluate result
38 var xored = ciphertext.xorcipher(xor_b)
39 var result = xored.to_s.english_scoring
40 if result > max then
41 max = result
42 best = b.to_b
43 end
44 end
45
46 self.key = best
47
48 end
49 end
50
51 redef class RepeatingKeyXorCipher
52 # Attempts to find the key by using frequency analysis on the resulting plaintexts.
53 # Best key lengths are estimated using the hamming distance of blocks of keylength bytes.
54 # Ciphertext is then transposed in such a way that it can be solved by sequences of
55 # SingleByteXor (one for each offset in the key).
56 #
57 # `min_keylength` and `max_keylength` are limits as to what key lengths are tested.
58 # `considered_keylength_count` is the number of best key lengths that are kept to be
59 # analysed by the SingleByteXor frequency analysis.
60 fun find_key(min_keylength, max_keylength, considered_keylength_count: nullable Int) do
61
62 # Set default values
63 if min_keylength == null then min_keylength = 2
64 if max_keylength == null then max_keylength = 40
65 if considered_keylength_count == null then considered_keylength_count = 3
66
67 # Find the best candidate key lengths based on the normalized hamming distances
68 var best_sizes = get_normalized_hamming_distances(min_keylength, max_keylength, considered_keylength_count)
69
70 var best = 0.0
71 var best_key: nullable Bytes = null
72 for ks in best_sizes do
73
74 # Rearrange ciphertext to be in SingleByteXORable blocks
75 var transposed = transpose_ciphertext(ks)
76
77 var key = new Bytes.empty
78 for slot in transposed do
79 var sbx = new SingleByteXorCipher
80 sbx.ciphertext = slot
81 sbx.find_key
82 key.add sbx.key
83 end
84
85 # Evaluate the resulting plaintext based on english frequency analysis
86 var eng_score = ciphertext.xorcipher(key).to_s.english_scoring
87 if eng_score > best then
88 best_key = key
89 best = eng_score
90 end
91
92 assert best_key != null
93 self.key = best_key
94
95 end
96 end
97
98 # Computes the normalized hamming distances between blocks of ciphertext of length between `min_length` and `max_length`.
99 # The `considered_keylength_count` smallest results are returned
100 private fun get_normalized_hamming_distances(min_keylength, max_keylength, considered_keylength_count: Int): Array[Int] do
101
102 var normalized_distances = new HashMap[Float, Int]
103
104 # Iterate over all given key lengths
105 for ks in [min_keylength .. max_keylength[ do
106
107 # Initialize the blocks of size `ks`
108 var blocks = new Array[Bytes]
109 while (blocks.length + 1) * ks < ciphertext.length do
110 blocks.add(ciphertext.slice(blocks.length * ks, ks))
111 end
112
113 # Compute the normalized hamming distance with all block combinations
114 var pairs = new CombinationCollection[Bytes](blocks, 2)
115 var hamming_dists = new Array[Float]
116 for p in pairs do
117 hamming_dists.add(p[0].hamming_distance(p[1]).to_f / ks.to_f)
118 end
119
120 # Normalize the results based on the number of blocks
121 var normalized = 0.0
122 for dist in hamming_dists do normalized += dist
123 normalized /= hamming_dists.length.to_f
124 normalized_distances[normalized] = ks
125
126 end
127
128 # Collect the best candidates
129 var distances = normalized_distances.keys.to_a
130 default_comparator.sort(distances)
131 var best_distances = distances.subarray(0, considered_keylength_count)
132 var best_sizes = [for d in best_distances do normalized_distances[d]]
133
134 return best_sizes
135 end
136
137 # Returns a rearranged format of the ciphertext where every byte of a slot will be XORed with the same offset of a key of length `keylength`.
138 private fun transpose_ciphertext(keylength: Int): Array[Bytes] do
139 var transposed = new Array[Bytes]
140 for i in [0 .. keylength[ do
141 transposed.add(new Bytes.empty)
142 end
143
144 for byte_idx in [0 .. ciphertext.length[ do
145 transposed[byte_idx % keylength].add(ciphertext[byte_idx])
146 end
147
148 return transposed
149 end
150 end