diff --git a/ciphers/break_vigenere.py b/ciphers/break_vigenere.py new file mode 100644 index 000000000000..9cd69e9c9f01 --- /dev/null +++ b/ciphers/break_vigenere.py @@ -0,0 +1,216 @@ +LETTER_FREQUENCIES_DICT = { + "A": 8.12, + "B": 1.49, + "C": 2.71, + "D": 4.32, + "E": 12.02, + "F": 2.3, + "G": 2.03, + "H": 5.92, + "I": 7.31, + "J": 0.1, + "K": 0.69, + "L": 3.92, + "M": 2.61, + "N": 6.95, + "O": 7.68, + "P": 1.82, + "Q": 0.11, + "R": 6.02, + "S": 6.28, + "T": 9.10, + "U": 2.88, + "V": 1.11, + "W": 2.09, + "X": 0.17, + "Y": 2.11, + "Z": 0.07, +} +LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +PARAMETER = 0.0665 # index of confidence of the entire language (for +# english 0.0665) +MAX_KEYLENGTH = ( + None # None is the default, you can also try a positive integer ( + # example: 10) +) + + +def index_of_coincidence(frequencies: dict, length: int) -> float: + """ + Calculates the index of coincidence for a text. + :param frequencies: dictionary of the form {letter_of_the_alphabet: amount + of times it appears in the text as a percentage} + :param length: the length of the text + :return: the index of coincidence + """ + index = 0.0 + for value in frequencies.values(): + index += value * (value - 1) + return index / (length * (length - 1)) + + +def calculate_indexes_of_coincidence(ciphertext: str, step: int) -> list: + """ + For each number j in the range [0, step) the function checks the letters of + the ciphertext whose position has the form j+n*step, where n is an integer + and for these letters it calculates the index of coincidence. It returns a + list with step elements, which represent the indexes of coincidence. + :param ciphertext: s string (text) + :param step: the step when traversing through the cipher + :return: a list with the indexes of coincidence + """ + indexes_of_coincidence = [] + length = len(ciphertext) + + # for every starting point in [0, step) + for j in range(step): + frequencies: dict[str, int] = {} + c = 0 + for i in range(0 + j, length, step): + c += 1 + try: # in case the frequencies dictionary does not already have + # this key + letter = ciphertext[i] + temp = frequencies[letter] + frequencies[ciphertext[i]] = temp + 1 + except KeyError: + frequencies[ciphertext[i]] = 1 + if c> 1: # to avoid division by zero in the index_of_coincidence + # function + indexes_of_coincidence.append(index_of_coincidence(frequencies, c)) + + return indexes_of_coincidence + + +def friedman_method(ciphertext: str, max_keylength: int | None = None) -> int: + """ + Implements Friedman's method for finding the length of the key of a + Vigenere cipher. It finds the length with an index of confidence closer + to that of an average text in the english language. Check the wikipedia + page: https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher The algorithm + is in the book "Introduction to Cryptography", K. Draziotis + https://repository.kallipos.gr/handle/11419/8183 + :param ciphertext: a string (text) + :param max_keylength: the maximum length of key that Friedman's method + should check, if None then it defaults to the length of the cipher + :return: the length of the key + """ + # sets the default value of max_keylength + if max_keylength is None: + max_keylength = len(ciphertext) + + frequencies = [ + 1.5 + ] # the zeroth position should not be used: length of key is greater + # than zero + + # for every length of key + for i in range(1, max_keylength + 1): + # for a specific length it finds the minimum index of coincidence + min1 = 15.0 + for val in calculate_indexes_of_coincidence(ciphertext, i): + if abs(val - PARAMETER) < abs(min1 - PARAMETER): + min1 = val + frequencies.append(min1) + + # finds which length of key has the minimum difference with the language + # PARAMETER + li = (15.0, -1) # initialization + for i in range(len(frequencies)): + if abs(frequencies[i] - PARAMETER) < abs(li[0] - PARAMETER): + li = (frequencies[i], i) + + return li[1] + + +def get_frequencies() -> tuple: + """Return the values of the global variable @LETTER_FREQUENCIES_DICT as a + tuple ex. (0.25, 1.42, ...). + """ + t = tuple(LETTER_FREQUENCIES_DICT[chr(i)] for i in range(ord("A"), ord("A") + 26)) + return tuple(num / 100 for num in t) + + +def find_key(ciphertext: str, key_length: int) -> str: + """ + Finds the key of a text which has been encrypted with the Vigenere + algorithm, using statistical analysis. The function needs an estimation + of the length of the key. Firstly it finds the frequencies of the + letters in the text. Then it compares these frequencies with those of an + average text in the english language. For each letter it multiplies its + frequency with the average one and adds them all together, then it + shifts the frequencies of the text cyclically by one position and + repeats the process. The shift that produces the largest sum corresponds + to a letter of the key. The whole procedure takes place for every letter + of the key (essentially as many times as the length of the key). See + here: https://www.youtube.com/watch?v=LaWp_Kq0cKs + :param ciphertext: a string (text) + :param key_length: a supposed length of the key + :return: the key as a string + """ + a = ord("A") + cipher_length = len(ciphertext) + alphabet_length = 26 # the length of the english alphabet + + key = [] + + # for every letter of the key + for k in range(key_length): + # find the frequencies of the letters in the message: the frequency + # of 'A' is in the first position of the freq list and so on + freq = [0.0] * alphabet_length + c = 0 + for i in range(k, cipher_length, key_length): + freq[ord(ciphertext[i]) - a] += 1 + c += 1 + freq = [num / c for num in freq] + + # find the max sum -> part of the key + real_freq = get_frequencies() + max1 = [-1, None] # value, position + for i in range(alphabet_length): + new_val = sum((freq[j] * real_freq[j]) for j in range(alphabet_length)) + if max1[0] < new_val: + max1 = [new_val, i] + freq.append( + freq.pop(0) + ) # shift the list cyclically one position to the left + key.append(max1[1]) + + key_as_list_of_letters = [] + for num in key: + if num is not None: + key_as_list_of_letters.append(chr(num + a)) + + return "".join(key_as_list_of_letters) # return the key as a string + + +def find_key_from_vigenere_cipher(ciphertext: str) -> str: + """ + Tries to find the key length and then the actual key of a Vigenere + ciphertext. It uses Friedman's method and statistical analysis. It works + best for large pieces of text written in the english language. + """ + clean_ciphertext_list = [] + for symbol in ciphertext.upper(): + if symbol in LETTERS: + clean_ciphertext_list.append(symbol) + + clean_ciphertext = "".join(clean_ciphertext_list) + + key_length = friedman_method(clean_ciphertext, max_keylength=MAX_KEYLENGTH) + print(f"The length of the key is {key_length}") + if key_length <= 0: + raise ValueError("The length of the key should be strictly positive") + + key = find_key(clean_ciphertext, key_length) + return key + + +if __name__ == "__main__": + print("") + # # how to execute + # with open("out.txt") as file: + # ciphertext = file.read() + # key = find_key_from_vigenere_cipher(ciphertext) + # print(key) diff --git a/ciphers/test_break_vigenere.py b/ciphers/test_break_vigenere.py new file mode 100644 index 000000000000..7998c9e5282a --- /dev/null +++ b/ciphers/test_break_vigenere.py @@ -0,0 +1,48 @@ +import math + +from ciphers.break_vigenere import ( + LETTER_FREQUENCIES_DICT, + calculate_indexes_of_coincidence, + find_key, + find_key_from_vigenere_cipher, + friedman_method, + get_frequencies, + index_of_coincidence, +) + + +class Test: + def test_index_of_coincidence(self): + ic = index_of_coincidence({"a": 50, "b": 50}, 50) + assert math.isclose(ic, 2.0) + + def test_calculate_indexes_of_coincidence(self): + ciphertext = "hellothere" + result = calculate_indexes_of_coincidence(ciphertext, 2) + assert result == [0.1, 0.3] + + def test_friedman_method(self): + ciphertext = "asqsfdybpypvhftnboexqumfsnglmcstyefv".upper() + result = friedman_method(ciphertext, 5) + assert result == 3 + + def test_get_frequencies(self): + result = get_frequencies() + expected = tuple(num / 100 for num in LETTER_FREQUENCIES_DICT.values()) + assert result == expected + + def test_find_key(self): + ciphertext = "asqsfdybpypvhftnboexqumfsnglmcstyefv".upper() + result = find_key(ciphertext, 3) + assert result == "ABC" + + def test_find_key_from_vigenere_cipher(self): + ciphertext = ( + "A dqxryeocqgj mpth ms sptusb ticq ms aoihv. Fgf " + "edrsou ylxmes jhv, sos exwyon uweqe igu msfjplxj " + "vbtliyy. Bno xme xqupi's b uwele, bpg eql ujh qjn bpg " + "atmfp piwema spfyftv. E wotg ec fnz qwljr ocpi bovng " + "wremn dw xwfgw." + ) + result = find_key_from_vigenere_cipher(ciphertext) + assert result == "ABCDEF" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 66b5d8a6b94e..000000000000 --- a/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -beautifulsoup4 -fake-useragent -httpx -imageio -keras -lxml -matplotlib -numpy -opencv-python -pandas -pillow -rich -scikit-learn -sphinx-pyproject -statsmodels -sympy -tweepy -typing_extensions -xgboost

AltStyle によって変換されたページ (->オリジナル) /