I've been working on this for a while, and now that I've got a working program I thought I'd see what other people think about it.
Basically if there is anything you think would make it faster or make it simpler, or make it better in general, please share your thoughts. As for actual strength, I have no idea how to test that. And yes, I know I should just use AES, but this is just for practice/fun more than anything.
Anyway, here's the code. (255 lines, sorry)
import base64
import os
import random
# from datetime import datetime
def add_padding(s, i=128):
padding = len(s) % i
for j in range(i - padding):
s += '='
s = ascii_to_bin(s)
# make last byte equal to number of padding bytes
s = s[:len(s) - 8] + decimal_to_binary([i - padding])
return s
def xor_string(k, s):
xored_secret = ''
for i in range(len(s) // len(k)):
if i > 0:
k = round_key(k)
xored_secret += decimal_to_binary([bin_to_decimal(k, len(k))[0] ^ bin_to_decimal(s[i * len(k):len(k) + (i * len(k))], len(k))[0]], len(k))
return xored_secret
def generate_key(k):
if len(k) == 128:
k = ascii_to_bin(k)
return k
elif len(k) < 128:
k = ascii_to_decimal(k)
for i in range(128 - len(k)):
b = decimal_to_binary([k[i]])
b = xor_string(decimal_to_binary([int(sum(k) / len(k))]), b[::-1])
k.append(int(b, 2))
s = ''
for i in k:
s += str(i)
j = str(s[:len(s) // 2])
y = str(s[len(s) // 2:])
s = decimal_to_binary([int(y + j)])
s = s[:1024]
return s
def bin_to_base64(binary):
return base64.b64encode(bytes([int(binary[i * 8:8 + i * 8], 2) for i in range(len(binary) // 8)])).decode()
def ascii_to_bin(string):
return decimal_to_binary(ascii_to_decimal(string))
def bin_to_decimal(binary, length=8):
b = [binary[i * length:length + (i * length)] for i in range(len(binary) // length)]
decimal = [int(i, 2) for i in b]
# returns an list of ints
return decimal
def decimal_to_binary(decimal, length=8):
output = ''
for i in range(len(decimal)):
output += str(bin(decimal[i])[2:].zfill(length))
# returns a string
return output
def ascii_to_decimal(string):
# returns a list of ints
return [ord(i) for i in string]
def bin_to_ascii(binary):
x = bin_to_decimal(binary)
s = ''
for i in x:
s += chr(i)
# returns a string
return s
def base64_to_bin(base):
decoded = ''
for letter in base64.b64decode(base):
# print(letter)
decoded += bin(letter)[2:].zfill(8)
return decoded
def matrix_to_str(m):
s = ''
for i in range(32):
for j in range(32):
s += str(m[i][j])
return s
def obfuscate(binary, k, x, xd):
b = ''
d = k # donkey kong
for i in range(len(binary) // 1024):
if i > 0:
d = round_key(d)
# m = [list(binary[j * 32 + i * 1024:j * 32 + i * 1024 + 32]) for j in range(32)]
if x:
m = [list(binary[j * 32 + i * 1024:j * 32 + i * 1024 + 32]) for j in range(32)]
m = shuffle(m, bin_to_decimal(d, 1024)[0], xd)
b += xor_string(d, matrix_to_str(m))
elif not x:
xor = xor_string(d, binary[i * 1024:i * 1024 + 1024])
m = [list(xor[j * 32:j * 32 + 32]) for j in range(32)]
m = reverse_shuffle(m, bin_to_decimal(d, 1024)[0], xd)
b += matrix_to_str(m)
return xor_string(k, b)
def shuffle(m, d, xd):
for j in range(xd):
# move columns to the right
m = [row[-1:] + row[:-1] for row in m]
# move rows down
m = m[-1:] + m[:-1]
shuffled_m = [[0] * 32 for _ in range(32)]
for idx, sidx in enumerate(test(d)):
shuffled_m[idx // 32][idx % 32] = m[sidx // 32][sidx % 32]
m = shuffled_m
# cut in half and flip halves
m = m[len(m) // 2:] + m[:len(m) // 2]
# test
m = list(map(list, zip(*m)))
return m
def reverse_shuffle(m, d, xd):
for j in range(xd):
# test
m = list(map(list, zip(*m)))
# cut in half and flip halves
m = m[len(m) // 2:] + m[:len(m) // 2]
shuffled_m = [[0] * 32 for _ in range(32)]
for idx, sidx in enumerate(test(d)):
shuffled_m[sidx // 32][sidx % 32] = m[idx // 32][idx % 32]
m = shuffled_m
# move rows up
m = m[1:] + m[:1]
# move columns to the left
m = [row[1:] + row[:1] for row in m]
return m
def test(d):
random.seed(d)
lst = list(range(1024))
random.shuffle(lst)
return lst
def round_key(k):
k = [[k[(j * 32 + n)] for n in range(32)] for j in range(32)]
# get the last column
col = [i[-1] for i in k]
# interweave
col = [x for i in range(len(col) // 2) for x in (col[-i - 1], col[i])]
new_key = ''
for i in range(32):
cols = ''
for row in k:
cols += row[i]
cols = cols[16:] + cols[:16]
new_key += xor_string(''.join(str(ele) for ele in col), cols)
return new_key
def encrypt(p, s, xd):
k = generate_key(p)
s = add_padding(s)
s = xor_string(k, s)
s = obfuscate(s, k, True, xd)
s = bin_to_base64(s)
return s
def decrypt(p, b, xd):
k = generate_key(p)
b = base64_to_bin(b)
b = xor_string(k, b)
b = obfuscate(b, k, False, xd)
pad = b[len(b) - 8:]
b = bin_to_ascii(b)
b = b[:len(b) - bin_to_decimal(pad)[0]]
return b
if __name__ == '__main__':
while True:
os.system('cls')
com = input('1)Encrypt Text \n2)Decrypt Text\n3)Exit\n')
if com == '1':
os.system('cls')
secret = input('Enter the text you wish to encrypt: ')
os.system('cls')
key = input('Enter your key: ')
os.system('cls')
print(f'Encrypted text: {encrypt(key, secret, 1)}') # the 1 is the number of loops, I'm not sure how many I should do :/
input()
elif com == '2':
os.system('cls')
b64 = input('Enter the text you wish to decrypt: ')
os.system('cls')
key = input('Enter your key: ')
os.system('cls')
print(f'Decrypted text: {decrypt(key, b64, 1)}')
input()
elif com == '3':
break
If you need clarification on anything, just ask. Thanks!
3 Answers 3
Add Padding
def add_padding(s, i=128):
padding = len(s) % i
for j in range(i - padding):
s += '='
...
If s
is a 127 characters, padding
becomes 127, but only 1 =
character is added.
If s
is 128 characters, padding
becomes zero, which makes sense. But then 128 =
characters get added to the string, which is really unexpected. I understand it is necessary, because you replace the last with the amount of padding, but it violates the principle of least surprise.
Using...
s += '=' * (i - padding)
would be more efficient than a loop.
After adding the padding, and converting it into bits, you remove the last 8 bits of the padding, replacing it with the length of the padding. You could have saved some work by not adding the extra padding character in the first place.
Here's some reworked code:
def add_padding(message, frame_size=128):
payload_len = len(message) + 1 # Extra character encodes padding length
padding = -payload_len % frame_size # Amount of padding needed to fill frame
frames = message + '=' * padding + chr(padding + 1)
return ascii_to_bin(frames)
ASCII to Decimal
def ascii_to_decimal(string):
# returns a list of ints
return [ord(i) for i in string]
While this looks nice and simple, much of your code expects 8-bit bytes, not integers. The first ā
in the string will get turned into a 257, and a subsequent bin(letter)[2:].zfill(length)
will expand that to '100000001'
, despite being longer than 8 characters
Python has a builtin function which converts a string into an array of bytes. str.encode()
. By default, it will use the UTF-8
encoding and will encode non-ASCII characters into multiple bytes, as required:
>>> msg = 'cbā'
>>> msg_bytes = msg.encode()
>>> print(len(msg), len(msg_bytes), type(msg_bytes))
3 4 <class 'bytes'>
But note the length of the byte-array is (in this case) greater than the length of the string, which will mess up your padding. The simplest fix is to convert the string into an array of bytes, and then determine how much padding is required, and add that to the byte array.
>>> msg_bytes = msg.encode()
>>> payload_len = len(msg_bytes) + 1
>>> padding = -payload_len % frame_size
>>> frames = msg_bytes + b'=' * padding + bytes([padding + 1])
As a bonus, the bytes
(read-only) and bytearray
(mutable) objects are way more efficient than using lists of integers, so you can gain some speed and/or memory efficiency by switching to them.
To reverse this in the decoding step:
>>> padding = frames[-1] # amount of padding added
>>> msg_bytes = frames[:-padding] # unpadded message bytes
>>> msg = msg_bytes.decode() # decode bytes back to a string
>>> msg
'cbā'
-
\$\begingroup\$ So I replaced
return [ord(i) for i in string]
withreturn bytearray(string.encode())
. Is that what you meant? The change actually seems to be slowing it down :/.0:00:00.002810
old vs0:00:00.003025
new (average of 10000 runs) \$\endgroup\$Have a nice day– Have a nice day2021年04月23日 01:16:53 +00:00Commented Apr 23, 2021 at 1:16 -
\$\begingroup\$ So when you say, 'I'm suggesting encoding to a byte array and then padding.', I did this:
frames = (message + '=' * padding + chr(padding + 1)).encode()
. Is that what you meant? Sorry for all the questions, but I really appropriate the help. \$\endgroup\$Have a nice day– Have a nice day2021年04月23日 02:25:27 +00:00Commented Apr 23, 2021 at 2:25 -
\$\begingroup\$ See updated "ASCII to Decimal" section in the review. \$\endgroup\$AJNeufeld– AJNeufeld2021年04月23日 19:20:55 +00:00Commented Apr 23, 2021 at 19:20
-
\$\begingroup\$ So I have applied the changes, but there is an issue. After decrypting, it converts from binary to ascii. In that process it converts every 8 bits to an int, and then
chr(num)
's it. But how can I tell if it sould be more than 8, for theā
? \$\endgroup\$Have a nice day– Have a nice day2021年04月23日 22:40:31 +00:00Commented Apr 23, 2021 at 22:40 -
\$\begingroup\$ Your not suppose to tell if you need more than one byte per letter; that is the job of
bytes.decode()
. It sounds like you are still trying to use a list of ints, instead ofbytes
(an array of value-restricted ints). We're beyond a code review here. I've suggested an improvement direction, and you've run into problems implementing it. You probably want to post a new question on Stack Overflow asking what you are doing wrong in the conversion. Answers there can be answers to a question, instead of comments on an answer here. (Post a link to the new question here, so we can follow.) \$\endgroup\$AJNeufeld– AJNeufeld2021年04月24日 05:09:38 +00:00Commented Apr 24, 2021 at 5:09
Welcome to Code Review.
good things
- reasonable function names
- making it importable by putting the CLI into the
if __name__ == '__main__':
block - good indentation and use of white space
suggestions
- stuffing the entire function into the
return
line works better onascii_to_bin
thanbin_to_base64
. - single letter variable names are frowned upon because they are easy for folks to get confused. Using
key
instead ofk
, and such, would help the code be more readable - before one of the serious pythonistas comes along you should read up on PEP8. There are linters to help with this.
- this is limited to running on DOS-derived systems.
cls
doesn't exist on Macs or Linux. Here is a portable solution. - include a sh-bang line (
#!
) to clarify which python this should run under.
-
\$\begingroup\$ Ty for the answer! Just one of follow-up question. When you say "stuffing the entire function into the
return
line works better onascii_to_bin
thanbin_to_base64
." , was that just because it is so long? \$\endgroup\$Have a nice day– Have a nice day2021年04月22日 01:03:47 +00:00Commented Apr 22, 2021 at 1:03 -
\$\begingroup\$ The length of the line was one factor, but it also seemed much more complicated. Laying that out over a few lines would be kinder to the casual reader. \$\endgroup\$chicks– chicks2021年04月22日 03:21:17 +00:00Commented Apr 22, 2021 at 3:21
-
\$\begingroup\$ Gotcha. Although if I had thought you would mention complicated lines I figured it would have been the one in
xor_string
;) \$\endgroup\$Have a nice day– Have a nice day2021年04月22日 04:08:05 +00:00Commented Apr 22, 2021 at 4:08 -
\$\begingroup\$ As you can see readily on this page, code reviews catch what they notice and some things might not crop up until a subsequent review cycle. (Note on this site that you can't modify your current question's code. A new review needs a new question.) \$\endgroup\$chicks– chicks2021年04月22日 04:55:09 +00:00Commented Apr 22, 2021 at 4:55
Needs docstrings and better parameter names.
-
3\$\begingroup\$ I didn't downvote this, but it does seem pretty curt and barely qualifies as a valid answer. \$\endgroup\$chicks– chicks2021年04月21日 21:21:33 +00:00Commented Apr 21, 2021 at 21:21
-
3\$\begingroup\$ There's no such rule that I'm aware of. There might might a rule about showing extra courtesy and effort to new contributors to the site. The community expectations are different on CodeReview.SE than StackOverflow. You might not get downvoted as much for this over there. \$\endgroup\$chicks– chicks2021年04月21日 23:09:21 +00:00Commented Apr 21, 2021 at 23:09
-
2\$\begingroup\$ @Haveaniceday A certain kind, yes. For example, I have no idea what your
shuffle(m, d, xd)
is supposed to do. The parameter names are meaningless, and there's no explanation, either. A docstring would be a good way to say what that function does. If it said what the parameter names mean, then maybe some of them could even remain as they are (for example, ifm
is a message and the docstring says something like "Shuffles the message m by blah blah..."). \$\endgroup\$Manuel– Manuel2021年04月22日 01:06:17 +00:00Commented Apr 22, 2021 at 1:06 -
2\$\begingroup\$ Short reviews like this lack an explanation of how and/or why your suggestion makes the code better, please explain how and/or why you would implement your solution, this helps us all understand how and why it would make the code better in this situation. Please see our Help Section on How To Give Good Answer \$\endgroup\$Malachi– Malachi2021年04月23日 19:28:14 +00:00Commented Apr 23, 2021 at 19:28
-
2\$\begingroup\$ We do appreciated short answers that are straight to the point, and we do have several Meta Posts about short answers. With that said, none of the good short answers on Code Review are this short, please take some time and check out some of the examples put together on this Meta Post \$\endgroup\$Malachi– Malachi2021年04月23日 19:35:43 +00:00Commented Apr 23, 2021 at 19:35