I need to compress some text, and I'm wondering whether my program could be made more efficient. The only improvements I could think of are the 'import re' and turning the filename-input-and-read part into a function.
import re
from ast import literal_eval
################## Compression #####################
def comp():
    """Compress a text file from the current directory into ``compressed.txt``.

    The user is prompted for a file name (``.txt`` is appended) until a file
    that can be opened is given. The text is split into tokens: runs of word
    characters, or single non-word characters (spaces, punctuation, newlines).

    ``compressed.txt`` receives two lines:
      1. the ``str()`` of the list of unique tokens, in order of first
         appearance;
      2. the ``str()`` of the list of 1-based indices into that list, one per
         token of the original text.
    """
    while True:
        fileName = input('Please input text file name in current directory: ') + '.txt'
        try:
            source = open(fileName)
        except OSError:
            # Only "cannot open" is retried; Ctrl-C / EOF still abort the prompt.
            print('No Such text file in current directory name: ' + fileName)
            continue
        # The context manager closes the file even if reading fails.
        with source:
            content = source.read()
        break

    tokens = re.findall(r'[\w]+|[\W]', content)

    word_list = []   # unique tokens, first-appearance order
    index_of = {}    # token -> 1-based position in word_list (O(1) lookup)
    indices = []     # one entry per token of the input
    for token in tokens:
        position = index_of.get(token)
        if position is None:
            word_list.append(token)
            position = index_of[token] = len(word_list)
        indices.append(position)

    # A single 'w' open both creates and truncates the output file.
    with open('compressed.txt', 'w') as target:
        target.write(str(word_list) + '\n' + str(indices))
####################################################
################## De-Compression ##################
def decomp():
    """Print the text reconstructed from a two-line compressed file.

    The user is prompted for a file name (``.txt`` is appended) until a file
    that can be opened is given. The file is expected to hold a Python list
    literal of tokens on its first line and a list literal of 1-based indices
    into that token list on its second line. The tokens selected by the
    indices are concatenated and printed.

    Raises:
        ValueError / SyntaxError: propagated from ``literal_eval`` when a line
            is not a valid Python literal.
        IndexError: when an index points past the end of the token list.
    """
    while True:
        fileName = input('Please input text file name in current directory: ') + '.txt'
        try:
            source = open(fileName)
        except OSError:
            # Only "cannot open" is retried; Ctrl-C / EOF still abort the prompt.
            print('No Such text file in current directory name: ' + fileName)
            continue
        break

    # Close the file as soon as both lines have been parsed.
    with source:
        words = literal_eval(source.readline().rstrip('\n'))
        pos = literal_eval(source.readline())

    # Indices are 1-based, hence the -1 when looking tokens up.
    sentence = ''.join(words[index - 1] for index in pos)
    print(sentence)
####################################################
-
Please see what you may and may not do after receiving answers, as you have an answer that directly addressed the code you removed. – Peilonrayz ♦, May 19, 2017 at 12:13
1 Answer 1
As you said, there is not much to do. I only turned the file-name prompt into a function, used the with
context manager to read and write files instead of opening and closing them manually, applied some PEP8 enhancements, and used string.format()
instead of concatenation.
import re
import os
from ast import literal_eval
def get_filename():
    """Prompt until the user names an existing ``.txt`` file; return its name.

    ``.txt`` is appended when the typed name does not already end with it.
    A message is printed and the prompt repeats when no such file exists.
    """
    prompt = 'Please input text file name in current directory: \n'
    while True:
        candidate = input(prompt)
        if not candidate.endswith('.txt'):
            candidate = '{}.txt'.format(candidate)
        if not os.path.isfile(candidate):
            print('No Such text file in current directory: {}'.format(candidate))
            continue
        return candidate
def comp():
    """Compress the file chosen via ``get_filename()`` into ``compressed.txt``.

    The text is split into tokens: runs of word characters, or single
    non-word characters. ``compressed.txt`` receives two lines: the list of
    unique tokens in order of first appearance, then the list of 1-based
    indices into that list (one per token of the input).
    """
    # Text mode: the str regex below cannot be applied to bytes.
    with open(get_filename(), 'r') as file_in:
        content = file_in.read()

    tokens = re.findall(r'[\w]+|[\W]', content)

    word_list, b = [], []   # unique tokens / per-token 1-based indices
    index_of = {}           # token -> 1-based position, avoids list.index scans
    for word in tokens:
        r = index_of.get(word)
        if r is None:
            word_list.append(word)
            r = index_of[word] = len(word_list)
        b.append(r)

    # 'w' creates the file when missing and truncates any previous content;
    # 'r+' would fail on a missing file and leave stale trailing data.
    with open('compressed.txt', 'w') as file_out:
        file_out.write('{}\n{}'.format(word_list, b))
def decomp():
    """Print the text reconstructed from a two-line compressed file.

    The file chosen via ``get_filename()`` is expected to hold a list literal
    of tokens on its first line and a list literal of 1-based indices into
    that list on its second line.
    """
    # Text mode: rstrip('\n') with a str argument raises TypeError on bytes.
    with open(get_filename(), 'r') as file_in:
        words = literal_eval(file_in.readline().rstrip('\n'))
        pos = literal_eval(file_in.readline())
    # Indices are 1-based, hence the -1 when looking tokens up.
    print(''.join(words[index - 1] for index in pos))
I might be able to help some more, but I need more context on what you are trying to accomplish: maybe working on multiple files, taking filenames as arguments, or some other improvements.
EDIT:
After reviewing the assignment, I see that the previous solution won't take care of punctuation; this one will:
import string
import os
def get_filename():
    """Ask the user for a text file name until an existing file is given."""
    while True:
        # Read the name typed by the user.
        typed = input('Please input text file name in current directory: \n')
        # Make sure the name carries the .txt extension.
        filename = typed if typed.endswith('.txt') else '{}.txt'.format(typed)
        # Hand the name back as soon as it refers to an existing file.
        found = os.path.isfile(filename)
        if found:
            return filename
        # Otherwise report the problem and ask again.
        print('No Such text file in current directory: {}'.format(filename))
def comp():
    """Compress a text file by replacing every word with a ``{index}`` placeholder.

    The file is chosen via ``get_filename()``. A word is a maximal run of
    characters that are neither whitespace nor ``string.punctuation``;
    punctuation and whitespace stay in place. The result is written to
    ``_COMP_<filename>`` as the placeholder text, a newline, and a footer
    ``***word0|word1|...***`` holding the sorted unique words, so that
    ``str.format(*words)`` restores the original text.
    """
    import re  # local import: this snippet's import block only has string and os

    filename = get_filename()
    with open(filename, 'r') as file_in:
        full_text = file_in.read()

    word_pattern = re.compile(r'[^\s' + re.escape(string.punctuation) + ']+')
    # Create a sorted list of unique words and a word -> index lookup.
    word_list = sorted(set(word_pattern.findall(full_text)))
    index_of = {word: index for index, word in enumerate(word_list)}

    def placeholder(match):
        # Double brackets are emitted literally by .format().
        return '{{{}}}'.format(index_of[match.group()])

    # Literal braces must be doubled or str.format() would choke on them when
    # decompressing. Braces are punctuation, so this never alters a word.
    escaped = full_text.replace('{', '{{').replace('}', '}}')
    # One pass over whole words only: chained str.replace() would also rewrite
    # substrings of longer words and the digits of earlier placeholders.
    compressed = word_pattern.sub(placeholder, escaped)

    # Create a separate file with the same name but _COMP_, to dump the compressed text.
    with open('_COMP_{}'.format(filename), 'w') as file_out:
        # The word list reference is appended as the last line.
        file_out.write('{}\n***{}***'.format(compressed, '|'.join(word_list)))
def decomp():
    """Print the original text restored from a ``_COMP_`` file.

    The file chosen via ``get_filename()`` is expected to contain the
    placeholder text, a newline, and a last line of the form
    ``***word0|word1|...***``. Each ``{index}`` placeholder is replaced by the
    word at that position of the footer list.
    """
    with open(get_filename(), 'r') as file_in:
        content = file_in.read()
    # Split off the footer line. Dropping a trailing newline first tolerates
    # files re-saved by an editor, and rpartition also copes with an empty
    # file. The newline separating text and footer is not part of the text,
    # so it is not printed.
    body, _, footer = content.rstrip('\n').rpartition('\n')
    # Placeholders index the list in its stored order; no re-sorting needed.
    reference = footer.strip('*').split('|')
    # Print the full text replacing the indexes by the corresponding word in the list.
    print(body.format(*reference))
-
Sorry, I expressed myself poorly: the second script I posted keeps punctuation in place while compressing everything else. – Dalvenjia, Sep 23, 2016 at 20:00