Managing a CSV of users and permissions

Question 1

I've got a CSV that contains users and permissions in the below format, where users can have as little as one or as many as eight different permissions:

USER,PERM1,PERM2,PERM3,PERM4,PERM5,PERM6,PERM7,PERM8
jdoe,perm1,perm2
tsmith,perm1,perm2,perm3,perm4,perm5,perm6,perm7,perm8

This is the desired format, with each unique user and permission pair on a new line:

USER,PERM
jdoe,perm1
jdoe,perm2
tsmith,perm1
tsmith,perm2
tsmith,perm3
tsmith,perm4
tsmith,perm5
tsmith,perm6
tsmith,perm7
tsmith,perm8

My script below accomplishes this, but it's ugly, repetitive and I know there's a more Pythonic way to do it. Even worse, I had to bring the output file into Excel afterwards to filter and delete the rows with blank PERM values. Any recommendations to shorten this code and cut down on repetition would be much appreciated.

import csv
def reformat_ul(original_ul, formated_ul):
 with open(original_ul) as user_list:
 dict_reader = csv.DictReader(user_list)
 ul = []
 for row in dict_reader:
 ul.append(row)
 with open(formated_ul, 'w') as output2:
 output2.write('USER,PERM\n')
 for uperm in ul:
 p1 = '{},{}\n'.format(uperm['USER'], uperm['PERM1'])
 p2 = '{},{}\n'.format(uperm['USER'], uperm['PERM2'])
 p3 = '{},{}\n'.format(uperm['USER'], uperm['PERM3'])
 p4 = '{},{}\n'.format(uperm['USER'], uperm['PERM4'])
 p5 = '{},{}\n'.format(uperm['USER'], uperm['PERM5'])
 p6 = '{},{}\n'.format(uperm['USER'], uperm['PERM6'])
 p7 = '{},{}\n'.format(uperm['USER'], uperm['PERM7'])
 p8 = '{},{}\n'.format(uperm['USER'], uperm['PERM8'])
 output2.write(p1)
 output2.write(p2)
 output2.write(p3)
 output2.write(p4)
 output2.write(p5)
 output2.write(p6)
 output2.write(p7)
 output2.write(p8)
reformat_ul('user_list.csv', 'output.txt')

Question 2

This is shorter:

import csv
def reformat_ul(original_ul, formated_ul):
 with open(formated_ul, 'w') as output2:
 output2.write('USER,PERM\n')
 with open(original_ul) as user_list:
 dict_reader = csv.DictReader(user_list)
 for row in dict_reader:
 user = row['USER']
 for key in sorted(row)[:-1]:
 if row[key]:
 output2.write("%s,%s\n" % (user, row[key]))
reformat_ul('user_list.csv', 'output.txt')

The highlights:

It doesn't waste memory by storing the rows in a temporary list. Using the two nested with there, it outputs while reading the output, piping properly
In sorted(row)[:-1] I take advantage of the fact that I know the column names, and that USER will come after all the PERM1..PERM8. If you need a more flexible implementation then you can amend the deepest if there, for example if key.startswith('PERM') and row[key]

Question 3

With included csvwriter writerow method.

 import csv
 from collections import OrderedDict
 from itertools import ifilter
 def reformat_ul(original_ul, formated_ul):
 with open(formated_ul, 'w') as output2, open(original_ul) as user_list:
 csv_dictreader = csv.DictReader(user_list)
 field1, field2 = csv_dictreader.fieldnames[0:2]
 field2 = field2[:-1]
 csvwriter = csv.writer(output2)
 csvwriter.writerow([field1, field2])
 for row in csv_dictreader:
 user = row.pop(field1)
 perms = ifilter(lambda k: k,
 OrderedDict(sorted(row.items())).itervalues())
 map(lambda p: csvwriter.writerow([user, p]), perms)
 reformat_ul('user_list.csv', 'output.txt')

Question 4

A simple csv.reader does the job for me. For each row, I pop off the username and then simply loop over the rest of the data in the row and yield it with the username.

import csv
def read_original(path):
 with open(path, 'r') as f:
 reader = csv.reader(f)
 reader.next()
 for row in reader:
 user = row.pop(0)
 for perm in row:
 yield user, perm
def reformat_ul(original_ul, formatted_ul):
 with open(formatted_ul, 'w') as f:
 writer = csv.writer(f)
 writer.writerow(['USER', 'PERM'])
 for row in read_original(original_ul):
 writer.writerow(row)
reformat_ul('user_list.csv', 'output.txt')

This allows the read_original iterator to be used independently.

Question 5

As you don't really care for which permissions are present or not, just to split into those existing I would suggest reading the file using the simpler csv.reader(), skipping the first line, and then output the permissions in the same run.

Here is some code:

import csv
import sys
DEFAULT_SOURCEFILE = 'user_list.csv'
DEFAULT_DESTINATIONFILE = 'output.txt'
def split_permissions(sourcefile, destinationfile):
 """Split permissions from sourcefile into single permission in destinationfile."""
 # Open both files simultaneously, to make an efficient read and write loop
 with open(sourcefile) as source, open(destinationfile, 'w') as output:
 dict_reader = csv.reader(source)
 next(dict_reader) # Skip header line
 output.write('USER,PERM\n')
 # Read each line in source, and split into multiple output lines
 for row in dict_reader:
 print row
 user = row[0]
 for perm in row[1:]:
 output.write('{},{}\n'.format(user, perm))
if __name__ == '__main__':
 # Use filenames from command line if present, else use defaults
 if len(sys.argv) == 3:
 sourcefile = sys.argv[1]
 destinationfile = sys.argv[2]
 else:
 sourcefile = DEFAULT_SOURCEFILE
 destinationfile = DEFAULT_DESTINATIONFILE
 # Split the permissions file into single permissions
 split_permissions(sourcefile, destinationfile)

In addition to the new function to actually do the split, I've also added some basic handling to allow for filenames to be defined on the command line. And packed everything in a construct so that the file can be used as a module presenting the split_permissions() function.

janos janos 113k15 gold badges154 silver badges396 bronze badges · Answer 1 · 2013-08-30 05:08:43Z

This is shorter:

import csv
def reformat_ul(original_ul, formated_ul):
 with open(formated_ul, 'w') as output2:
 output2.write('USER,PERM\n')
 with open(original_ul) as user_list:
 dict_reader = csv.DictReader(user_list)
 for row in dict_reader:
 user = row['USER']
 for key in sorted(row)[:-1]:
 if row[key]:
 output2.write("%s,%s\n" % (user, row[key]))
reformat_ul('user_list.csv', 'output.txt')

The highlights:

It doesn't waste memory by storing the rows in a temporary list. Using the two nested with there, it outputs while reading the output, piping properly
In sorted(row)[:-1] I take advantage of the fact that I know the column names, and that USER will come after all the PERM1..PERM8. If you need a more flexible implementation then you can amend the deepest if there, for example if key.startswith('PERM') and row[key]

P̲̳x͓L̳ P̲̳x͓L̳ 1194 bronze badges · Answer 2 · 2013-08-30 22:59:43Z

With included csvwriter writerow method.

 import csv
 from collections import OrderedDict
 from itertools import ifilter
 def reformat_ul(original_ul, formated_ul):
 with open(formated_ul, 'w') as output2, open(original_ul) as user_list:
 csv_dictreader = csv.DictReader(user_list)
 field1, field2 = csv_dictreader.fieldnames[0:2]
 field2 = field2[:-1]
 csvwriter = csv.writer(output2)
 csvwriter.writerow([field1, field2])
 for row in csv_dictreader:
 user = row.pop(field1)
 perms = ifilter(lambda k: k,
 OrderedDict(sorted(row.items())).itervalues())
 map(lambda p: csvwriter.writerow([user, p]), perms)
 reformat_ul('user_list.csv', 'output.txt')

Graeme Stuart Graeme Stuart 6127 silver badges14 bronze badges · Answer 3 · 2013-09-06 23:06:41Z

A simple csv.reader does the job for me. For each row, I pop off the username and then simply loop over the rest of the data in the row and yield it with the username.

import csv
def read_original(path):
 with open(path, 'r') as f:
 reader = csv.reader(f)
 reader.next()
 for row in reader:
 user = row.pop(0)
 for perm in row:
 yield user, perm
def reformat_ul(original_ul, formatted_ul):
 with open(formatted_ul, 'w') as f:
 writer = csv.writer(f)
 writer.writerow(['USER', 'PERM'])
 for row in read_original(original_ul):
 writer.writerow(row)
reformat_ul('user_list.csv', 'output.txt')

This allows the read_original iterator to be used independently.

holroy holroy 11.8k1 gold badge27 silver badges59 bronze badges · Answer 4 · 2015-11-26 01:47:36Z

As you don't really care for which permissions are present or not, just to split into those existing I would suggest reading the file using the simpler csv.reader(), skipping the first line, and then output the permissions in the same run.

Here is some code:

import csv
import sys
DEFAULT_SOURCEFILE = 'user_list.csv'
DEFAULT_DESTINATIONFILE = 'output.txt'
def split_permissions(sourcefile, destinationfile):
 """Split permissions from sourcefile into single permission in destinationfile."""
 # Open both files simultaneously, to make an efficient read and write loop
 with open(sourcefile) as source, open(destinationfile, 'w') as output:
 dict_reader = csv.reader(source)
 next(dict_reader) # Skip header line
 output.write('USER,PERM\n')
 # Read each line in source, and split into multiple output lines
 for row in dict_reader:
 print row
 user = row[0]
 for perm in row[1:]:
 output.write('{},{}\n'.format(user, perm))
if __name__ == '__main__':
 # Use filenames from command line if present, else use defaults
 if len(sys.argv) == 3:
 sourcefile = sys.argv[1]
 destinationfile = sys.argv[2]
 else:
 sourcefile = DEFAULT_SOURCEFILE
 destinationfile = DEFAULT_DESTINATIONFILE
 # Split the permissions file into single permissions
 split_permissions(sourcefile, destinationfile)

In addition to the new function to actually do the split, I've also added some basic handling to allow for filenames to be defined on the command line. And packed everything in a construct so that the file can be used as a module presenting the split_permissions() function.

Stack Exchange Network

Managing a CSV of users and permissions

4 Answers 4

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Managing a CSV of users and permissions

4 Answers 4

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions