I run a tiny open source project to help create speech aids for disabled people (the GitHub is here).
One of the things that is useful is for people to design speech setups in PowerPoint, and then have the PowerPoint file processed to extract the images and the other information. The script below processes the PowerPoint file using the python-pptx library.
I'm a pretty poor Python programmer - any hints for making things pretty or generally better would be very much appreciated.
#!/usr/bin/python
"Extracting Utterances from CommuniKate pagesets designed in PowerPoint"
# TODO: make the class a relevant thing
# TODO: make the images export more effectively
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.shapes import MSO_SHAPE_TYPE
import io
import os
from PIL import Image
import uuid
# Lookup tables used by get_column() / get_row(): known shape offsets mapped
# to a grid index.  Offsets that are not listed are snapped to the nearest key.
# NOTE(review): the keys look like raw python-pptx position values taken from
# the template slides -- confirm before adding entries.
COL_TABLE = {
    152400: 0,
    1503659: 1,
    1600200: 1,
    2861846: 2,
    2819400: 2,
    2854919: 2,
    2854925: 2,
    4170660: 3,
    4191000: 3,
    5542260: 4,
    5769114: 4,
    5562600: 4,
    5769125: 4,
}
ROW_TABLE = {
    0: 0,
    152400: 0,
    152401: 0,
    1981200: 1,
    3771900: 2,
    5562600: 3,
    5610125: 3,
    6095999: 3,
    7314625: 4,
    7340121: 4,
    7340600: 4,
}

# Characters that remove_punctuation() lets through.
# Note: This may not be robust to internationalisation.
alpha = "abcdefghijklmnopqrstuvwxyz1234567890_"

# Icons found on a slide:
#   key   = (row, col)
#   value = list of one or more PICTURE shapes.
images = {}
def resizeImage(image, scaleFactor, resample=None):
    """Return a copy of ``image`` scaled by ``scaleFactor`` in both dimensions.

    image: object exposing ``size`` as ``(width, height)`` and a
        ``resize(size, resample)`` method (a PIL image in this script).
    scaleFactor: multiplier applied to the width and the height; it may be
        fractional.
    resample: resampling filter passed to ``image.resize``.  When omitted,
        PIL's Lanczos (formerly "antialias") filter is used.

    The target size is rounded to whole pixels and clamped to at least 1x1,
    because ``resize`` needs positive integer dimensions.
    """
    if resample is None:
        # ``Image.ANTIALIAS`` is an old alias of ``Image.LANCZOS`` that newer
        # Pillow releases no longer provide; prefer the current name.
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:
            resample = Image.ANTIALIAS
    old_width, old_height = image.size
    new_size = (max(1, int(round(scaleFactor * old_width))),
                max(1, int(round(scaleFactor * old_height))))
    return image.resize(new_size, resample)
def getShortUuid():
    """Testing helper: return the first dash-separated group of a new uuid1."""
    first_group, _, _ = str(uuid.uuid1()).partition("-")
    return first_group
def remove_punctuation(s):
    """Return ``s`` without the characters whose lower-case form is not in
    the module-level ``alpha`` string.  Upper-case letters are kept as-is."""
    # Adapted from
    # http://openbookproject.net/thinkcs/python/english3e/strings.html
    return "".join(letter for letter in s if letter.lower() in alpha)
def slide_title_placeholder(slide):
    """Return the first placeholder shape on ``slide`` whose placeholder
    index is 0, or None when the slide has no such shape.

    Approach taken from https://github.com/scanny/python-pptx/issues
    /153#issuecomment-84475019
    """
    candidates = (
        shape for shape in slide.shapes
        # ``placeholder_format`` is only consulted for placeholder shapes.
        if shape.is_placeholder and shape.placeholder_format.idx == 0
    )
    return next(candidates, None)
def make_title(label):
    """Turn ``label`` into an identifier-like title: lower-cased, stripped of
    surrounding whitespace, spaces replaced by underscores, and finally
    filtered through remove_punctuation()."""
    normalised = label.lower().strip()
    underscored = normalised.replace(" ", "_")
    return remove_punctuation(underscored)
def get_closest_key(dict, inKey):
    """Return ``inKey`` if it is a key of ``dict``; otherwise return the key
    with the smallest absolute difference from ``inKey`` (numerical keys)."""
    # Adapted from http://stackoverflow.com/a/7934624/170243
    if inKey not in dict:
        def distance(candidate):
            return abs(candidate - inKey)
        return min(dict.keys(), key=distance)
    return inKey
def get_column(leftPos):
    """Return the COL_TABLE value for the listed offset nearest ``leftPos``."""
    return COL_TABLE[get_closest_key(COL_TABLE, leftPos)]
def get_row(topPos):
    """Return the ROW_TABLE value for the listed offset nearest ``topPos``."""
    return ROW_TABLE[get_closest_key(ROW_TABLE, topPos)]
def get_index(leftPos, topPos):
    """Return the flat cell index ``row * 5 + column`` for a position, using
    get_column() for ``leftPos`` and get_row() for ``topPos``."""
    column = get_column(leftPos)
    row = get_row(topPos)
    return 5 * row + column
class utterance(object):
    """Record an utterance and the grid cell it occupies on screen.

    Only the fixed grid is handled for now; different height/width and
    placement options are meant to be supported later.
    """
    # Class-level defaults, kept so the attributes also exist on the class.
    column = 0
    row = 0
    text = ""

    def __init__(self, row, column, text):
        """Store the cell coordinates and the text of the utterance."""
        # Each argument is stored under its own name.  The previous tuple
        # assignment crossed them, so ``.row`` returned the column.
        self.row, self.column, self.text = row, column, text

    def __str__(self):
        """Render as the statement ``utterance[<column>][<row>]="<text>";``."""
        return 'utterance[%d][%d]="%s";' % (
            self.column, self.row, self.text)
def read_utterances_and_links(slide):
    """Collect the text, link marker and fill colour of the grid cells.

    Returns a tuple ``(utterances, links, colors)`` of 5x5 nested lists, all
    indexed ``[co][ro]``:

    utterances -- ASCII text of the last text-bearing shape found in the
        cell, or "link" when no text was found.
    links -- "real" for a cell that holds a folded-corner auto shape,
        otherwise "blank".
    colors -- foreground fill colour of a folded-corner shape when it could
        be read, otherwise "".
    """
    utterances = [["link"] * 5 for _ in range(5)]
    links = [["blank"] * 5 for _ in range(5)]
    colors = [[""] * 5 for _ in range(5)]

    for shape in slide.shapes:
        # NOTE(review): ``co`` comes from shape.top via get_column() and
        # ``ro`` from shape.left via get_row().  export_images() and
        # process_slide() rely on the same convention, so it is kept as is;
        # confirm whether the transposition is intentional.
        co = get_column(shape.top)
        ro = get_row(shape.left)

        # NOTE(review): nesting was reconstructed from a paste that had lost
        # its indentation; confirm that the colour capture belongs to the
        # folded-corner branch.
        if (shape.shape_type == MSO_SHAPE_TYPE.AUTO_SHAPE
                and shape.auto_shape_type == MSO_SHAPE.FOLDED_CORNER):
            links[co][ro] = "real"
            try:
                colors[co][ro] = shape.fill.fore_color.rgb
            except (AttributeError, TypeError):
                # Best effort: a shape without a readable foreground colour
                # simply keeps the "" default.
                pass

        if not shape.has_text_frame:
            continue

        # Drop every non-ASCII character, then decode back to text so the
        # pieces can be joined on both Python 2 and Python 3.
        text = "".join(
            run.text.encode('ascii', 'ignore').decode('ascii')
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs)
        if text:
            # TODO: add the "if shape_type is text box" check.
            utterances[co][ro] = text
    return (utterances, links, colors)
def export_images(slide, utterances):
    """Save one composite PNG icon per grid cell that contains pictures.

    This is a second pass over the shapes: it runs after the utterances are
    known because each file name embeds the utterance of its cell.

    slide: slide whose PICTURE shapes are exported.
    utterances: 5x5 nested list indexed like the cells, used for file names.

    Side effects: refills the module-level ``images`` dict with this slide's
    pictures, creates ``icons/<slide_number>/`` when needed (``slide_number``
    is the module-level counter) and writes the PNG files there.
    """
    # ``images`` lives at module level and is reused by every call.  Empty it
    # first so pictures from an earlier slide are not composited into the
    # icons of this one.
    images.clear()
    for shape in slide.shapes:
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            co = get_column(shape.top)
            ro = get_row(shape.left)
            images.setdefault((co, ro), []).append(shape)

    folder = os.path.join("icons", str(slide_number))

    # Compose each icon out of all the images in the grid cell.
    for x in range(5):
        for y in range(5):
            if (x, y) not in images:
                continue
            cell_shapes = images[x, y]

            # Bounding box of all the pictures in the cell, in slide units.
            box_left = min(shape.left for shape in cell_shapes)
            box_top = min(shape.top for shape in cell_shapes)
            box_right = max(shape.left + shape.width
                            for shape in cell_shapes)
            box_bottom = max(shape.top + shape.height
                             for shape in cell_shapes)

            # Scale maps image pixels to slide distance units and depends on
            # the resolution of the images.  ``//`` spells out the integer
            # division the Python 2 original performed implicitly.
            # NOTE(review): the integer ratios below truncate, so pictures of
            # differing resolution may be resized imprecisely.
            scale = min(shape.width // shape.image.size[0]
                        for shape in cell_shapes)

            # Size of the combined image in actual pixels (not slide units).
            composite = Image.new(
                'RGBA',
                ((box_right - box_left) // scale,
                 (box_bottom - box_top) // scale))

            # Add all the images together.
            for shape in cell_shapes:
                # TODO: flipping.
                part = Image.open(io.BytesIO(shape.image.blob))
                # paste() below uses the part as its own mask, which needs an
                # alpha channel; images without one would be rejected.
                part = part.convert('RGBA')
                width, height = part.size
                crop_box = (int(shape.crop_left * width),
                            int(shape.crop_top * height),
                            int((1 - shape.crop_right) * width),
                            int((1 - shape.crop_bottom) * height))
                part = part.crop(crop_box)
                # part.size because it might have been cropped.
                partScale = shape.width // part.size[0]
                part = resizeImage(part, partScale // scale)
                composite.paste(
                    part,
                    ((shape.left - box_left) // scale,
                     (shape.top - box_top) // scale),
                    part)  # This masks out transparent pixels

            # Crop the final image down to its non-empty area.
            composite = composite.crop(composite.getbbox())

            # Save!
            name = remove_punctuation(
                "%d-%d-" % (x, y) + utterances[x][y]) + ".png"
            if not os.path.exists(folder):
                os.makedirs(folder)
            composite.save(os.path.join(folder, name))
def process_slide(slide, slide_number):
    """Print the generated page function for one slide and export its icons.

    slide: the slide to process.
    slide_number: number used in the progress line, in error messages and in
        the ``SlideNN.png`` reference printed at the end.

    Prints, in order: a progress line, a ``function <title>(){`` header, one
    ``links[..][..]`` or ``utterances[..][..]`` assignment per grid cell, and
    the closing ``document.main.src`` statement.  Icons are written by
    export_images().

    Raises ValueError when the slide has no title placeholder, or when a
    cell's link marker is neither "real" nor "blank".
    """
    # print(...) with one pre-formatted string behaves the same on Python 2
    # and Python 3.
    print("slide number is %s" % slide_number)

    title = slide_title_placeholder(slide)
    if title is None:
        # Without this check the failure is an opaque AttributeError on None.
        raise ValueError(
            "slide %s has no title placeholder" % slide_number)
    print("""function %s(){
reset(); """ % make_title(title.text))

    utterances, links, _colors = read_utterances_and_links(slide)
    export_images(slide, utterances)

    for x in range(5):
        for y in range(5):
            marker = links[x][y]
            # The indices are printed as (y, x), i.e. transposed relative to
            # how the nested lists are indexed.
            if marker == "real":
                print(' links[%d][%d]="%s";' % (
                    y, x, make_title(utterances[x][y])))
            elif marker == "blank":
                print('utterances[%d][%d]="%s";' % (
                    y, x, utterances[x][y]))
            else:
                raise ValueError("You never listen.")

    print(""" document.main.src="images/originalSlides/Slide%02d.png";
}""" % (slide_number))
if __name__ == "__main__":
    # Only run the conversion when executed as a script, so importing this
    # module has no side effects.
    prs = Presentation(
        "../azuleKirsty/testSuite/launch/CommuniKate20launch.pptx")
    # ``slide_number`` is deliberately a module-level name: export_images()
    # reads it to choose the output folder.
    slide_number = 1
    for slide in prs.slides:
        process_slide(slide, slide_number)
        slide_number += 1
        # NOTE(review): this break stops after the first slide; remove it to
        # process the whole presentation.
        break
1 Answer 1
I have some general thoughts about your script, since I haven't worked with Powerpoint in Python.
First, you're manually typing all the letters and numbers unnecessarily. Import the string module to automatically get access to strings containing all the characters you need.
import string
alpha = string.ascii_lowercase + string.digits + '_'
The other advantage to this is the ability to add non-ASCII characters, which will help localisation. You can get these characters with string.lowercase. In your case that might make no difference because it is affected by locale, but this is what I get (in Ireland):
string.lowercase + string.digits + '_'
>>> "abcdefghijklmnopqrstuvwxyzƒšœžaμoßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ0123456789_"
Note: This will be important to remember if you're using run.text.encode('ascii', 'ignore') later, as you're only accounting for ASCII there too.
Your remove_punctuation is unnecessarily long too. You can shorten it to just one line using a generator expression (essentially just a shorthand for a for loop) and the str.join function, which is a handy way to make a single string out of a list of strings attached together.
def remove_punctuation(s):
    """Return ``s`` keeping only characters whose lower-case form is in
    ``alpha``."""
    # Test ``c.lower()`` rather than ``c``: ``alpha`` holds lower-case letters
    # only, so a plain ``c in alpha`` would also strip upper-case letters,
    # which the longer loop version keeps.
    return ''.join(c for c in s if c.lower() in alpha)
Your whitespace in general could be better. This comment is too far from the definition it's referring to, I thought it addressed the previous one.
# Returns the closest key in the dictionary, for numerical keys.
def get_closest_key(dict, inKey):
Try to use whitespace so that related things are together and you leave room between separate parts of the code. Also I personally find it's best to have comments appear after the function definition. Even better, make it a docstring, which is a programmatically accessible string to explain the function to a user.
I also recommend reading the PEP 8 style guide.
Your class's __str__ method can be rewritten using the str.format method instead. It's a bit clearer and easier to use. It doesn't need to have explicit typing. Also, you shouldn't use \" when you could just wrap the string in single quotes and then use " just fine.
def __str__(self):
    """Format column, row and text as ``utterance[<column>][<row>]="<text>";``."""
    # Single quotes around the template let the double quotes appear unescaped.
    return 'utterance[{}][{}]="{}";'.format(
        self.column, self.row, self.text)
-
1 – Thank you - I learned a lot from that - I particularly like the .format and the '"' fact :) – Joe, commented Aug 25, 2015 at 11:53