14
\$\begingroup\$

I have written a pop song generator which uses the Markovify library to produce lyrics based on (just for testing purposes) songs by Avril Lavigne.

In order to make the generator a bit more competent I have adapted some code which uses the nltk library to determine whether or not two words rhyme. I have used this to make the verses conform to an ABAC rhyme scheme.

It works, but is slow and sometimes gets stuck in a loop when the rhyme_finder function is called. I would greatly appreciate any suggestions as to how to streamline the programme, and or make it more efficient. I have linked the main programme app.py below, as well as the rhyme_finder function. But the Avril Lavigne lyrics, and the nltk.tokenize lyrics are on Pastebin.

The Markovify lib (https://github.com/jsvine/markovify), the Sylco syllable counter (https://github.com/eaydin/sylco), and some nltk dependencies (cmudict) are required to run.

Lyric Generator

import random
import markovify
import ast
from rhyme import rhyme_finder
from sylco import sylco
# Open and model lyrics
with open('lavigne_verse.txt') as f:
 verse_text = f.read()
with open('lavigne_chorus.txt') as f:
 chorus_text = f.read()
with open('lyrics_tokenize.txt') as f:
 tokenized_text = f.read()
verse_model = markovify.NewlineText(verse_text, state_size=2)
chorus_model = markovify.NewlineText(chorus_text, state_size=2)
# Evaluate tokenized_text as a list
tokenized_text = ast.literal_eval(tokenized_text)
# Specify then remove punctuation
punc = set([',','.','"','?','!'])
def clean(str):
 if str[-1] in punc:
 return str[:-1]
 return str
# Generate line that rhymes with stem of verse line 1
def match_rhyme(stem, verse_model):
 # Check if rhymes exist
 try:
 ls = rhyme_finder(stem, tokenized_text)
 except KeyError:
 return None
 if not ls:
 return None
 # If rhymes exist generate lines
 for n in range(100):
 while True:
 rhyme_line = verse_model.make_sentence()
 if rhyme_line is not None:
 # Keep syllables within range
 syl_count = sylco(rhyme_line)
 if syl_count > 16 or syl_count < 6:
 continue
 # Get stem of rhyme_line
 rhyme_stem = clean(rhyme_line.rsplit(None, 1)[-1])
 # Check for rhyme
 if rhyme_stem in ls:
 return rhyme_line
 break
 return None
# Generate 4-line verse
def make_verse(verse_model):
 verse = ''
 stem = None
 # Markovify for each line
 for _ in range(4):
 while True:
 # Try to find rhyming match between lines 1 and 3
 if _ == 2:
 match = match_rhyme(stem, verse_model)
 # If match, add to verse.
 if match is not None:
 verse += (match + '\n')
 break
 # Otherwise add non-random markovify line
 line = verse_model.make_sentence()
 if line is not None:
 # Keep syllables within range
 syl_count = sylco(line)
 if syl_count > 16 or syl_count < 6:
 continue
 # Cache line for rhyming
 if _ == 0:
 stem = clean(line.rsplit(None, 1)[-1])
 verse += (line + '\n')
 break
 return verse
# Construct chorus
def make_chorus(chorus_model):
 chorus = '[Chorus]' + '\n'
 # Two short lines
 for _ in range(2):
 while True:
 line = chorus_model.make_sentence()
 if line is not None:
 # Keep syllables less than 11
 syl_count = sylco(line)
 if syl_count > 10:
 continue
 chorus += (line + '\n')
 break
 # Two line reprieve
 while True:
 repeat = chorus_model.make_sentence()
 if repeat is not None:
 chorus += (repeat + '\n')
 chorus += (repeat + '\n')
 break
 return chorus
# Construct song
def make_song(chorus_model, verse_model):
 song_chorus = make_chorus(chorus_model)
 song = make_verse(verse_model) + '\n' + song_chorus + '\n' \
 + make_verse(verse_model) + '\n' + make_verse(verse_model) + '\n'\
 + (2 * (song_chorus + '\n'))
 return song
print (make_song(chorus_model, verse_model))

Rhyme_finder

import random
import re
import operator
from nltk.corpus import cmudict
e = cmudict.entries()
d = cmudict.dict()
def make_word_list(tokenized_text):
 word_list = []
 for i in tokenized_text:
 try:
 d[i.lower()]
 except KeyError:
 pass
 else:
 if i.lower() == "'s":
 pass
 elif i[-1] == ".":
 pass
 else:
 word_list.append((i.lower(), d[i.lower()][0]))
 return word_list
def unique(s):
 u = []
 for x in s:
 if x not in u:
 u.append(x)
 else:
 pass
 return u
def meter(word):
 pron = d[word]
 m1 = []
 m2 = []
 mx = []
 if len(pron) == 1:
 for i in pron[0]:
 if '0' in i:
 m1.append(0)
 elif '1' in i:
 m1.append(1)
 elif '2' in i:
 m1.append(2)
 else:
 pass
 mx = [m1]
 elif len(pron) >= 2:
 for i in pron[0]:
 if '0' in i:
 m1.append(0)
 elif '1' in i:
 m1.append(1)
 elif '2' in i:
 m1.append(2)
 else:
 pass
 for i in pron[1]:
 if '0' in i:
 m2.append(0)
 elif '1' in i:
 m2.append(1)
 elif '2' in i:
 m2.append(2)
 else:
 pass
 mx = [m1, m2]
 m = []
 if len(mx) == 1:
 w0 = reduce(operator.mul, mx[0], 1)
 if w0 >= 2:
 for i in mx[0]:
 if i == 1:
 m.append('u')
 elif i == 2:
 m.append('s')
 elif w0 == 1:
 for i in mx[0]:
 m.append('s')
 elif w0 == 0:
 for i in mx[0]:
 if i == 0:
 m.append('u')
 elif i == 1 or i == 2:
 m.append('s')
 elif len(mx) == 2:
 w0 = reduce(operator.mul, mx[0], 1)
 w1 = reduce(operator.mul, mx[1], 1)
 if w0 >= 2 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if i * j == 1:
 m.append('u')
 elif i * j == 4:
 m.append('s')
 elif i * j == 2:
 m.append('x')
 elif w0 == 1 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 m.append('s')
 elif w0 == 0 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if i == j and i * j >= 1:
 m.append('s')
 elif i != j and i * j == 0:
 m.append('x')
 elif i == j and i * j == 0:
 m.append('u')
 elif w0 >= 2 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 1 and j == 0:
 m.append('u')
 elif i == 2 and j == 0:
 m.append('x')
 elif i == 1 and j == 1:
 m.append('x')
 elif i == 1 and j == 2:
 m.append('x')
 elif i == 2 and j == 1:
 m.append('s')
 elif i == 2 and j == 2:
 m.append('s')
 elif w0 == 0 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 0 and j == 1:
 m.append('u')
 elif i == 0 and j == 2:
 m.append('x')
 elif i == 1 and j == 1:
 m.append('x')
 elif i == 2 and j == 1:
 m.append('x')
 elif i == 1 and j == 2:
 m.append('s')
 elif i == 2 and j == 2:
 m.append('s')
 elif w0 == 1 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if j == 1:
 m.append('x')
 elif j == 2:
 m.append('s')
 elif w0 >= 2 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 1:
 m.append('x')
 elif i == 2:
 m.append('s')
 elif w0 == 1 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if j == 0:
 m.append('x')
 elif j == 1:
 m.append('s')
 elif j == 2:
 m.append('s')
 elif w0 == 0 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 0:
 m.append('x')
 if i == 1:
 m.append('s')
 if i == 2:
 m.append('s')
 return m
def strip_numbers(x):
 xj = '.'.join(x)
 xl = re.split('0|1|2', xj)
 xjx = ''.join(xl)
 xlx = xjx.split('.')
 return xlx
def last_stressed_vowel(word):
 if len(d[word]) <= 1:
 pron = d[word][0]
 else:
 p0 = d[word][0]
 p1 = d[word][1]
 sj0 = ''.join(p0)
 sl0 = re.split('0|1|2', sj0)
 sj1 = ''.join(p1)
 sl1 = re.split('0|1|2', sj1)
 if len(sl1) < len(sl0):
 pron = p1
 else:
 pron = p0
 mtr = meter(word)
 vowel_index = []
 if len(mtr) == 1:
 lsv = -1
 elif mtr[-1] == 's' or mtr[-1] == 'x':
 lsv = -1
 elif mtr[-2] == 's' or mtr[-3] == 'x':
 lsv = -2
 elif mtr[-3] == 's' or mtr[-3] == 'x':
 lsv = -3
 elif mtr[-4] == 's' or mtr[-4] == 'x':
 lsv = -4
 elif mtr[-5] == 's' or mtr[-5] == 'x':
 lsv = -5
 elif mtr[-6] == 's' or mtr[-6] == 'x':
 lsv = -6
 elif mtr[-7] == 's' or mtr[-7] == 'x':
 lsv = -7
 elif mtr[-8] == 's' or mtr[-8] == 'x':
 lsv = -8
 elif mtr[-9] == 's' or mtr[-9] == 'x':
 lsv = -9
 elif mtr[-10] == 's' or mtr[-10] == 'x':
 lsv = -10
 else:
 lsv = -1
 for i in pron:
 if '0' in i or '1' in i or '2' in i:
 vowel_index.append(pron.index(i))
 else:
 continue
 return vowel_index[lsv]
def rhyme_finder(word, tokenized_text):
 word_list = make_word_list(tokenized_text)
 word_list_u = unique(word_list)
 rhyming_words = []
 if len(d[word]) <= 1:
 pron = d[word][0]
 else:
 p0 = d[word][0]
 p1 = d[word][1]
 sj0 = ''.join(p0)
 sl0 = re.split('0|1|2', sj0)
 sj1 = ''.join(p1)
 sl1 = re.split('0|1|2', sj1)
 if len(sl1) < len(sl0):
 pron = p1
 else:
 pron = p0
 pron = strip_numbers(pron)
 lsv = last_stressed_vowel(word)
 rhyme_part = pron[lsv:]
 lrp = len(rhyme_part) * -1
 for (x, y) in word_list_u:
 ps = strip_numbers(y)
 if ps[lrp:] == rhyme_part and ps[lrp-1:] != pron[lsv-1:]:
 rhyming_words.append(x)
 else:
 pass
 rw = [i for i in rhyming_words if not i == word]
 return rw

lavigne_verse.txt = http://pastebin.com/HkXzr1eB

lavigne_chorus.txt = http://pastebin.com/apD7QxHC

lavigne_tokenize.txt = http://pastebin.com/khzjndPT

Many thanks!

chicks
2,8593 gold badges18 silver badges30 bronze badges
asked Dec 1, 2016 at 14:33
\$\endgroup\$
2
  • 2
    \$\begingroup\$ Instead of "if line is not None:" you could simply go for "if line:". Further, you could some more functions for all the if elifs such as in meter(word) function. This would improve readability. \$\endgroup\$ Commented Dec 1, 2016 at 15:54
  • \$\begingroup\$ Yes you're right. Also just realised that the syllable counter in the rhyme_finder function does not do anything. \$\endgroup\$ Commented Dec 1, 2016 at 16:04

2 Answers 2

3
\$\begingroup\$

Some suggestions:

def unique(s):
 u = []
 for x in s:
 if x not in u:
 u.append(x)
 else:
 pass
 return u

may become much simpler:

def unique(s):
 return list(set(s))

but - as you use it only in one place

word_list_u = unique(word_list)

and in turn the word_list_u is used only in one place, too:

for (x, y) in word_list_u:

where word_list_u need not be a list.

So you may delete both the unique() definition and its usage, and write directly

for (x, y) in set(word_list):

as sets in Python inherently not allow duplicates.


punc = set([',','.','"','?','!'])
def clean(str):
 if str[-1] in punc:
 return str[:-1]
 return str

would be clearer with using the endswith() method and a tuple of punctuation symbols:

punc = tuple(',."?!')
def clean(str):
 if str.endswith(punc):
 return str[:-1]
 return str
answered Aug 16, 2017 at 18:05
\$\endgroup\$
2
\$\begingroup\$

In this part of your code

elif mtr[-1] == 's' or mtr[-1] == 'x':
 lsv = -1
elif mtr[-2] == 's' or mtr[-3] == 'x':
 lsv = -2
elif mtr[-3] == 's' or mtr[-3] == 'x':
 lsv = -3
elif mtr[-4] == 's' or mtr[-4] == 'x':
 lsv = -4
elif mtr[-5] == 's' or mtr[-5] == 'x':
 lsv = -5
elif mtr[-6] == 's' or mtr[-6] == 'x':
 lsv = -6
elif mtr[-7] == 's' or mtr[-7] == 'x':
 lsv = -7
elif mtr[-8] == 's' or mtr[-8] == 'x':
 lsv = -8
elif mtr[-9] == 's' or mtr[-9] == 'x':
 lsv = -9
elif mtr[-10] == 's' or mtr[-10] == 'x':
 lsv = -10
else:
 lsv = -1

you probably have an error:

elif mtr[-2] == 's' or mtr[-3] == 'x': # both indices have to be -2

and you can shorten it, too:

for i in range(-1, -11, -1):
 if mtr[i] in 'sx':
 lsv = i
 break
else:
 lsv = -1

(The else clause in a for loop is performed if and only if the loop finishes without encountering break.)

DLosc
4954 silver badges8 bronze badges
answered Aug 16, 2017 at 18:43
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.