I have written a pop song generator which uses the Markovify library to produce lyrics based on (just for testing purposes) songs by Avril Lavigne.
In order to make the generator a bit more competent I have adapted some code which uses the nltk
library to determine whether or not two words rhyme. I have used this to make the verses conform to an ABAC rhyme scheme.
It works, but is slow and sometimes gets stuck in a loop when the rhyme_finder
function is called. I would greatly appreciate any suggestions as to how to streamline the programme, and or make it more efficient. I have linked the main programme app.py
below, as well as the rhyme_finder
function. But the Avril Lavigne lyrics, and the nltk.tokenize
lyrics are on Pastebin.
The Markovify
lib (https://github.com/jsvine/markovify), the Sylco
syllable counter (https://github.com/eaydin/sylco), and some nltk
dependencies (cmudict
) are required to run.
Lyric Generator
import random
import markovify
import ast
from rhyme import rhyme_finder
from sylco import sylco
# Open and model lyrics
with open('lavigne_verse.txt') as f:
verse_text = f.read()
with open('lavigne_chorus.txt') as f:
chorus_text = f.read()
with open('lyrics_tokenize.txt') as f:
tokenized_text = f.read()
verse_model = markovify.NewlineText(verse_text, state_size=2)
chorus_model = markovify.NewlineText(chorus_text, state_size=2)
# Evaluate tokenized_text as a list
tokenized_text = ast.literal_eval(tokenized_text)
# Specify then remove punctuation
punc = set([',','.','"','?','!'])
def clean(str):
if str[-1] in punc:
return str[:-1]
return str
# Generate line that rhymes with stem of verse line 1
def match_rhyme(stem, verse_model):
# Check if rhymes exist
try:
ls = rhyme_finder(stem, tokenized_text)
except KeyError:
return None
if not ls:
return None
# If rhymes exist generate lines
for n in range(100):
while True:
rhyme_line = verse_model.make_sentence()
if rhyme_line is not None:
# Keep syllables within range
syl_count = sylco(rhyme_line)
if syl_count > 16 or syl_count < 6:
continue
# Get stem of rhyme_line
rhyme_stem = clean(rhyme_line.rsplit(None, 1)[-1])
# Check for rhyme
if rhyme_stem in ls:
return rhyme_line
break
return None
# Generate 4-line verse
def make_verse(verse_model):
verse = ''
stem = None
# Markovify for each line
for _ in range(4):
while True:
# Try to find rhyming match between lines 1 and 3
if _ == 2:
match = match_rhyme(stem, verse_model)
# If match, add to verse.
if match is not None:
verse += (match + '\n')
break
# Otherwise add non-random markovify line
line = verse_model.make_sentence()
if line is not None:
# Keep syllables within range
syl_count = sylco(line)
if syl_count > 16 or syl_count < 6:
continue
# Cache line for rhyming
if _ == 0:
stem = clean(line.rsplit(None, 1)[-1])
verse += (line + '\n')
break
return verse
# Construct chorus
def make_chorus(chorus_model):
chorus = '[Chorus]' + '\n'
# Two short lines
for _ in range(2):
while True:
line = chorus_model.make_sentence()
if line is not None:
# Keep syllables less than 11
syl_count = sylco(line)
if syl_count > 10:
continue
chorus += (line + '\n')
break
# Two line reprieve
while True:
repeat = chorus_model.make_sentence()
if repeat is not None:
chorus += (repeat + '\n')
chorus += (repeat + '\n')
break
return chorus
# Construct song
def make_song(chorus_model, verse_model):
song_chorus = make_chorus(chorus_model)
song = make_verse(verse_model) + '\n' + song_chorus + '\n' \
+ make_verse(verse_model) + '\n' + make_verse(verse_model) + '\n'\
+ (2 * (song_chorus + '\n'))
return song
print (make_song(chorus_model, verse_model))
Rhyme_finder
import random
import re
import operator
from nltk.corpus import cmudict
e = cmudict.entries()
d = cmudict.dict()
def make_word_list(tokenized_text):
word_list = []
for i in tokenized_text:
try:
d[i.lower()]
except KeyError:
pass
else:
if i.lower() == "'s":
pass
elif i[-1] == ".":
pass
else:
word_list.append((i.lower(), d[i.lower()][0]))
return word_list
def unique(s):
u = []
for x in s:
if x not in u:
u.append(x)
else:
pass
return u
def meter(word):
pron = d[word]
m1 = []
m2 = []
mx = []
if len(pron) == 1:
for i in pron[0]:
if '0' in i:
m1.append(0)
elif '1' in i:
m1.append(1)
elif '2' in i:
m1.append(2)
else:
pass
mx = [m1]
elif len(pron) >= 2:
for i in pron[0]:
if '0' in i:
m1.append(0)
elif '1' in i:
m1.append(1)
elif '2' in i:
m1.append(2)
else:
pass
for i in pron[1]:
if '0' in i:
m2.append(0)
elif '1' in i:
m2.append(1)
elif '2' in i:
m2.append(2)
else:
pass
mx = [m1, m2]
m = []
if len(mx) == 1:
w0 = reduce(operator.mul, mx[0], 1)
if w0 >= 2:
for i in mx[0]:
if i == 1:
m.append('u')
elif i == 2:
m.append('s')
elif w0 == 1:
for i in mx[0]:
m.append('s')
elif w0 == 0:
for i in mx[0]:
if i == 0:
m.append('u')
elif i == 1 or i == 2:
m.append('s')
elif len(mx) == 2:
w0 = reduce(operator.mul, mx[0], 1)
w1 = reduce(operator.mul, mx[1], 1)
if w0 >= 2 and w1 >= 2:
for (i, j) in zip(mx[0], mx[1]):
if i * j == 1:
m.append('u')
elif i * j == 4:
m.append('s')
elif i * j == 2:
m.append('x')
elif w0 == 1 and w1 == 1:
for (i, j) in zip(mx[0], mx[1]):
m.append('s')
elif w0 == 0 and w1 == 0:
for (i, j) in zip(mx[0], mx[1]):
if i == j and i * j >= 1:
m.append('s')
elif i != j and i * j == 0:
m.append('x')
elif i == j and i * j == 0:
m.append('u')
elif w0 >= 2 and w1 == 0:
for (i, j) in zip(mx[0], mx[1]):
if i == 1 and j == 0:
m.append('u')
elif i == 2 and j == 0:
m.append('x')
elif i == 1 and j == 1:
m.append('x')
elif i == 1 and j == 2:
m.append('x')
elif i == 2 and j == 1:
m.append('s')
elif i == 2 and j == 2:
m.append('s')
elif w0 == 0 and w1 >= 2:
for (i, j) in zip(mx[0], mx[1]):
if i == 0 and j == 1:
m.append('u')
elif i == 0 and j == 2:
m.append('x')
elif i == 1 and j == 1:
m.append('x')
elif i == 2 and j == 1:
m.append('x')
elif i == 1 and j == 2:
m.append('s')
elif i == 2 and j == 2:
m.append('s')
elif w0 == 1 and w1 >= 2:
for (i, j) in zip(mx[0], mx[1]):
if j == 1:
m.append('x')
elif j == 2:
m.append('s')
elif w0 >= 2 and w1 == 1:
for (i, j) in zip(mx[0], mx[1]):
if i == 1:
m.append('x')
elif i == 2:
m.append('s')
elif w0 == 1 and w1 == 0:
for (i, j) in zip(mx[0], mx[1]):
if j == 0:
m.append('x')
elif j == 1:
m.append('s')
elif j == 2:
m.append('s')
elif w0 == 0 and w1 == 1:
for (i, j) in zip(mx[0], mx[1]):
if i == 0:
m.append('x')
if i == 1:
m.append('s')
if i == 2:
m.append('s')
return m
def strip_numbers(x):
xj = '.'.join(x)
xl = re.split('0|1|2', xj)
xjx = ''.join(xl)
xlx = xjx.split('.')
return xlx
def last_stressed_vowel(word):
if len(d[word]) <= 1:
pron = d[word][0]
else:
p0 = d[word][0]
p1 = d[word][1]
sj0 = ''.join(p0)
sl0 = re.split('0|1|2', sj0)
sj1 = ''.join(p1)
sl1 = re.split('0|1|2', sj1)
if len(sl1) < len(sl0):
pron = p1
else:
pron = p0
mtr = meter(word)
vowel_index = []
if len(mtr) == 1:
lsv = -1
elif mtr[-1] == 's' or mtr[-1] == 'x':
lsv = -1
elif mtr[-2] == 's' or mtr[-3] == 'x':
lsv = -2
elif mtr[-3] == 's' or mtr[-3] == 'x':
lsv = -3
elif mtr[-4] == 's' or mtr[-4] == 'x':
lsv = -4
elif mtr[-5] == 's' or mtr[-5] == 'x':
lsv = -5
elif mtr[-6] == 's' or mtr[-6] == 'x':
lsv = -6
elif mtr[-7] == 's' or mtr[-7] == 'x':
lsv = -7
elif mtr[-8] == 's' or mtr[-8] == 'x':
lsv = -8
elif mtr[-9] == 's' or mtr[-9] == 'x':
lsv = -9
elif mtr[-10] == 's' or mtr[-10] == 'x':
lsv = -10
else:
lsv = -1
for i in pron:
if '0' in i or '1' in i or '2' in i:
vowel_index.append(pron.index(i))
else:
continue
return vowel_index[lsv]
def rhyme_finder(word, tokenized_text):
word_list = make_word_list(tokenized_text)
word_list_u = unique(word_list)
rhyming_words = []
if len(d[word]) <= 1:
pron = d[word][0]
else:
p0 = d[word][0]
p1 = d[word][1]
sj0 = ''.join(p0)
sl0 = re.split('0|1|2', sj0)
sj1 = ''.join(p1)
sl1 = re.split('0|1|2', sj1)
if len(sl1) < len(sl0):
pron = p1
else:
pron = p0
pron = strip_numbers(pron)
lsv = last_stressed_vowel(word)
rhyme_part = pron[lsv:]
lrp = len(rhyme_part) * -1
for (x, y) in word_list_u:
ps = strip_numbers(y)
if ps[lrp:] == rhyme_part and ps[lrp-1:] != pron[lsv-1:]:
rhyming_words.append(x)
else:
pass
rw = [i for i in rhyming_words if not i == word]
return rw
lavigne_verse.txt
= http://pastebin.com/HkXzr1eB
lavigne_chorus.txt
= http://pastebin.com/apD7QxHC
lavigne_tokenize.txt
= http://pastebin.com/khzjndPT
Many thanks!
2 Answers 2
Some suggestions:
def unique(s):
u = []
for x in s:
if x not in u:
u.append(x)
else:
pass
return u
may become much simpler:
def unique(s):
return list(set(s))
but - as you use it only in one place
word_list_u = unique(word_list)
and in turn the word_list_u
is used only in one place, too:
for (x, y) in word_list_u:
where word_list_u
need not be a list.
So you may delete both the unique()
definition and its usage, and write directly
for (x, y) in set(word_list):
as sets in Python inherently not allow duplicates.
punc = set([',','.','"','?','!'])
def clean(str):
if str[-1] in punc:
return str[:-1]
return str
would be clearer with using the endswith()
method and a tuple of punctuation symbols:
punc = tuple(',."?!')
def clean(str):
if str.endswith(punc):
return str[:-1]
return str
In this part of your code
elif mtr[-1] == 's' or mtr[-1] == 'x':
lsv = -1
elif mtr[-2] == 's' or mtr[-3] == 'x':
lsv = -2
elif mtr[-3] == 's' or mtr[-3] == 'x':
lsv = -3
elif mtr[-4] == 's' or mtr[-4] == 'x':
lsv = -4
elif mtr[-5] == 's' or mtr[-5] == 'x':
lsv = -5
elif mtr[-6] == 's' or mtr[-6] == 'x':
lsv = -6
elif mtr[-7] == 's' or mtr[-7] == 'x':
lsv = -7
elif mtr[-8] == 's' or mtr[-8] == 'x':
lsv = -8
elif mtr[-9] == 's' or mtr[-9] == 'x':
lsv = -9
elif mtr[-10] == 's' or mtr[-10] == 'x':
lsv = -10
else:
lsv = -1
you probably have an error:
elif mtr[-2] == 's' or mtr[-3] == 'x': # both indices have to be -2
and you can shorten it, too:
for i in range(-1, -11, -1):
if mtr[i] in 'sx':
lsv = i
break
else:
lsv = -1
(The else
clause in a for
loop is performed if and only if the loop finishes without encountering break
.)
Explore related questions
See similar questions with these tags.
rhyme_finder
function does not do anything. \$\endgroup\$