Song lyric generator using Markov Chains - Python

Question 1

I have written a pop song generator which uses the Markovify library to produce lyrics based on (just for testing purposes) songs by Avril Lavigne.

In order to make the generator a bit more competent I have adapted some code which uses the nltk library to determine whether or not two words rhyme. I have used this to make the verses conform to an ABAC rhyme scheme.

It works, but is slow and sometimes gets stuck in a loop when the rhyme_finder function is called. I would greatly appreciate any suggestions as to how to streamline the programme, and or make it more efficient. I have linked the main programme app.py below, as well as the rhyme_finder function. But the Avril Lavigne lyrics, and the nltk.tokenize lyrics are on Pastebin.

The Markovify lib (https://github.com/jsvine/markovify), the Sylco syllable counter (https://github.com/eaydin/sylco), and some nltk dependencies (cmudict) are required to run.

Lyric Generator

import random
import markovify
import ast
from rhyme import rhyme_finder
from sylco import sylco
# Open and model lyrics
with open('lavigne_verse.txt') as f:
 verse_text = f.read()
with open('lavigne_chorus.txt') as f:
 chorus_text = f.read()
with open('lyrics_tokenize.txt') as f:
 tokenized_text = f.read()
verse_model = markovify.NewlineText(verse_text, state_size=2)
chorus_model = markovify.NewlineText(chorus_text, state_size=2)
# Evaluate tokenized_text as a list
tokenized_text = ast.literal_eval(tokenized_text)
# Specify then remove punctuation
punc = set([',','.','"','?','!'])
def clean(str):
 if str[-1] in punc:
 return str[:-1]
 return str
# Generate line that rhymes with stem of verse line 1
def match_rhyme(stem, verse_model):
 # Check if rhymes exist
 try:
 ls = rhyme_finder(stem, tokenized_text)
 except KeyError:
 return None
 if not ls:
 return None
 # If rhymes exist generate lines
 for n in range(100):
 while True:
 rhyme_line = verse_model.make_sentence()
 if rhyme_line is not None:
 # Keep syllables within range
 syl_count = sylco(rhyme_line)
 if syl_count > 16 or syl_count < 6:
 continue
 # Get stem of rhyme_line
 rhyme_stem = clean(rhyme_line.rsplit(None, 1)[-1])
 # Check for rhyme
 if rhyme_stem in ls:
 return rhyme_line
 break
 return None
# Generate 4-line verse
def make_verse(verse_model):
 verse = ''
 stem = None
 # Markovify for each line
 for _ in range(4):
 while True:
 # Try to find rhyming match between lines 1 and 3
 if _ == 2:
 match = match_rhyme(stem, verse_model)
 # If match, add to verse.
 if match is not None:
 verse += (match + '\n')
 break
 # Otherwise add non-random markovify line
 line = verse_model.make_sentence()
 if line is not None:
 # Keep syllables within range
 syl_count = sylco(line)
 if syl_count > 16 or syl_count < 6:
 continue
 # Cache line for rhyming
 if _ == 0:
 stem = clean(line.rsplit(None, 1)[-1])
 verse += (line + '\n')
 break
 return verse
# Construct chorus
def make_chorus(chorus_model):
 chorus = '[Chorus]' + '\n'
 # Two short lines
 for _ in range(2):
 while True:
 line = chorus_model.make_sentence()
 if line is not None:
 # Keep syllables less than 11
 syl_count = sylco(line)
 if syl_count > 10:
 continue
 chorus += (line + '\n')
 break
 # Two line reprieve
 while True:
 repeat = chorus_model.make_sentence()
 if repeat is not None:
 chorus += (repeat + '\n')
 chorus += (repeat + '\n')
 break
 return chorus
# Construct song
def make_song(chorus_model, verse_model):
 song_chorus = make_chorus(chorus_model)
 song = make_verse(verse_model) + '\n' + song_chorus + '\n' \
 + make_verse(verse_model) + '\n' + make_verse(verse_model) + '\n'\
 + (2 * (song_chorus + '\n'))
 return song
print (make_song(chorus_model, verse_model))

Rhyme_finder

import random
import re
import operator
from nltk.corpus import cmudict
e = cmudict.entries()
d = cmudict.dict()
def make_word_list(tokenized_text):
 word_list = []
 for i in tokenized_text:
 try:
 d[i.lower()]
 except KeyError:
 pass
 else:
 if i.lower() == "'s":
 pass
 elif i[-1] == ".":
 pass
 else:
 word_list.append((i.lower(), d[i.lower()][0]))
 return word_list
def unique(s):
 u = []
 for x in s:
 if x not in u:
 u.append(x)
 else:
 pass
 return u
def meter(word):
 pron = d[word]
 m1 = []
 m2 = []
 mx = []
 if len(pron) == 1:
 for i in pron[0]:
 if '0' in i:
 m1.append(0)
 elif '1' in i:
 m1.append(1)
 elif '2' in i:
 m1.append(2)
 else:
 pass
 mx = [m1]
 elif len(pron) >= 2:
 for i in pron[0]:
 if '0' in i:
 m1.append(0)
 elif '1' in i:
 m1.append(1)
 elif '2' in i:
 m1.append(2)
 else:
 pass
 for i in pron[1]:
 if '0' in i:
 m2.append(0)
 elif '1' in i:
 m2.append(1)
 elif '2' in i:
 m2.append(2)
 else:
 pass
 mx = [m1, m2]
 m = []
 if len(mx) == 1:
 w0 = reduce(operator.mul, mx[0], 1)
 if w0 >= 2:
 for i in mx[0]:
 if i == 1:
 m.append('u')
 elif i == 2:
 m.append('s')
 elif w0 == 1:
 for i in mx[0]:
 m.append('s')
 elif w0 == 0:
 for i in mx[0]:
 if i == 0:
 m.append('u')
 elif i == 1 or i == 2:
 m.append('s')
 elif len(mx) == 2:
 w0 = reduce(operator.mul, mx[0], 1)
 w1 = reduce(operator.mul, mx[1], 1)
 if w0 >= 2 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if i * j == 1:
 m.append('u')
 elif i * j == 4:
 m.append('s')
 elif i * j == 2:
 m.append('x')
 elif w0 == 1 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 m.append('s')
 elif w0 == 0 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if i == j and i * j >= 1:
 m.append('s')
 elif i != j and i * j == 0:
 m.append('x')
 elif i == j and i * j == 0:
 m.append('u')
 elif w0 >= 2 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 1 and j == 0:
 m.append('u')
 elif i == 2 and j == 0:
 m.append('x')
 elif i == 1 and j == 1:
 m.append('x')
 elif i == 1 and j == 2:
 m.append('x')
 elif i == 2 and j == 1:
 m.append('s')
 elif i == 2 and j == 2:
 m.append('s')
 elif w0 == 0 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 0 and j == 1:
 m.append('u')
 elif i == 0 and j == 2:
 m.append('x')
 elif i == 1 and j == 1:
 m.append('x')
 elif i == 2 and j == 1:
 m.append('x')
 elif i == 1 and j == 2:
 m.append('s')
 elif i == 2 and j == 2:
 m.append('s')
 elif w0 == 1 and w1 >= 2:
 for (i, j) in zip(mx[0], mx[1]):
 if j == 1:
 m.append('x')
 elif j == 2:
 m.append('s')
 elif w0 >= 2 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 1:
 m.append('x')
 elif i == 2:
 m.append('s')
 elif w0 == 1 and w1 == 0:
 for (i, j) in zip(mx[0], mx[1]):
 if j == 0:
 m.append('x')
 elif j == 1:
 m.append('s')
 elif j == 2:
 m.append('s')
 elif w0 == 0 and w1 == 1:
 for (i, j) in zip(mx[0], mx[1]):
 if i == 0:
 m.append('x')
 if i == 1:
 m.append('s')
 if i == 2:
 m.append('s')
 return m
def strip_numbers(x):
 xj = '.'.join(x)
 xl = re.split('0|1|2', xj)
 xjx = ''.join(xl)
 xlx = xjx.split('.')
 return xlx
def last_stressed_vowel(word):
 if len(d[word]) <= 1:
 pron = d[word][0]
 else:
 p0 = d[word][0]
 p1 = d[word][1]
 sj0 = ''.join(p0)
 sl0 = re.split('0|1|2', sj0)
 sj1 = ''.join(p1)
 sl1 = re.split('0|1|2', sj1)
 if len(sl1) < len(sl0):
 pron = p1
 else:
 pron = p0
 mtr = meter(word)
 vowel_index = []
 if len(mtr) == 1:
 lsv = -1
 elif mtr[-1] == 's' or mtr[-1] == 'x':
 lsv = -1
 elif mtr[-2] == 's' or mtr[-3] == 'x':
 lsv = -2
 elif mtr[-3] == 's' or mtr[-3] == 'x':
 lsv = -3
 elif mtr[-4] == 's' or mtr[-4] == 'x':
 lsv = -4
 elif mtr[-5] == 's' or mtr[-5] == 'x':
 lsv = -5
 elif mtr[-6] == 's' or mtr[-6] == 'x':
 lsv = -6
 elif mtr[-7] == 's' or mtr[-7] == 'x':
 lsv = -7
 elif mtr[-8] == 's' or mtr[-8] == 'x':
 lsv = -8
 elif mtr[-9] == 's' or mtr[-9] == 'x':
 lsv = -9
 elif mtr[-10] == 's' or mtr[-10] == 'x':
 lsv = -10
 else:
 lsv = -1
 for i in pron:
 if '0' in i or '1' in i or '2' in i:
 vowel_index.append(pron.index(i))
 else:
 continue
 return vowel_index[lsv]
def rhyme_finder(word, tokenized_text):
 word_list = make_word_list(tokenized_text)
 word_list_u = unique(word_list)
 rhyming_words = []
 if len(d[word]) <= 1:
 pron = d[word][0]
 else:
 p0 = d[word][0]
 p1 = d[word][1]
 sj0 = ''.join(p0)
 sl0 = re.split('0|1|2', sj0)
 sj1 = ''.join(p1)
 sl1 = re.split('0|1|2', sj1)
 if len(sl1) < len(sl0):
 pron = p1
 else:
 pron = p0
 pron = strip_numbers(pron)
 lsv = last_stressed_vowel(word)
 rhyme_part = pron[lsv:]
 lrp = len(rhyme_part) * -1
 for (x, y) in word_list_u:
 ps = strip_numbers(y)
 if ps[lrp:] == rhyme_part and ps[lrp-1:] != pron[lsv-1:]:
 rhyming_words.append(x)
 else:
 pass
 rw = [i for i in rhyming_words if not i == word]
 return rw

lavigne_verse.txt = http://pastebin.com/HkXzr1eB

lavigne_chorus.txt = http://pastebin.com/apD7QxHC

lavigne_tokenize.txt = http://pastebin.com/khzjndPT

Many thanks!

Question 2

Instead of "if line is not None:" you could simply go for "if line:". Further, you could some more functions for all the if elifs such as in meter(word) function. This would improve readability.

Question 3

Yes you're right. Also just realised that the syllable counter in the rhyme_finder function does not do anything.

Question 4

Some suggestions:

def unique(s):
 u = []
 for x in s:
 if x not in u:
 u.append(x)
 else:
 pass
 return u

may become much simpler:

def unique(s):
 return list(set(s))

but - as you use it only in one place

word_list_u = unique(word_list)

and in turn the word_list_u is used only in one place, too:

for (x, y) in word_list_u:

where word_list_u need not be a list.

So you may delete both the unique() definition and its usage, and write directly

for (x, y) in set(word_list):

as sets in Python inherently not allow duplicates.

punc = set([',','.','"','?','!'])
def clean(str):
 if str[-1] in punc:
 return str[:-1]
 return str

would be clearer with using the endswith() method and a tuple of punctuation symbols:

punc = tuple(',."?!')
def clean(str):
 if str.endswith(punc):
 return str[:-1]
 return str

Question 5

In this part of your code

elif mtr[-1] == 's' or mtr[-1] == 'x':
 lsv = -1
elif mtr[-2] == 's' or mtr[-3] == 'x':
 lsv = -2
elif mtr[-3] == 's' or mtr[-3] == 'x':
 lsv = -3
elif mtr[-4] == 's' or mtr[-4] == 'x':
 lsv = -4
elif mtr[-5] == 's' or mtr[-5] == 'x':
 lsv = -5
elif mtr[-6] == 's' or mtr[-6] == 'x':
 lsv = -6
elif mtr[-7] == 's' or mtr[-7] == 'x':
 lsv = -7
elif mtr[-8] == 's' or mtr[-8] == 'x':
 lsv = -8
elif mtr[-9] == 's' or mtr[-9] == 'x':
 lsv = -9
elif mtr[-10] == 's' or mtr[-10] == 'x':
 lsv = -10
else:
 lsv = -1

you probably have an error:

elif mtr[-2] == 's' or mtr[-3] == 'x': # both indices have to be -2

and you can shorten it, too:

for i in range(-1, -11, -1):
 if mtr[i] in 'sx':
 lsv = i
 break
else:
 lsv = -1

(The else clause in a for loop is performed if and only if the loop finishes without encountering break.)

MarianD MarianD 1,9561 gold badge11 silver badges20 bronze badges · Answer 1 · 2017-08-16 18:05:32Z

Some suggestions:

def unique(s):
 u = []
 for x in s:
 if x not in u:
 u.append(x)
 else:
 pass
 return u

may become much simpler:

def unique(s):
 return list(set(s))

but - as you use it only in one place

word_list_u = unique(word_list)

and in turn the word_list_u is used only in one place, too:

for (x, y) in word_list_u:

where word_list_u need not be a list.

So you may delete both the unique() definition and its usage, and write directly

for (x, y) in set(word_list):

as sets in Python inherently not allow duplicates.

punc = set([',','.','"','?','!'])
def clean(str):
 if str[-1] in punc:
 return str[:-1]
 return str

would be clearer with using the endswith() method and a tuple of punctuation symbols:

punc = tuple(',."?!')
def clean(str):
 if str.endswith(punc):
 return str[:-1]
 return str

MarianD MarianD 1,9561 gold badge11 silver badges20 bronze badges · Answer 2 · 2017-08-16 18:43:51Z

In this part of your code

elif mtr[-1] == 's' or mtr[-1] == 'x':
 lsv = -1
elif mtr[-2] == 's' or mtr[-3] == 'x':
 lsv = -2
elif mtr[-3] == 's' or mtr[-3] == 'x':
 lsv = -3
elif mtr[-4] == 's' or mtr[-4] == 'x':
 lsv = -4
elif mtr[-5] == 's' or mtr[-5] == 'x':
 lsv = -5
elif mtr[-6] == 's' or mtr[-6] == 'x':
 lsv = -6
elif mtr[-7] == 's' or mtr[-7] == 'x':
 lsv = -7
elif mtr[-8] == 's' or mtr[-8] == 'x':
 lsv = -8
elif mtr[-9] == 's' or mtr[-9] == 'x':
 lsv = -9
elif mtr[-10] == 's' or mtr[-10] == 'x':
 lsv = -10
else:
 lsv = -1

you probably have an error:

elif mtr[-2] == 's' or mtr[-3] == 'x': # both indices have to be -2

and you can shorten it, too:

for i in range(-1, -11, -1):
 if mtr[i] in 'sx':
 lsv = i
 break
else:
 lsv = -1

(The else clause in a for loop is performed if and only if the loop finishes without encountering break.)

Stack Exchange Network

Song lyric generator using Markov Chains - Python

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Song lyric generator using Markov Chains - Python

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions