The live version is at my github
Lingo is a game where the host secretly picks a 5 letter word, then provides the first letter to the player. The player then guesses a word, and the host gives feedback on what letters are right, wrong, or in the wrong position.
I call this feedback match_string and use the following format:
's' = right letter, right position (to represent square)
'o' = right letter, wrong position (to represent circle)
'x' = letter is not in word. (to represent.. well.. X)
This is a cheater for Lingo. It loads data from the word list (in this case scrabble dictionary) to find potential words. It also provides guesses according to both the probability of characters in remaining words, and also an english word usage frequency.
from collections import defaultdict, Counter
from copy import copy
from math import log
from random import choices
import re
import os
import pickle
# VARIABLES
# Number of games the computer plays against itself (loop bound in CompPlay.cp_main).
num_loops = 10**4
# Length of every word in the game; used to filter both input files.
word_len = 5
# Guesses allowed per game before the computer is disqualified ('DQ').
max_guesses = 5
# Path of the dictionary file, read one word per line by WordList.build_wordlists.
word_list = 'Collins Scrabble Words (2019).txt'
# Path of the usage-frequency file; each line is split on whitespace and the
# first two fields are read as "<count> <word>".
freq_list = 'all.num.o5.txt'
# Path whose existence WordList.__init__ checks before deciding to rebuild.
cache_file = 'cached.pickle'
# PROGRAM
def main():
    """Build the word data, show the mode menu and run the selected mode.

    The prompt repeats until the computer-vs-itself mode has been run; an
    empty answer selects that mode.
    """
    wl = WordList()
    # TODO: add a mode where human can play vs comp supplied words
    print("""
1. [H]uman enters guesses and match strings from an external source
2. [C]omputer plays vs itself""")
    human_answers = ('1', 'h')
    computer_answers = ('2', 'c', '')
    while True:
        answer = input('Choice?').lower()
        if answer in human_answers:
            human_player(wl)
            continue
        if answer in computer_answers:
            CompPlay(wl).cp_main()
            break
        print('Invalid Choice')
def human_player(wl):
    """Interactive helper for a game that is hosted somewhere else.

    For each game the user supplies the first letter, then repeatedly enters
    a guess (or just presses enter to accept the top suggestion) and the
    match string the host gave for it.  The candidate list is narrowed after
    every round until one or zero words remain, then a new game starts.

    Match strings use one character per letter: 's' (right letter, right
    position), 'o' (right letter, wrong position), 'x' (letter not in word).

    Args:
        wl: the WordList used to look up candidate words and their scores.
    """
    # The whole answer must be exactly word_len characters from "sox".
    match_pattern = re.compile(r'[sox]{' + str(word_len) + '}')
    while True:
        first_letter = input('What\'s the first letter?').upper()
        pc = PossCalculator(wl, first_letter)
        if len(first_letter) != 1 or not pc.poss:
            # Without candidates there is nothing to suggest or to filter.
            print(first_letter, 'is not a valid first letter. Please try again')
            continue
        while True:
            pc.print_best(5)
            guess = input('Guess?').upper()
            if guess == '':
                guess = first_letter + pc.get_best(1)[0][0]
                print(f'Guessing: {guess}')
            elif not guess.startswith(first_letter) or guess[1:] not in pc.poss:
                print(guess, 'is not a valid word. Please try again')
                continue
            match_string = input('Match String?').lower()
            # Keep asking for the match string only; the guess is already fixed.
            while not match_pattern.fullmatch(match_string):
                print('invalid match string. Please try again')
                match_string = input('Match String?').lower()
            num_poss = pc.calc_matches(guess, match_string)
            if num_poss == 1:
                # The answer is the single surviving candidate, which is only
                # the same as the guess when the guess was fully correct.
                answer = first_letter + next(iter(pc.poss))
                print(f' -={answer}=-')
                break
            print(f' {num_poss} words left')
            if num_poss == 0:
                print(' WTF did you do?')
                break
def str_pos_sub(string, pos, sub):
    """Return a copy of `string` with the character at index `pos` replaced by `sub`."""
    before = string[:pos]
    after = string[pos + 1:]
    return before + sub + after
class CompPlay:
    """Computer-vs-itself mode: pick a secret word, let the solver guess it, repeat."""

    def __init__(self, wl):
        """Store the WordList that supplies secret words and candidate words."""
        self.wl = wl

    def cp_main(self):
        """Play `num_loops` games and print a tally of the outcomes.

        Outcomes are counted under the number of guesses needed, under 'DQ'
        when more than `max_guesses` guesses were required, or under 'WTF'
        when the secret word was filtered out of the candidate set.
        """
        guess_counter = Counter()
        for _ in range(num_loops):
            word = self.get_word()[0]
            print(f'Word is: {word}')
            pc = PossCalculator(self.wl, word[0])
            guesses = []
            while True:
                guess = word[0] + pc.get_best(1)[0][0]
                if guess in guesses:
                    # Never repeat a guess: drop it and ask for the next best.
                    pc.poss.discard(guess[1:])
                    continue
                guesses.append(guess)
                if len(guesses) > max_guesses:
                    print(' :( too many guesses')
                    guess_counter['DQ'] += 1
                    break
                elif guess == word:
                    print(f' -={word}=-')
                    print(f' {len(guesses)} guesses')
                    guess_counter[len(guesses)] += 1
                    break
                match_string = self.get_match_string(word, guess)
                num_poss = pc.calc_matches(guess, match_string)
                print(f' {guess}\t{match_string}\t{num_poss} words left')
                if word[1:] not in pc.poss:
                    print(' WTF did you do?')
                    guess_counter['WTF'] += 1
                    break
        print('\n')
        for guesses, count in guess_counter.most_common():
            print(f'{count:5d} solved in {guesses} guesses')

    def get_match_string(self, word, guess):
        """Return the host's feedback for `guess` against the secret `word`.

        The result has one character per letter: 's' for the right letter in
        the right position, 'o' for a letter that occurs elsewhere in the
        word, 'x' otherwise.  Each letter of the word is used for at most one
        's' or 'o'; exact matches are assigned first, then 'o' marks from
        left to right.

        Raises:
            ValueError: if `word` and `guess` differ in length.
        """
        if len(guess) != len(word):
            raise ValueError('guess and word must have the same length')
        marks = ['x'] * len(word)
        # Letters of the secret word that were not matched exactly; the whole
        # word counts, including its first letter.
        unmatched = Counter()
        for pos, (guess_char, word_char) in enumerate(zip(guess, word)):
            if guess_char == word_char:
                marks[pos] = 's'
            else:
                unmatched[word_char] += 1
        for pos, guess_char in enumerate(guess):
            if marks[pos] != 's' and unmatched[guess_char] > 0:
                marks[pos] = 'o'
                unmatched[guess_char] -= 1
        return ''.join(marks)

    def get_word(self):
        """Return a one-element list holding a secret word, weighted by its score."""
        word_freq = self.wl.word_freq
        # TODO: speedup by turning this into a cached cumulative list
        return choices(
            list(word_freq.keys()),    # population
            list(word_freq.values()),  # weights
        )
class PossCalculator:
    """Tracks which words are still possible and ranks them as next guesses.

    Candidates are stored in `poss` without their first letter, because the
    first letter is given at the start of the game.
    """

    def __init__(self, wl, first_letter):
        """Start with every word of `wl` that begins with `first_letter`."""
        self.wl = wl
        self.first_letter = first_letter
        # Work on a private copy so filtering never touches the word list.
        self.poss = set(wl.starts_with(first_letter))
        print(f' starting letter {first_letter}, {len(self.poss)} words left')

    def calc_matches(self, guess, match_string):
        """Drop candidates that contradict the feedback and return how many remain.

        Args:
            guess: the full guessed word, including the first letter.
            match_string: the feedback for `guess`, one of 's', 'o', 'x' per letter.
        """
        guess = guess[1:]
        match_string = match_string[1:]
        # Collect first, then remove: the set must not change while iterated.
        rejected = [
            word for word in self.poss
            if not self.check_valid(guess, match_string, word)
        ]
        self.poss.difference_update(rejected)
        return len(self.poss)

    def check_valid(self, guess, match_string, word):
        """Return True if `word` could be the secret given this feedback.

        All three arguments are without the first letter.  An 's' requires
        the same letter at that position.  An 'o' or 'x' requires a different
        letter at that position, since an equal letter would have been
        reported as 's'.  Each 'o' must be covered by a distinct letter of
        `word` that is not at an 's' position, and an 'x' letter must not
        occur among the letters left over after that.  The order of 'o' and
        'x' marks for a repeated letter is not checked.

        Raises:
            ValueError: if `guess` and `match_string` differ in length or
                `match_string` contains a character other than 's', 'o', 'x'.
        """
        if len(guess) != len(match_string):
            raise ValueError('guess and match string must have the same length')
        if len(word) != len(guess):
            return False
        # Letters of the candidate that are not pinned by an 's'.
        remaining = Counter()
        for guess_char, mark, word_char in zip(guess, match_string, word):
            if mark == 's':
                if guess_char != word_char:
                    return False
            elif mark in 'ox':
                if guess_char == word_char:
                    return False
                remaining[word_char] += 1
            else:
                raise ValueError(f'invalid match character: {mark!r}')
        for guess_char, mark in zip(guess, match_string):
            if mark == 'o':
                if remaining[guess_char] <= 0:
                    return False
                remaining[guess_char] -= 1
        for guess_char, mark in zip(guess, match_string):
            if mark == 'x' and remaining[guess_char] > 0:
                return False
        # You have passed the three trials of the match_string. You have proven yourself.
        return True

    def get_best(self, n):
        """Return the `n` best guesses as (word_without_first_letter, score) pairs.

        A word's letter score is the number of candidates sharing each of its
        distinct letters, multiplied by (distinct letters + 1).  The score is
        scaled so the average word gets 130, then the usage score of the full
        word from `wl.word_freq` is added.  Returns an empty list when no
        candidates are left.
        """
        if not self.poss:
            return []
        char_score = Counter()
        for word in self.poss:
            char_score.update(set(word))
        word_scores = Counter()
        for word in self.poss:
            word_set = set(word)
            letter_total = sum(char_score[char] for char in word_set)
            word_scores[word] = letter_total * (len(word_set) + 1)
        avg_word_score = int(sum(word_scores.values()) / len(word_scores))
        # NOTE(review): if wl.word_freq is a defaultdict, this lookup inserts
        # every scored word into it; confirm that is intended.
        word_freq = self.wl.word_freq
        for word, score in word_scores.items():
            word_scores[word] = (
                int(score / avg_word_score * 130)
                + word_freq[self.first_letter + word]
            )
        return word_scores.most_common(n)

    def print_best(self, n):
        """Print the `n` best guesses as full words with their scores."""
        for word, score in self.get_best(n):
            print(f'{self.first_letter}{word}\t{score}')
class WordList:
    """Dictionary words of length `word_len` plus a usage score per word.

    Attributes:
        word_dict: defaultdict mapping a first letter to the set of word
            remainders (the word without its first letter).
        word_freq: defaultdict mapping a full upper-case word to its usage
            score; words without an entry score 40.
    """

    @staticmethod
    def _default_freq():
        """Score used for words that have no entry in the frequency file."""
        return 40

    def __init__(self):
        """Load the word data from the cache file, or build it from the source files."""
        if os.path.exists(cache_file):
            print('Loading cached wordlist!')
            if self._load_cache():
                return
        print('Building wordlist!')
        self.build_wordlists()

    def _load_cache(self):
        """Fill the attributes from `cache_file`; return False if it is unusable."""
        try:
            with open(cache_file, 'rb') as f:
                # NOTE: pickle.load can run arbitrary code. Only load a cache
                # file that this program wrote itself.
                meta, word_dict, word_freq = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
                ValueError, TypeError):
            return False
        # A cache built from other files or another word length is stale.
        if meta != (word_list, freq_list, word_len):
            return False
        self.word_dict = defaultdict(set, word_dict)
        self.word_freq = defaultdict(self._default_freq, word_freq)
        return True

    def _save_cache(self):
        """Write the word data to `cache_file`; failure to write is not fatal."""
        # Plain dicts are stored because a defaultdict whose factory is a
        # lambda cannot be pickled.
        payload = (
            (word_list, freq_list, word_len),
            dict(self.word_dict),
            dict(self.word_freq),
        )
        try:
            with open(cache_file, 'wb') as f:
                pickle.dump(payload, f)
        except OSError as err:
            print(f'Could not write {cache_file}: {err}')

    def build_wordlists(self):
        """Read `word_list` and `freq_list` and build `word_dict` and `word_freq`."""
        # word_dict is {first_letter: {rest_of_word1, rest_of_word2}}
        self.word_dict = defaultdict(set)
        with open(word_list) as f:
            for line in f:
                # Strip the line ending so the last line and CRLF files work too.
                word = line.strip()
                if len(word) == word_len:
                    # we already know the first letter, so cut it off with [1:]
                    self.word_dict[word[0]].add(word[1:])
        self.word_freq = defaultdict(self._default_freq)
        with open(freq_list) as f:
            for line in f:
                fields = line.split()
                if len(fields) < 2 or len(fields[1]) != word_len:
                    continue
                word = fields[1].upper()
                # Only score words that are in the dictionary; .get avoids
                # creating empty entries for unknown first letters.
                if word[1:] not in self.word_dict.get(word[0], ()):
                    continue
                try:
                    count = int(fields[0])
                except ValueError:
                    continue
                # log() is undefined below 1; such words get the lowest score.
                self.word_freq[word] = int(log(max(count, 1), 6) * 40)
        self._save_cache()

    def starts_with(self, first_letter):
        """Return the set of word remainders for words starting with `first_letter`."""
        return self.word_dict.get(first_letter, set())
# Run the menu only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
1 Answer 1
The quality of your code is good.
You have a clear consistent style, which looks quite PEP 8 compatible.
The one glaring change I'd make is your variables at the top of the file are actually constants.
As such the names would be UPPER_SNAKE_CASE
if you decide to follow PEP 8 here.
If we look at individual lines or functions of code stand alone your code is fairly strong. You don't have glaring issues by using poor 'line-level' patterns.
However the code is hard to read and understand.
I think you've focused on getting the cheating working, as such you've not put much thought into the overall structure.
For example you have duplicate logic in human_player
and CompPlay.cp_main
.
You can build a common interface between the two methods of play.
Lets look into how we could build a common interface from the description in your question.
choose_word
Lingo is a game where the host secretly picks a 5 letter word, then provides the first letter to the player.
get_guess
The player then guesses a word,
get_match
and the host gives feedback on what letters are right, wrong, or in the wrong position.
is_match
Presumably we want to stop playing once we guess the right word.
complete
The host would congratulate winners, we can here.
Here's an example implementation in Python.
from typing import Protocol
class IGame(Protocol):
    """Structural interface that every mode of play implements."""

    def choose_word(self) -> int:
        """Pick the secret word and return its length."""

    def get_guess(self, size: int) -> str:
        """Return the next guess for a word of `size` letters."""

    def get_match(self, guess: str) -> str:
        """Return the match string for `guess`."""

    def is_match(self, guess: str, match: str) -> bool:
        """Return True when `match` says `guess` is the secret word."""

    def complete(self, guess: str) -> None:
        """Finish the game after `guess` was found to be correct."""
class HumanGame:
    """Game in which the computer picks the word and a human guesses it."""

    known_words: list[str]

    def __init__(self, known_words: list[str]) -> None:
        """Store the words the computer may choose from."""
        self.known_words = known_words

    def choose_word(self) -> int:
        """Pick a random secret word, announce its first letter, return its length."""
        # Imported here because this snippet's import header does not include it.
        import random

        self.word = random.choice(self.known_words)
        print(f"Computer has chosen a word starting with {self.word[0]}")
        return len(self.word)

    def get_guess(self, size: int) -> str:
        """Prompt until the player enters a guess of exactly `size` letters."""
        while True:
            guess = input(f"Guess a {size} letter word? ").upper()
            if len(guess) == size:
                return guess
            print("Invalid length guess")

    def get_match(self, guess: str) -> str:
        """Return the match string for `guess` against the secret word.

        's' marks the right letter in the right position, 'o' a letter that
        occurs elsewhere in the word, 'x' anything else.  Each letter of the
        secret word is used for at most one 's' or 'o', so a repeated guess
        letter is only marked 'o' while unmatched copies remain.
        """
        # Count the secret's letters that are not matched exactly.
        unmatched: dict[str, int] = {}
        for g, w in zip(guess, self.word):
            if g != w:
                unmatched[w] = unmatched.get(w, 0) + 1
        marks = []
        for g, w in zip(guess, self.word):
            if g == w:
                marks.append("s")
            elif unmatched.get(g, 0) > 0:
                unmatched[g] -= 1
                marks.append("o")
            else:
                marks.append("x")
        return "".join(marks)

    def is_match(self, guess: str, match: str) -> bool:
        """Return True when every position of `match` is 's'."""
        return match == "s" * len(guess)

    def complete(self, guess: str) -> None:
        """Congratulate the player."""
        print(f"You are correct, {guess} is the word.")
def game_main(game: "IGame") -> None:
    """Run one game: choose a word, then take guesses until one is correct.

    After every wrong guess the guess and its match string are printed, so
    the player can see the feedback.  The final, correct guess is passed to
    `game.complete`.
    """
    size = game.choose_word()
    while True:
        guess = game.get_guess(size)
        match = game.get_match(guess)
        if game.is_match(guess, match):
            break
        print(f"{guess}\t{match}")
    game.complete(guess)
def main():
    """Play one human game with a small built-in word list."""
    sample_words = ["CAT", "BAT", "WAT"]
    game = HumanGame(sample_words)
    game_main(game)


if __name__ == "__main__":
    main()
From here we can then focus on picking the best. I find your code quite complicated.
The code has two highly coupled classes to deal with picking the best. Some of the decisions around how to store the data make the code quite complex, and the code does not apply the Single Responsibility Principle (SRP).
Lets focus just on word_list
;
- I cannot pass a list of words to
WordList
. (impaired functionality) - The code reads from the global
word_list
. (impaired functionality) - The class coupled the frequency when building the word list. (increased complexity)
- You can only read from a file applying your filters straight away. (impaired reusability)
- You filter the output in
PossCalculator
. (increased complexity)
Lets focus on what you do to the word list:
- Read from a file.
- Filter words by length.
- Filter words by starting letter.
(You actually group and then index a dictionary, but functionally the same thing.) - Filter by
match_string
and guess
. - Give each word a score based on letters.
As such we can build a single class to do everything.
from __future__ import annotations
class Words(list[str]):
    """A list of words with helpers to load, filter and score them."""

    @classmethod
    def from_path(cls, path: str) -> "Words":
        """Read one word per line from `path`, ignoring blank lines."""
        with open(path) as f:
            return cls([word for line in f if (word := line.strip())])

    def filter_length(self, size: int) -> "Words":
        """Return the words that are exactly `size` letters long."""
        return Words([word for word in self if len(word) == size])

    def filter_start(self, start: str) -> "Words":
        """Return the words that begin with `start`."""
        return Words([word for word in self if word.startswith(start)])

    def filter_match(self, guess: str, match: str) -> "Words":
        """Return the words that are consistent with `match` for `guess`.

        Raises:
            ValueError: if `guess` and `match` differ in length or `match`
                contains a character other than 's', 'o', 'x'.
        """
        if len(guess) != len(match):
            raise ValueError("guess and match must have the same length")
        if set(match) - set("sox"):
            raise ValueError(f"invalid match string: {match!r}")
        return Words([
            word for word in self if self._check_valid(guess, match, word)
        ])

    @staticmethod
    def _check_valid(guess: str, match: str, word: str) -> bool:
        """Return True if `word` could be the secret given `match` for `guess`.

        An 's' requires the same letter at that position.  An 'o' or 'x'
        requires a different letter there, since an equal letter would have
        been reported as 's'.  Each 'o' must be covered by a distinct letter
        of `word` not at an 's' position, and an 'x' letter must not occur
        among the letters left over after that.
        """
        if not len(guess) == len(match) == len(word):
            return False
        # Letters of the candidate that are not pinned by an 's'.
        remaining: dict[str, int] = {}
        for g, m, w in zip(guess, match, word):
            if (g == w) != (m == 's'):
                return False
            if m != 's':
                remaining[w] = remaining.get(w, 0) + 1
        for g, m in zip(guess, match):
            if m == 'o':
                if remaining.get(g, 0) <= 0:
                    return False
                remaining[g] -= 1
        return not any(
            m == 'x' and remaining.get(g, 0) > 0
            for g, m in zip(guess, match)
        )

    def get_scores(self) -> "collections.Counter[str]":
        """Score each word by how many words share each of its distinct letters."""
        # Imported here because this snippet's import header does not include it.
        import collections

        char_score = collections.Counter()
        for word in self:
            char_score.update(set(word))
        word_scores = collections.Counter()
        for word in self:
            word_scores[word] = sum(char_score[char] for char in set(word))
        return word_scores
I have gone on to change to roughly reimplement CompPlay
with the above changes. So you can see how I'd use the code.
Note: all code blocks are untested
from __future__ import annotations
import random
import math
import collections
from typing import Iterable, Protocol
class IGame(Protocol):
    """Structural interface shared by the human and the computer game."""

    def choose_word(self) -> int:
        """Pick the secret word and return its length."""

    def get_guess(self, size: int) -> str:
        """Return the next guess for a word of `size` letters."""

    def get_match(self, guess: str) -> str:
        """Return the match string for `guess`."""

    def is_match(self, guess: str, match: str) -> bool:
        """Return True when `match` says `guess` is the secret word."""

    def complete(self, guess: str) -> None:
        """Finish the game after `guess` was found to be correct."""
class HumanGame:
    """Game in which the computer picks the word and a human guesses it."""

    known_words: list[str]

    def __init__(self, known_words: list[str]) -> None:
        """Store the words the computer may choose from."""
        self.known_words = known_words

    def choose_word(self) -> int:
        """Pick a random secret word, announce its first letter, return its length."""
        self.word = random.choice(self.known_words)
        print(f"Computer has chosen a word starting with {self.word[0]}")
        return len(self.word)

    def get_guess(self, size: int) -> str:
        """Prompt until the player enters a guess of exactly `size` letters."""
        while True:
            guess = input(f"Guess a {size} letter word? ").upper()
            if len(guess) == size:
                return guess
            print("Invalid length guess")

    def get_match(self, guess: str) -> str:
        """Return the match string for `guess` against the secret word.

        's' marks the right letter in the right position, 'o' a letter that
        occurs elsewhere in the word, 'x' anything else.  Each letter of the
        secret word is used for at most one 's' or 'o', so a repeated guess
        letter is only marked 'o' while unmatched copies remain.
        """
        # Count the secret's letters that are not matched exactly.
        unmatched = collections.Counter(
            w for g, w in zip(guess, self.word) if g != w
        )
        marks = []
        for g, w in zip(guess, self.word):
            if g == w:
                marks.append("s")
            elif unmatched[g] > 0:
                unmatched[g] -= 1
                marks.append("o")
            else:
                marks.append("x")
        return "".join(marks)

    def is_match(self, guess: str, match: str) -> bool:
        """Return True when every position of `match` is 's'."""
        return match == "s" * len(guess)

    def complete(self, guess: str) -> None:
        """Congratulate the player."""
        print(f"You are correct, {guess} is the word.")
def game_main(game: "IGame") -> None:
    """Run one game: choose a word, then take guesses until one is correct.

    After every wrong guess the guess and its match string are printed, so
    the feedback is visible.  The final, correct guess is passed to
    `game.complete`.
    """
    size = game.choose_word()
    while True:
        guess = game.get_guess(size)
        match = game.get_match(guess)
        if game.is_match(guess, match):
            break
        print(f"{guess}\t{match}")
    game.complete(guess)
class Words(list[str]):
    """A list of words with helpers to load, filter and score them."""

    @classmethod
    def from_path(cls, path: str) -> "Words":
        """Read one word per line from `path`, ignoring blank lines."""
        with open(path) as f:
            return cls([word for line in f if (word := line.strip())])

    def filter_length(self, size: int) -> "Words":
        """Return the words that are exactly `size` letters long."""
        return Words([word for word in self if len(word) == size])

    def filter_start(self, start: str) -> "Words":
        """Return the words that begin with `start`."""
        return Words([word for word in self if word.startswith(start)])

    def filter_match(self, guess: str, match: str) -> "Words":
        """Return the words that are consistent with `match` for `guess`.

        Raises:
            ValueError: if `guess` and `match` differ in length or `match`
                contains a character other than 's', 'o', 'x'.
        """
        if len(guess) != len(match):
            raise ValueError("guess and match must have the same length")
        if set(match) - set("sox"):
            raise ValueError(f"invalid match string: {match!r}")
        return Words([
            word for word in self if self._check_valid(guess, match, word)
        ])

    @staticmethod
    def _check_valid(guess: str, match: str, word: str) -> bool:
        """Return True if `word` could be the secret given `match` for `guess`.

        An 's' requires the same letter at that position.  An 'o' or 'x'
        requires a different letter there, since an equal letter would have
        been reported as 's'.  Each 'o' must be covered by a distinct letter
        of `word` not at an 's' position, and an 'x' letter must not occur
        among the letters left over after that.
        """
        if not len(guess) == len(match) == len(word):
            return False
        # Letters of the candidate that are not pinned by an 's'.
        remaining: dict[str, int] = {}
        for g, m, w in zip(guess, match, word):
            if (g == w) != (m == 's'):
                return False
            if m != 's':
                remaining[w] = remaining.get(w, 0) + 1
        for g, m in zip(guess, match):
            if m == 'o':
                if remaining.get(g, 0) <= 0:
                    return False
                remaining[g] -= 1
        return not any(
            m == 'x' and remaining.get(g, 0) > 0
            for g, m in zip(guess, match)
        )

    def get_scores(self) -> "collections.Counter[str]":
        """Score each word by how many words share each of its distinct letters."""
        char_score = collections.Counter()
        for word in self:
            char_score.update(set(word))
        word_scores = collections.Counter()
        for word in self:
            word_scores[word] = sum(char_score[char] for char in set(word))
        return word_scores
class Frequencies(dict[str, int]):
    """Usage score per word; words without an entry score `DEFAULT_SCORE`."""

    # Score returned for words that were not in the frequency data.
    DEFAULT_SCORE = 40

    def __missing__(self, word: str) -> int:
        """Return the default score for an unknown word without storing it."""
        return self.DEFAULT_SCORE

    @classmethod
    def from_frequencies(cls, frequencies: Iterable[tuple[int, str]]) -> "Frequencies":
        """Build scores from (count, word) pairs as int(log6(count) * 40).

        Counts below 1 are treated as 1, because the logarithm is undefined
        for them; such words get the lowest score, 0.
        """
        return cls({
            word: int(math.log(max(frequency, 1), 6) * 40)
            for frequency, word in frequencies
        })

    @classmethod
    def from_path(cls, path: str) -> "Frequencies":
        """Read "<count> <word> ..." lines from `path`.

        Lines with fewer than two fields or a non-integer count are skipped.
        """
        with open(path) as f:
            # The generator is consumed inside the `with`, while f is open.
            return cls.from_frequencies(cls._parse_lines(f))

    @staticmethod
    def _parse_lines(lines: Iterable[str]) -> Iterable[tuple[int, str]]:
        """Yield (count, word) for every well-formed line."""
        for line in lines:
            split = line.split()
            if len(split) < 2:
                continue
            try:
                yield int(split[0]), split[1]
            except ValueError:
                continue
class PossCalculator:
    """Keeps the remaining candidate words and ranks them as next guesses."""

    def __init__(self, words: "Words", freqs: "Frequencies") -> None:
        """Store the candidate words and the usage scores used for ranking."""
        self.words = words
        self.freqs = freqs

    def calc_matches(self, guess: str, match: str) -> int:
        """Keep only candidates consistent with the feedback; return how many remain."""
        self.words = self.words.filter_match(guess, match)
        return len(self.words)

    def get_bests(self) -> "collections.Counter[str]":
        """Return a score for every candidate word.

        The letter score from `Words.get_scores` is scaled so the average
        word gets 130, then the word's usage score is added (40 when the
        word has no entry in `freqs`).  Returns an empty counter when there
        are no candidates.
        """
        word_scores = self.words.get_scores()
        if not word_scores:
            return word_scores
        # max(..., 1) guards the division when every letter score is 0.
        avg_word_score = max(
            int(sum(word_scores.values()) / len(word_scores)), 1
        )
        for word, score in word_scores.items():
            word_scores[word] = (
                int(score / avg_word_score * 130) + self.freqs.get(word, 40)
            )
        return word_scores

    def get_best(self) -> str:
        """Return the highest-scoring candidate.

        Raises:
            ValueError: if no candidate words are left.
        """
        best = self.get_bests().most_common(1)
        if not best:
            raise ValueError("no candidate words left")
        return best[0][0]

    def print_best(self, n):
        """Print the `n` highest-scoring candidates with their scores."""
        for word, score in self.get_bests().most_common(n):
            print(f'{word}\t{score}')
class ComputerGame:
    """Game in which the computer both picks the word and guesses it."""

    pos: "PossCalculator"
    word: str
    guesses: set[str]

    def __init__(self, pos: "PossCalculator") -> None:
        """Store the solver that supplies and narrows the candidate words."""
        self.pos = pos
        self.guesses = set()
        # (guess, match) of the last round, applied before the next guess.
        self._feedback = None

    def choose_word(self) -> int:
        """Pick a secret word from the candidates, weighted by usage score.

        Returns the length of the chosen word.
        """
        candidates = list(self.pos.words)
        # Words without a frequency entry get the default score 40.
        weights = [self.pos.freqs.get(word, 40) for word in candidates]
        self.word = random.choices(candidates, weights)[0]
        print(f'Word is: {self.word}')
        return len(self.word)

    def get_guess(self, size: int) -> str:
        """Return the best-scoring candidate that has not been guessed yet.

        Feedback from the previous round is applied first, so the candidate
        list shrinks from guess to guess.

        Raises:
            RuntimeError: if every remaining candidate was already guessed.
        """
        if self._feedback is not None:
            self.pos.calc_matches(*self._feedback)
            self._feedback = None
        for guess, _score in self.pos.get_bests().most_common():
            if guess not in self.guesses:
                self.guesses.add(guess)
                return guess
        raise RuntimeError("no candidate words left to guess")

    def get_match(self, guess: str) -> str:
        """Return the match string for `guess` and remember it as feedback.

        's' marks the right letter in the right position, 'o' a letter that
        occurs elsewhere in the word, 'x' anything else.  Each letter of the
        secret word is used for at most one 's' or 'o'.
        """
        # Count the secret's letters that are not matched exactly.
        unmatched = collections.Counter(
            w for g, w in zip(guess, self.word) if g != w
        )
        marks = []
        for g, w in zip(guess, self.word):
            if g == w:
                marks.append("s")
            elif unmatched[g] > 0:
                unmatched[g] -= 1
                marks.append("o")
            else:
                marks.append("x")
        match = "".join(marks)
        self._feedback = (guess, match)
        return match

    def is_match(self, guess: str, match: str) -> bool:
        """Return True when every position of `match` is 's'."""
        return match == "s" * len(guess)

    def complete(self, guess: str) -> None:
        """Report how many guesses were needed."""
        print(f'{self.word} solved in {len(self.guesses)} guesses')
def main():
    """Load the word and frequency files and let the computer play one game."""
    word_list_path = 'Collins Scrabble Words (2019).txt'
    freq_list_path = 'all.num.o5.txt'
    five_letter_words = Words.from_path(word_list_path).filter_length(5)
    usage_scores = Frequencies.from_path(freq_list_path)
    solver = PossCalculator(five_letter_words, usage_scores)
    game_main(ComputerGame(solver))


if __name__ == "__main__":
    main()
-
\$\begingroup\$ Thank you for taking the time to write all that out! some of it makes sense, and some of it makes my head spin. \$\endgroup\$Tom L– Tom L2021年05月23日 19:28:46 +00:00Commented May 23, 2021 at 19:28
-
\$\begingroup\$ Apparently enter finishes the comment... anyway, i've never completely understood the variable type syntax you have, though i've seen it in popups while using pycharm... good to know that that's an actual thing, and i can see where that'd be useful instead of the occasional comment 'this is how this variable is structured'. You mentioned SRP, which I understand is part of OOP in general, but I've yet to find a good resource that i grok how to explain what should be in which classes, and where they go in a hierarchy. More reading I suppose. \$\endgroup\$Tom L– Tom L2021年05月23日 19:31:48 +00:00Commented May 23, 2021 at 19:31
-
\$\begingroup\$ @TomL "i've never completely understood the variable type syntax you have" is ok. I've only used type hints to make the code easier for me to write, feel free to ignore anything
typing
related. SRP is paradigm independent (has nothing to do with OOP). Single Responsibility Principle just says a single unit of code (a class, function, method, etc.) should have one responsibility. Contrast build_wordlists
(which builds two different datatypes) and how the methods I've written only do one task, where the consumer of Words
tells the class how to behave for your business goals. \$\endgroup\$2021年05月23日 21:12:29 +00:00Commented May 23, 2021 at 21:12