I find your error handling a bit counterintuitive: if the input string is invalid, you return an "empty" DocumentFeature, but if there are too many tokens, you raise an exception. I would raise an exception in both cases and let the caller decide what to do.

I also think you're being too pessimistic in validating your inputs: the validation repeats some of the real work that would be done anyway. Furthermore, counting slashes and underscores is insufficient validation. For example, "word1_word2//" passes the initial validation, only to fail at `tuple(Token(word, pos) for (word, pos) in bits)`. Instead, I would suggest validating as you perform the transformations.
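To make the failure mode concrete, here is a small sketch (assuming the original check counts separators: one `_` and two `/` for a 2-gram). The counts for "word1_word2//" look right, but the per-token split does not yield `(word, pos)` pairs:

```python
string = "word1_word2//"

# Separator counts match a well-formed 2-gram like "w1/P1_w2/P2" ...
assert string.count('_') == 1 and string.count('/') == 2

# ... but splitting per token reveals the problem:
bits = [token.split('/') for token in string.split('_')]
print(bits)  # [['word1'], ['word2', '', '']]

# Unpacking each element into (word, pos) then raises ValueError:
try:
    tuple((word, pos) for (word, pos) in bits)
except ValueError as e:
    print('failed:', e)
```

Validating during the transformation (checking each split result has exactly two non-empty pieces) catches this case without a separate counting pass.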
    _TYPES = {'NVN': 'SVO', 'JN': 'AN', 'VN': 'VO', 'NN': 'NN'}

    @classmethod
    def from_string(cls, string):
        """
        Takes a string representing a DocumentFeature and creates an object out of it. The string format is
        "word/PoS" or "word1/PoS1_word2/PoS2", etc. The type of the feature is inferred from the length and
        PoS tags of the input string.
        :type string: str
        """
        try:
            parts = string.strip().split('_')
            if len(parts) > 3:
                raise ValueError('Document feature %s is too long' % string)
            bits = [part.split('/') for part in parts]
            # Check for too many slashes, too few slashes, or empty words
            if not all(len(bit) == 2 and bit[0] for bit in bits):
                raise ValueError('Invalid document feature %s' % string)
            tokens = tuple(Token(word, pos) for (word, pos) in bits)
            feature_type = cls._TYPES.get(''.join(t.pos for t in tokens),
                                          ('EMPTY', '1-GRAM', '2-GRAM', '3-GRAM')[len(tokens)])
        except Exception:
            logging.error('Cannot create token out of string %s', string)
            raise
        return DocumentFeature(feature_type, tokens)
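To show the suggestion end to end, here is a runnable sketch under the assumption of minimal namedtuple-based `Token` and `DocumentFeature` classes (the real classes presumably carry more behaviour). Both kinds of malformed input now raise `ValueError`, and the caller decides how to react:

```python
from collections import namedtuple

# Hypothetical minimal stand-ins for the real classes:
Token = namedtuple('Token', ['word', 'pos'])

_TYPES = {'NVN': 'SVO', 'JN': 'AN', 'VN': 'VO', 'NN': 'NN'}

class DocumentFeature(namedtuple('DocumentFeature', ['type', 'tokens'])):
    @classmethod
    def from_string(cls, string):
        parts = string.strip().split('_')
        if len(parts) > 3:
            raise ValueError('Document feature %s is too long' % string)
        bits = [part.split('/') for part in parts]
        # Reject too many/too few slashes and empty words in one pass
        if not all(len(bit) == 2 and bit[0] for bit in bits):
            raise ValueError('Invalid document feature %s' % string)
        tokens = tuple(Token(word, pos) for (word, pos) in bits)
        # Known PoS sequences map to a named type; anything else
        # falls back to an n-gram label based on length
        feature_type = _TYPES.get(''.join(t.pos for t in tokens),
                                  ('EMPTY', '1-GRAM', '2-GRAM', '3-GRAM')[len(tokens)])
        return cls(feature_type, tokens)

# The caller, not the parser, decides what rejection means here:
for s in ['dog/N', 'big/J_dog/N', 'word1_word2//']:
    try:
        print(DocumentFeature.from_string(s))
    except ValueError as e:
        print('rejected:', e)
```

Note that `dict.get(key, default)` handles the fallback in one expression, so there is no need for the `... or ...` idiom, which would also misfire if a legitimate mapped value were ever falsy.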