I have a superclass and a subclass that need to handle their initialisation differently, based on a regular expression. See below for a working example.
import os
import re
class Sample:
    """A file whose basename is parsed with the class-level regex ``RE``.

    Every named group of ``RE`` becomes an instance attribute (as a string).
    When ``STRICT_MATCHING`` is true the whole basename must match
    (``fullmatch``); otherwise only its beginning has to (``match``).
    """

    RE = r'(?P<id>\d+)'
    STRICT_MATCHING = False

    def __init__(self, f):
        """Store the path *f* and extract the regex groups from its basename.

        Raises AttributeError if the basename does not match, because the
        matcher then returns None.
        """
        self.file = f
        # Drop the extension first, then the directory part.
        stem, _ext = os.path.splitext(self.file)
        self.basename = os.path.basename(stem)
        # The pattern is compiled on every instantiation.
        pattern = re.compile(self.RE)
        if self.STRICT_MATCHING:
            matcher = pattern.fullmatch
        else:
            matcher = pattern.match
        found = matcher(self.basename)
        vars(self).update(found.groupdict())
class DetailedSample(Sample):
    """Sample variant that overrides the pattern and the strictness flag."""

    # Require the whole basename to match the pattern.
    STRICT_MATCHING = True
    # Named groups: numeric id, direction ('l' or 'r'), numeric n.
    RE = r'(?P<id>\d+)_(?P<dir>[lr])_(?P<n>\d+)'
# Build one instance of each class, then show the attributes parsed
# from the file names.
s1 = Sample("/asdf/2.jpg")
s2 = DetailedSample("/asdfadsf/2_l_2.jpg")
print(s1.id)
print(s2.id, s2.dir, s2.n)
This code works but it has two drawbacks:
- The regular expression gets recompiled every time a new
`Sample` is initialised.
- The `match` function cannot be called from other class methods in `Sample` (for instance, I might want to be able to check whether a file has a valid name - relative to `RE` - before initialising a `Sample` from it).
To put it simply, I'd like to have something like this:
class Sample:
    """A file whose basename is parsed with a regex prepared at class level.

    ``re_`` and ``match`` are computed once, while this class body is
    executed, from the values ``RE`` and ``STRICT_MATCHING`` have here.
    """

    RE = r'(?P<id>\d+)'
    STRICT_MATCHING = False
    re_ = re.compile(RE)  #
    match = re_.fullmatch if STRICT_MATCHING else re_.match  #

    def __init__(self, f):
        """Store the path *f* and copy the regex's named groups onto self.

        Raises AttributeError if the basename does not match (the matcher
        returns None in that case).
        """
        self.file = f
        stem, _ext = os.path.splitext(self.file)
        self.basename = os.path.basename(stem)
        found = self.match(self.basename)
        vars(self).update(found.groupdict())

    @classmethod
    def valid(cls, f):
        """Check that *f* has a matching basename and an image extension.

        Returns None when the basename does not match; otherwise a bool
        telling whether the extension is .jpg, .jpeg or .png
        (case-insensitive).
        """
        stem, suffix = os.path.splitext(os.path.basename(f))
        found = cls.match(stem)
        if not found:
            return found
        return suffix.lower() in ('.jpg', '.jpeg', '.png')
class DetailedSample(Sample):
    """Sample variant that redefines the pattern string and strictness flag."""

    STRICT_MATCHING = True
    # Named groups: numeric id, direction ('l' or 'r'), numeric n.
    RE = r'(?P<id>\d+)_(?P<dir>[lr])_(?P<n>\d+)'
This, however, obviously won't work in the subclasses, because the two lines marked with # won't execute after the redefinition of RE and STRICT_MATCHING in the subclass.
Is there an approach that will:
- keep the functionality of the first approach (i.e. regex-based initialisation);
- only compile the regex and define the match method once per subclass;
- allow the match method to be called from class methods;
- only require the redefinition of the regex string and
`STRICT_MATCHING` parameter in the subclasses?
3 Answers 3
You can use __init_subclass__ to make sure each subclass does the appropriate work. This would be defined in a private base class that your public base class inherits from.
import os
import re
class _BaseSample:
    """Private base that prepares a matcher for every subclass.

    Each subclass gets two class attributes when it is created:
    ``_re`` (its ``RE`` compiled) and ``match`` (``_re.fullmatch`` if
    ``STRICT_MATCHING`` is true, else ``_re.match``). ``_BaseSample``
    itself gets neither, because ``__init_subclass__`` only runs for
    subclasses.
    """

    RE = r'(?P<id>\d+)'
    STRICT_MATCHING = False

    def __init_subclass__(cls, **kwargs):
        """Compile ``cls.RE`` once and bind the matching function to *cls*."""
        super().__init_subclass__(**kwargs)
        cls._re = re.compile(cls.RE)
        cls.match = cls._re.fullmatch if cls.STRICT_MATCHING else cls._re.match


class Sample(_BaseSample):
    """A file whose basename is parsed with the class's prepared matcher.

    Every named group of the pattern becomes an instance attribute.
    """

    def __init__(self, f):
        """Store the path *f* and extract the regex groups from its basename.

        Raises AttributeError if the basename does not match, because
        ``match`` then returns None.
        """
        self.file = f
        # Strip the extension, then the directory part.
        self.basename = os.path.basename(os.path.splitext(self.file)[0])
        self.__dict__.update(self.match(self.basename).groupdict())
class DetailedSample(Sample):
    """Sample variant with its own pattern, matched against the full basename."""

    # Whole-basename matching: the base picks fullmatch for strict classes.
    STRICT_MATCHING = True
    # Named groups: numeric id, direction ('l' or 'r'), numeric n.
    RE = r'(?P<id>\d+)_(?P<dir>[lr])_(?P<n>\d+)'
# Build one instance of each class, then show the attributes parsed
# from the file names.
s1 = Sample("/asdf/2.jpg")
s2 = DetailedSample("/asdfadsf/2_l_2.jpg")
print(s1.id)
print(s2.id, s2.dir, s2.n)
Unless you need direct access to the compiled regular expression later, _re can be a local variable to _BaseSample.__init_subclass__ rather than a class attribute of each class.
Note that __init_subclass__ can also take additional keyword arguments, supplied as keyword arguments to the class statement itself. I don't think there is any particular benefit to doing that; it's just a matter of what interface you want to provide for setting RE and STRICT_MATCHING. See Customizing Class Creation for details.
2 Comments
`__init_subclass__` - you're right, this is probably the best way to do it. `__init_subclass__` is only available in Python >= 3.6, so in older versions, you'd have to go with one of the class decorator approaches.
You can do this by decorating the classes.
This decorator inspects the STRICT_MATCHING attribute and sets the match attribute accordingly.
def set_match(cls):
    """Class decorator: attach the right matching function as ``cls.match``.

    Reads ``cls.STRICT_MATCHING`` and ``cls.RE`` (a compiled pattern) and
    stores ``RE.fullmatch`` when strict, ``RE.match`` otherwise. Returns
    the same class object.
    """
    method_name = 'fullmatch' if cls.STRICT_MATCHING else 'match'
    cls.match = getattr(cls.RE, method_name)
    return cls
@set_match
class Sample:
    """A file whose basename is parsed with the compiled pattern ``RE``.

    The ``set_match`` decorator provides the ``match`` attribute used in
    ``__init__``; every named group becomes an instance attribute.
    """

    RE = re.compile(r'(?P<id>\d+)')
    STRICT_MATCHING = False

    def __init__(self, f):
        """Store the path *f* and extract the regex groups from its basename."""
        self.file = f
        # Strip the extension, then the directory part.
        stem, _ext = os.path.splitext(self.file)
        self.basename = os.path.basename(stem)
        found = self.match(self.basename)
        vars(self).update(found.groupdict())
@set_match
class DetailedSample(Sample):
    """Sample variant with its own compiled pattern and strict matching."""

    # Decorated again so that ``match`` is rebuilt from the values below.
    STRICT_MATCHING = True
    # Named groups: numeric id, direction ('l' or 'r'), numeric n.
    RE = re.compile(r'(?P<id>\d+)_(?P<dir>[lr])_(?P<n>\d+)')
The same effect could be obtained using a metaclass:
class MetaMatchSetter(type):
    """Metaclass that sets ``match`` on every class it creates.

    ``match`` is ``RE.fullmatch`` when ``STRICT_MATCHING`` is true and
    ``RE.match`` otherwise, where ``RE`` is a compiled pattern.
    """

    def __new__(cls, clsname, bases, clsdict):
        """Create the class, then bind its matching function.

        ``RE`` and ``STRICT_MATCHING`` are looked up on the finished class
        rather than in *clsdict*, so a subclass may redefine only one of
        them (or neither) and inherit the rest from its bases.

        Raises AttributeError if neither the class nor its bases define
        ``RE`` or ``STRICT_MATCHING``.
        """
        new_cls = super().__new__(cls, clsname, bases, clsdict)
        rgx = new_cls.RE
        new_cls.match = rgx.fullmatch if new_cls.STRICT_MATCHING else rgx.match
        return new_cls
class Sample(metaclass=MetaMatchSetter):
    # Body elided in the original; presumably the same RE, STRICT_MATCHING
    # and __init__ as the decorator-based Sample.
    ...
class DetailedSample(Sample):
    # Body elided in the original; presumably the same RE and
    # STRICT_MATCHING overrides as the decorator-based DetailedSample.
    ...
But using a class decorator (or __init_subclass__ as described in chepner's answer) is more readable and understandable, in my view.
1 Comment
Sample and a class using a different metaclass could run into a conflict.
You could cache/memoize the compiled regular expressions as mentioned on wiki.python.org, and you need to use the class attributes instead of the instance attributes:
import os
import re
import functools
def memoize(obj):
    """Return a wrapper around *obj* that caches results per call arguments.

    The cache dict is stored as ``obj.cache``; ``functools.wraps`` copies
    ``obj.__dict__`` onto the wrapper, so it is reachable as
    ``wrapper.cache`` too.

    Positional-only calls are keyed on the ``args`` tuple. Calls with
    keyword arguments get a distinct key that also includes the sorted
    keyword items, so two calls that differ only in their keyword
    arguments no longer share a cached result.

    All arguments must be hashable; otherwise the lookup raises TypeError.
    """
    cache = obj.cache = {}
    # Unique separator between the positional and keyword parts of a key,
    # so a keyword call can never collide with a positional-only call.
    kwd_mark = object()

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        key = args
        if kwargs:
            key += (kwd_mark,) + tuple(sorted(kwargs.items()))
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
        return cache[key]

    return memoizer
@memoize
def myRegExpCompiler(*args):
    """Compile a regular expression, announcing each real compilation.

    The arguments are forwarded to ``re.compile``. Because of ``@memoize``,
    the body (and its "compiling" message) only runs for arguments that
    have not been seen before.
    """
    print("compiling")
    compiled = re.compile(*args)
    return compiled
class Sample:
    """A file whose basename is parsed with a memoized compiled pattern.

    The pattern string and strictness flag are read from the instance's
    class, so subclasses only need to redefine ``RE`` and
    ``STRICT_MATCHING``.
    """

    RE = r'(?P<id>\d+)'
    STRICT_MATCHING = False

    def __init__(self, f):
        """Store the path *f* and extract the regex groups from its basename."""
        self.file = f
        # Strip the extension, then the directory part.
        stem = os.path.splitext(self.file)[0]
        self.basename = os.path.basename(stem)
        klass = type(self)
        compiled = myRegExpCompiler(klass.RE)  # use cls method!
        # use cls method!
        if klass.STRICT_MATCHING:
            matcher = compiled.fullmatch
        else:
            matcher = compiled.match
        vars(self).update(matcher(self.basename).groupdict())
class DetailedSample(Sample):
    """Sample variant with its own pattern, matched against the full basename."""

    # Whole-basename matching: __init__ picks fullmatch for strict classes.
    STRICT_MATCHING = True
    # Named groups: numeric id, direction ('l' or 'r'), numeric n.
    RE = r'(?P<id>\d+)_(?P<dir>[lr])_(?P<n>\d+)'
# Demo: the statement order matters, because each first use of a pattern
# prints "compiling" before the parsed values are printed.

# First Sample: its pattern is compiled (and cached).
s1 = Sample("/asdf/2.jpg")
print(s1.id)
# First DetailedSample: a different pattern, so it is compiled as well.
s2 = DetailedSample("/asdfadsf/2_l_2.jpg")
print(s2.id, s2.dir, s2.n)
# Second DetailedSample: the pattern comes from the cache, so no
# "compiling" line is printed this time.
s3 = DetailedSample("/asdfadsf/2_l_2.jpg")
print(s3.id, s3.dir, s3.n)
Output:
compiling
2
compiling
2 l 2
2 l 2
... as you can see the reg. expressions are compiled just two times.
Comments
Explore related questions
See similar questions with these tags.
`RE = ...` up to `match =` are only evaluated once on module load. Therefore changing `RE = ...` and `STRICT_MATCHING = ...` has no effect.
`RE = re.compile(r'...')` yourself, rather than having the class call `re.compile` for you.
`__init_subclass__` was introduced precisely to avoid the need for a full-blown metaclass here :)
`re` module keeps the most recently compiled patterns cached, so there may be no penalty to repeatedly compiling the same few patterns over and over (it doesn't actually do extra work if they're still in the cache).