Instead of using re.sub you can split it!
def do_something_with_str(string):
# do something with string here.
# for example let's wrap the string with "@" symbol if it's not empty
return f"@{string}" if string else string
def get_replaced_list(string, words):
result = [(string, True), ]
# we take each word we want to replace
for w in words:
new_result = []
# Getting each word in old result
for r in result:
# Now we split every string in results using our word.
split_list = list((x, True) for x in r[0].split(w)) if r[1] else list([r, ])
# If we replace successfully - add all the strings
if len(split_list) > 1:
# This one would be for [text, replaced, text, replaced...]
sub_result = []
ws = [(w, False), ] * (len(split_list) - 1)
for x, replaced in zip(split_list, ws):
sub_result.append(x)
sub_result.append(replaced)
sub_result.append(split_list[-1])
# Add to new result
new_result.extend(sub_result)
# If not - just add it to results
else:
new_result.extend(split_list)
result = new_result
return result
if __name__ == '__main__':
initial_string = 'acbbcbbcacbbcbbcacbbcbbca'
words_to_replace = ('a', 'c')
replaced_list = get_replaced_list(initial_string, words_to_replace)
modified_list = [(do_something_with_str(x[0]), True) if x[1] else x for x in replaced_list]
final_string = ''.join([x[0] for x in modified_list])
Here's variables values of the example above:
initial_string = 'acbbcbbcacbbcbbcacbbcbbca'
words_to_replace = ('a', 'c')
replaced_list = [('', True), ('a', False), ('', True), ('c', False), ('bb', True), ('c', False), ('bb', True), ('c', False), ('', True), ('a', False), ('', True), ('c', False), ('bb', True), ('c', False), ('bb', True), ('c', False), ('', True), ('a', False), ('', True), ('c', False), ('bb', True), ('c', False), ('bb', True), ('c', False), ('', True), ('a', False), ('', True)]
modified_list = [('', True), ('a', False), ('', True), ('c', False), ('@bb', True), ('c', False), ('@bb', True), ('c', False), ('', True), ('a', False), ('', True), ('c', False), ('@bb', True), ('c', False), ('@bb', True), ('c', False), ('', True), ('a', False), ('', True), ('c', False), ('@bb', True), ('c', False), ('@bb', True), ('c', False), ('', True), ('a', False), ('', True)]
final_string = 'ac@bbc@bbcac@bbc@bbcac@bbc@bbca'
As you can see the lists contain tuples. They contain two values - some string and boolean, representing whether it's a text or replaced value (True when text).
After you get replaced list, you can modify it as in the example, checking if it's text value (if x[1] == True).
Hope that helps!
P.S. String formatting like f"some string here {some_variable_here}" requires Python 3.6
lang-py