Python splitting list to sublist at start and end keyword patterns

Question 1

If I were to have a list, say:

lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']

where elements prefixed and suffixed with the ! character mark the start and end of a sublist, how would I return the following list:

lst = ['foo', 'bar', ['test', 'hello', 'world'], 'word']

I'm having some difficulty finding a solution for this. Here's one approach I've tried:

def define(lst):
    """Group each ``'!first' ... 'last!'`` run of *lst* into a nested sublist.

    The list is modified in place and the same list object is returned.
    The leading ``'!'`` of the first grouped element and the trailing ``'!'``
    of the last grouped element are stripped.  An element that starts with
    ``'!'`` but is never followed by an element ending in ``'!'`` is left
    untouched.  Nested groups are not supported.
    """
    index = 0
    while index < len(lst):
        if lst[index].startswith('!'):
            # Search for the nearest closing element rather than assuming it
            # sits exactly two positions ahead (which also ran off the end of
            # short lists).
            end = next(
                (pos for pos in range(index, len(lst))
                 if lst[pos].endswith('!')),
                None,
            )
            if end is not None:
                group = lst[index:end + 1]
                group[0] = group[0][1:]      # drop the opening '!'
                group[-1] = group[-1][:-1]   # drop the closing '!'
                # Replace the whole run with the single nested list.
                lst[index:end + 1] = [group]
        index += 1
    return lst

Any help would be greatly appreciated.

Question 2

Do you want your sublist to contain all elements between the two !s?

Question 3

Can an element both start and end with !, like '!element!'?

Question 4

What if there are more opening elements than closing ones? Should we check for that?

Question 5

Checks for matching numbers of opening/closing brackets are not needed, and nested sublists are not needed either. An element cannot both start and end with '!' (as in '!element!').

Question 6

Assuming that there are no elements which both start and end with !, like '!foo!'.

First of all we can write helper predicates like

def is_starting_element(element):
    """Tell whether *element* opens a sublist, i.e. begins with ``'!'``."""
    opening_marker = '!'
    return element.startswith(opening_marker)
def is_ending_element(element):
    """Tell whether *element* closes a sublist, i.e. finishes with ``'!'``."""
    closing_marker = '!'
    return element.endswith(closing_marker)

Then we can write generator-function (because they are awesome)

def walk(elements):
    """Yield the items of *elements*, folding ``'!'``-delimited runs into lists.

    An element accepted by ``is_starting_element`` opens a nested list (its
    first character is stripped); an element accepted by
    ``is_ending_element`` closes the current level (its last character is
    stripped).  Nesting is handled by recursion.

    A closing element met at the top level ends the generator, so anything
    after it is not yielded; validate the input first if that matters.
    """
    # A single shared iterator: the recursive call below keeps consuming from
    # the exact position where this level stopped.
    elements = iter(elements)
    for element in elements:
        if is_starting_element(element):
            # The recursive walk runs until it meets the matching closer.
            yield [element[1:], *walk(elements)]
        elif is_ending_element(element):
            yield element[:-1]
            return  # hand control back to the enclosing level
        else:
            yield element

Tests:

>>> lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
>>> list(walk(lst))
['foo', 'bar', ['test', 'hello', 'world'], 'word']
>>> lst = ['foo', 'bar', '!test', '!hello', 'world!', 'word!']
>>> list(walk(lst))
['foo', 'bar', ['test', ['hello', 'world'], 'word']]
>>> lst = ['hello!', 'world!']
>>> list(walk(lst))
['hello']

as we can see from the last example if there are more closing elements than opening ones remaining closing elements will be ignored (this is because we're returning from generator). So if lst has invalid signature (difference between opening and closing elements is not equal to zero) then we can have some unpredictable behavior. As a way out of this situation we can validate given data before processing and raise error if data is invalid.

We can write validator like

def validate_elements(elements):
    """Raise ``ValueError`` unless opening and closing elements are balanced.

    Every element accepted by ``is_starting_element`` counts as +1, every
    other element accepted by ``is_ending_element`` counts as -1.  The data
    is valid when the total is zero and the running total never drops below
    zero, i.e. no closing element appears before its opening one.

    Returns ``None`` when the data is valid.
    """
    def get_sign(element):
        if is_starting_element(element):
            return 1
        if is_ending_element(element):
            return -1
        return 0

    signature = 0
    closes_before_opening = False
    for element in elements:
        signature += get_sign(element)
        if signature < 0:
            # Remember it, but keep counting so the messages for a plain
            # surplus of openers or closers stay the same as before.
            closes_before_opening = True

    if signature > 0:
        raise ValueError('Data is invalid: '
                         'there are more opening elements '
                         'than closing ones.')
    if signature < 0:
        raise ValueError('Data is invalid: '
                         'there are more closing elements '
                         'than opening ones.')
    if closes_before_opening:
        # Counts match, but e.g. ['a!', '!b'] is still not well formed.
        raise ValueError('Data is invalid: '
                         'a closing element appears before '
                         'its opening one.')

Tests

>>> lst = ['!hello', 'world!']
>>> validate_elements(lst) # no exception raised, data is valid
>>> lst = ['!hello', '!world']
>>> validate_elements(lst)
...
ValueError: Data is invalid: there are more opening elements than closing ones.
>>> lst = ['hello!', 'world!']
>>> validate_elements(lst)
...
ValueError: Data is invalid: there are more closing elements than opening ones.

Finally we can write function with validation like

def to_sublists(elements):
    """Validate *elements* and return them with marked runs nested as lists.

    Raises whatever ``validate_elements`` raises for unbalanced data.
    """
    # Validation and walking each need a full pass over the data; a one-shot
    # iterator would be exhausted by validation and produce an empty result.
    elements = list(elements)
    validate_elements(elements)
    return list(walk(elements))

Tests

>>> lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
>>> to_sublists(lst)
['foo', 'bar', ['test', 'hello', 'world'], 'word']
>>> lst = ['foo', 'bar', '!test', '!hello', 'world!', 'word!']
>>> to_sublists(lst)
['foo', 'bar', ['test', ['hello', 'world'], 'word']]
>>> lst = ['hello!', 'world!']
>>> to_sublists(lst)
...
ValueError: Data is invalid: there are more closing elements than opening ones.

EDIT

If we want to handle elements which both start and end with !, like '!bar!', we can modify the walk function using itertools.chain like

from itertools import chain
def walk(elements):
    """Yield the items of *elements*, folding ``'!'``-delimited runs into lists.

    Same contract as the simpler version, except that an element which both
    starts and ends with ``'!'`` (such as ``'!bar!'``) becomes a
    single-element nested list.

    A closing element met at the top level ends the generator, so anything
    after it is not yielded.
    """
    elements = iter(elements)  # shared by every recursion level
    for element in elements:
        if is_starting_element(element):
            # Put the stripped element back in front of the shared iterator,
            # so the recursive call inspects it too and can close the sublist
            # immediately when it also ends with '!'.
            yield list(walk(chain([element[1:]], elements)))
        elif is_ending_element(element):
            yield element[:-1]
            return  # hand control back to the enclosing level
        else:
            yield element

also we need to complete validation by just modifying get_sign function

def get_sign(element):
    """Return the nesting-balance contribution of *element*: +1, -1 or 0.

    Opening elements count +1 and closing elements count -1.  Plain
    elements count 0, and so do elements that both open and close.
    """
    if is_starting_element(element):
        if is_ending_element(element):
            # '!foo!' opens and closes its own sublist: no net change.
            return 0
        return 1
    if is_ending_element(element):
        return -1
    return 0

Tests

>>> lst = ['foo', 'bar', '!test', '!baz!', 'hello', 'world!', 'word']
>>> to_sublists(lst)
['foo', 'bar', ['test', ['baz'], 'hello', 'world'], 'word']

Question 7

Here's an iterative solution that can handle arbitrarily nested lists:

def nest(lst, sep):
    """Return a new list in which *sep*-delimited runs of *lst* are nested.

    An item starting with *sep* opens a nested list, an item ending with
    *sep* closes the innermost open one, and an item doing both becomes a
    single-element nested list.  The separators are stripped.  Nesting may
    be arbitrarily deep.

    A list that is opened but never closed is kept as it is, nested at the
    place where it was opened.

    Raises:
        ValueError: if *sep* is empty, or if a closing item has no matching
            opening item.
    """
    if not sep:
        # Every string starts and ends with '', so the result would be junk.
        raise ValueError('sep must be a non-empty string')

    root = []
    nested_lists = [root]  # stack; the last entry is the list being filled
    for item in lst:
        current_list = nested_lists[-1]
        if item.startswith(sep):
            if item.endswith(sep):
                # Strip both separators; nothing is pushed onto the stack.
                current_list.append([item[len(sep):-len(sep)]])
            else:
                # Start a new nested list and push it onto the stack.
                new_list = [item[len(sep):]]
                current_list.append(new_list)
                nested_lists.append(new_list)
        elif item.endswith(sep):
            if len(nested_lists) == 1:
                raise ValueError(
                    'closing element {!r} has no matching opening '
                    'element'.format(item)
                )
            # Finalize the deepest list and go up by one level.
            current_list.append(item[:-len(sep)])
            nested_lists.pop()
        else:
            current_list.append(item)
    # Always hand back the outermost list, even if some lists stayed open.
    return root

Test run:

>>> nest(['foo', 'bar', '!test', '!baz!', 'hello', 'world!', 'word'], '!')
['foo', 'bar', ['test', ['baz'], 'hello', 'world'], 'word']

The way it works is to maintain a stack of nested lists. Every time a new nested list is created, it gets pushed onto the stack. Elements are always appended to the last list in the stack. When an element that ends with "!" is found, the topmost list is removed from the stack.

Question 8

I would first determine where the start and end points of your sublist are, then cut up the list accordingly, then remove the !s.

def define(lst):
    """Return a copy of *lst* with its one ``'!'``-delimited run nested.

    The run goes from the last element starting with ``'!'`` to the last
    element ending with ``'!'``; both markers are stripped.  Only a single
    run is supported.  *lst* itself is not modified.

    When there is no opening element, no closing element, or the closing
    element comes before the opening one, an unchanged shallow copy is
    returned.
    """
    start_index = end_index = None
    # First find the start and end indexes.
    for index, item in enumerate(lst):
        # startswith/endswith are also safe for empty strings.
        if item.startswith('!'):
            start_index = index
        if item.endswith('!'):
            end_index = index + 1
    if start_index is None or end_index is None or end_index <= start_index:
        return list(lst)
    # Now cut out the sublist and remove the !s.
    sublist = lst[start_index:end_index]
    sublist[0] = sublist[0][1:]
    sublist[-1] = sublist[-1][:-1]
    return lst[:start_index] + [sublist] + lst[end_index:]

Question 9

This only works with a single nested list. Something like ['!foo', 'bar!', '!x', 'y!'] or ['!foo', '!x', 'y!', 'bar!'] gets incorrect output.

Question 10

Yes, I was assuming only one set of !s.

Question 11

Here is a quite simple implementation:

lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
# Join everything into one string and split it on '!'.  A segment that has
# a space at either end lies outside the markers: its words are packed into
# a tuple so they can be told apart from an embedded list.  An empty segment
# (the list starts or ends with a marked word) is outside as well and holds
# no words; checking it first avoids indexing into an empty string.
lst_tmp = [
    (tuple(el.split()) if not el or ' ' in (el[0], el[-1]) else el.split())
    for el in ' '.join(lst).split('!')
]
lst = []
for el in lst_tmp:
    if isinstance(el, tuple):
        # Words outside the markers go back in as individual elements.
        lst.extend(el)
    else:
        # Words between the markers stay together as one nested list.
        lst.append(el)

First we join lst into a single str and then split it on '!'. Now, this results in ['foo bar ', 'test hello world', ' word']. We can now use the appearing whitespace characters in the beginning or end of the elements to signify where the embedded list should appear. The words that should appear individually are packed into tuples, just to distinguish them from the list(s). All of this results in lst_tmp. The final thing to do is to unpack the tuples into their single elements, which is what the loop is doing.

Question 12

This doesn't work correctly if any of the words in the list contain exclamation marks like ['f!o!o']. It also doesn't handle arbitrary nesting like ['!foo', '!x', 'y!', 'bar!'].

Question 13

I think you should insert the sublist into the list rather than assign it, and you also need to delete up to index + 3.

def define(lst):
    """Replace each ``'!first' ... 'last!'`` run of *lst* by a nested list.

    Works in place and returns the same list object.  The run is deleted
    from the list and a sublist holding its elements, with the two ``'!'``
    markers stripped, is inserted at the same position.  A run may have any
    length; an opening element without a later closing element is left
    as it is.  Nested runs are not supported.
    """
    index = 0
    while index < len(lst):
        if lst[index].startswith('!'):
            # Scan forward for the closing element instead of assuming a
            # fixed run length of three, which failed on shorter lists.
            for end in range(index, len(lst)):
                if lst[end].endswith('!'):
                    temp = lst[index:end + 1]
                    temp[0] = temp[0][1:]      # strip the opening '!'
                    temp[-1] = temp[-1][:-1]   # strip the closing '!'
                    del lst[index:end + 1]
                    lst.insert(index, temp)
                    break
        index += 1
    return lst

Question 14

Your indentation is messed up. When I fix it (well, I took a guess. I don't know what the intended indentation is), it throws an exception with the input ['!foo', 'bar!'].

Question 15

@Aran-Fey I am just pointing out the problem with OP solutions. If OP needs dynamic solution OP should try first. And sorry for the bad indentation.

Question 16

I don't think it should be posted as an answer if you're not answering the question... pointing out problems can be done in the comments.

Question 17

Yes I am answering the question but not dynamically as OP did not ask for it. I just fixed OP algorithm and posted corrected version.

Question 18

Then help me to improve it. Please give me some inputs.

Question 19

Please Try below :

lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
result = []       # the rebuilt list, with the sublist at its original position
temp = []         # the sublist currently being filled
isFound = False   # True while we are between an opening and a closing marker
for word in lst:
    if not isFound and word.startswith("!"):
        # Opening marker: start a sublist right here in the result.
        isFound = True
        temp = []
        result.append(temp)
        word = word[1:]  # strip only the leading '!', not every '!'
    if not isFound:
        result.append(word)
    elif word.endswith("!"):
        # Closing marker: strip only the trailing '!' and finish the sublist.
        temp.append(word[:-1])
        isFound = False
    else:
        temp.append(word)
lst = result

Question 20

That puts the sublist at the end of the list, and doesn't remove the !s.

Question 21

Yes, agree, but this can be enhanced to achieve that as well. but solution from @Aran-Fey is much better.

Question 22

You have a bunch of typos in there (str,replace), and if I try it on the input ['!foo', 'bar!'] it throws a ValueError. str.replace is also not the right tool for the job, because it will replace all occurrences of the exclamation mark. If there's a word like "!fo!o" in the list, the result would contain "foo" instead of "fo!o".

Azat Ibrakov Azat Ibrakov 11.1k9 gold badges43 silver badges58 bronze badges · Accepted Answer · 2018-04-01 10:20:10Z

Assuming that there are no elements which both start and end with !, like '!foo!'.

First of all we can write helper predicates like

def is_starting_element(element):
    """Tell whether *element* opens a sublist, i.e. begins with ``'!'``."""
    opening_marker = '!'
    return element.startswith(opening_marker)
def is_ending_element(element):
    """Tell whether *element* closes a sublist, i.e. finishes with ``'!'``."""
    closing_marker = '!'
    return element.endswith(closing_marker)

Then we can write generator-function (because they are awesome)

def walk(elements):
    """Yield the items of *elements*, folding ``'!'``-delimited runs into lists.

    An element accepted by ``is_starting_element`` opens a nested list (its
    first character is stripped); an element accepted by
    ``is_ending_element`` closes the current level (its last character is
    stripped).  Nesting is handled by recursion.

    A closing element met at the top level ends the generator, so anything
    after it is not yielded; validate the input first if that matters.
    """
    # A single shared iterator: the recursive call below keeps consuming from
    # the exact position where this level stopped.
    elements = iter(elements)
    for element in elements:
        if is_starting_element(element):
            # The recursive walk runs until it meets the matching closer.
            yield [element[1:], *walk(elements)]
        elif is_ending_element(element):
            yield element[:-1]
            return  # hand control back to the enclosing level
        else:
            yield element

Tests:

>>> lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
>>> list(walk(lst))
['foo', 'bar', ['test', 'hello', 'world'], 'word']
>>> lst = ['foo', 'bar', '!test', '!hello', 'world!', 'word!']
>>> list(walk(lst))
['foo', 'bar', ['test', ['hello', 'world'], 'word']]
>>> lst = ['hello!', 'world!']
>>> list(walk(lst))
['hello']

as we can see from the last example if there are more closing elements than opening ones remaining closing elements will be ignored (this is because we're returning from generator). So if lst has invalid signature (difference between opening and closing elements is not equal to zero) then we can have some unpredictable behavior. As a way out of this situation we can validate given data before processing and raise error if data is invalid.

We can write validator like

def validate_elements(elements):
    """Raise ``ValueError`` unless opening and closing elements are balanced.

    Every element accepted by ``is_starting_element`` counts as +1, every
    other element accepted by ``is_ending_element`` counts as -1.  The data
    is valid when the total is zero and the running total never drops below
    zero, i.e. no closing element appears before its opening one.

    Returns ``None`` when the data is valid.
    """
    def get_sign(element):
        if is_starting_element(element):
            return 1
        if is_ending_element(element):
            return -1
        return 0

    signature = 0
    closes_before_opening = False
    for element in elements:
        signature += get_sign(element)
        if signature < 0:
            # Remember it, but keep counting so the messages for a plain
            # surplus of openers or closers stay the same as before.
            closes_before_opening = True

    if signature > 0:
        raise ValueError('Data is invalid: '
                         'there are more opening elements '
                         'than closing ones.')
    if signature < 0:
        raise ValueError('Data is invalid: '
                         'there are more closing elements '
                         'than opening ones.')
    if closes_before_opening:
        # Counts match, but e.g. ['a!', '!b'] is still not well formed.
        raise ValueError('Data is invalid: '
                         'a closing element appears before '
                         'its opening one.')

Tests

>>> lst = ['!hello', 'world!']
>>> validate_elements(lst) # no exception raised, data is valid
>>> lst = ['!hello', '!world']
>>> validate_elements(lst)
...
ValueError: Data is invalid: there are more opening elements than closing ones.
>>> lst = ['hello!', 'world!']
>>> validate_elements(lst)
...
ValueError: Data is invalid: there are more closing elements than opening ones.

Finally we can write function with validation like

def to_sublists(elements):
    """Validate *elements* and return them with marked runs nested as lists.

    Raises whatever ``validate_elements`` raises for unbalanced data.
    """
    # Validation and walking each need a full pass over the data; a one-shot
    # iterator would be exhausted by validation and produce an empty result.
    elements = list(elements)
    validate_elements(elements)
    return list(walk(elements))

Tests

>>> lst = ['foo', 'bar', '!test', 'hello', 'world!', 'word']
>>> to_sublists(lst)
['foo', 'bar', ['test', 'hello', 'world'], 'word']
>>> lst = ['foo', 'bar', '!test', '!hello', 'world!', 'word!']
>>> to_sublists(lst)
['foo', 'bar', ['test', ['hello', 'world'], 'word']]
>>> lst = ['hello!', 'world!']
>>> to_sublists(lst)
...
ValueError: Data is invalid: there are more closing elements than opening ones.

EDIT

If we want to handle elements which both start and end with !, like '!bar!', we can modify the walk function using itertools.chain like

from itertools import chain
def walk(elements):
    """Yield the items of *elements*, folding ``'!'``-delimited runs into lists.

    Same contract as the simpler version, except that an element which both
    starts and ends with ``'!'`` (such as ``'!bar!'``) becomes a
    single-element nested list.

    A closing element met at the top level ends the generator, so anything
    after it is not yielded.
    """
    elements = iter(elements)  # shared by every recursion level
    for element in elements:
        if is_starting_element(element):
            # Put the stripped element back in front of the shared iterator,
            # so the recursive call inspects it too and can close the sublist
            # immediately when it also ends with '!'.
            yield list(walk(chain([element[1:]], elements)))
        elif is_ending_element(element):
            yield element[:-1]
            return  # hand control back to the enclosing level
        else:
            yield element

also we need to complete validation by just modifying get_sign function

def get_sign(element):
    """Return the nesting-balance contribution of *element*: +1, -1 or 0.

    Opening elements count +1 and closing elements count -1.  Plain
    elements count 0, and so do elements that both open and close.
    """
    if is_starting_element(element):
        if is_ending_element(element):
            # '!foo!' opens and closes its own sublist: no net change.
            return 0
        return 1
    if is_ending_element(element):
        return -1
    return 0

Tests

>>> lst = ['foo', 'bar', '!test', '!baz!', 'hello', 'world!', 'word']
>>> to_sublists(lst)
['foo', 'bar', ['test', ['baz'], 'hello', 'world'], 'word']

CollectivesTM on Stack Overflow

Python splitting list to sublist at start and end keyword patterns

6 Answers 6

EDIT

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

CollectivesTM on Stack Overflow

6 Answers 6

EDIT

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related