I have written an error analyzer in Python for our company's product. I want to read the log from our device and parse it. On the Y axis there are the different components of the device, and on the X axis there is the time scale; there is a condition that two errors cannot appear at the same time. As the device doesn't support logging for now, I had to simulate it in Python. The error state is "1" and the non-error state is "0"; if an error appears, I write the error's number at the top, between the edges. I have added a slider, and when I change its position I redraw everything, which slows the program down. Any hints on how to implement it in a better way? EVENT_SIZE can be in the thousands. I am a newbie in Python, so I will be glad to hear about any good programming practices.
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
# Labels of the component rows; paired index-by-index with Y_AXIS_VALUES.
Y_AXIS_NAMES = [
    "NAME #1",
    "NAME #2",
    "NAME #3",
    "NAME #4",
    "NAME#5",
]
# Baseline y value of each row: the odd numbers 1, 3, 5, ... (rows are 2 apart).
Y_AXIS_VALUES = list(range(1, 2 * len(Y_AXIS_NAMES), 2))
# Number of simulated events along the time axis.
EVENT_SIZE = 50
Y_AXIS_MIN_VALUE = 0
# Upper y limit: two units above the baseline of the last row.
Y_AXIS_MAX_VALUE = 2 + Y_AXIS_VALUES[-1]
def DrawGrid(ax, pos, *args, **kwargs):
    """Draw straight grid lines at every tenth entry of ``pos``.

    ``ax`` selects the orientation and is not a Matplotlib Axes object:
    ``'x'`` draws vertical lines with ``plt.axvline``, any other value draws
    horizontal lines with ``plt.axhline``. Extra positional and keyword
    arguments are passed through unchanged to the line call.
    """
    vertical = ax == 'x'
    for coord in pos[::10]:
        if vertical:
            plt.axvline(coord, *args, **kwargs)
        else:
            plt.axhline(coord, *args, **kwargs)
def ShowErrorPoints(ax, ay, offset):
    """Mark and label the error samples of one component row.

    ``ax`` is a sequence of ``[flag, number]`` pairs and ``ay`` the matching
    sequence of time values. Only every second entry (indices 1, 3, 5, ...)
    of both is inspected. Where the flag equals 1, a red dot is plotted at
    ``(time, offset + 1)`` and the number is written next to it.
    """
    for entry, when in zip(ax[1::2], ay[1::2]):
        if entry[0] != 1:
            continue
        level = offset + 1
        plt.plot(when, level, marker='o', color='r', markersize=5)
        plt.annotate(
            entry[1],
            xy=(when + 0.1, level + 0.3),
            textcoords='data',
            horizontalalignment='right',
            verticalalignment='top',
        )
def update(val):
    """Slider callback: centre the visible x range on the slider position.

    The ``val`` argument is not used; the position is read from the
    module-level ``axslider`` and the half-width from ``viewwindow``.
    """
    centre = axslider.val
    left = centre - viewwindow
    right = centre + viewwindow
    ax.set_xbound(left, right)
    fig.canvas.draw_idle()
def randBinList(n):
    """Return a list of ``n`` random bits, each 0 or 1.

    Defined as a named function rather than a lambda bound to a name, so it
    has a proper name in tracebacks and can carry this docstring. The
    camelCase name is kept because the rest of the script calls it.
    """
    return [random.randint(0, 1) for _ in range(n)]
######################
#Simulate input values
######################
# Five independent raw 0/1 sequences of length EVENT_SIZE, one per component.
first = np.array(randBinList(EVENT_SIZE))
second_ = np.array(randBinList(EVENT_SIZE))
third_ = np.array(randBinList(EVENT_SIZE))
forth_ = np.array(randBinList(EVENT_SIZE))
fifth_ = np.array(randBinList(EVENT_SIZE))
# Make the errors mutually exclusive in time: `mask` accumulates (logical OR)
# the raw sequences processed so far, and each later component keeps an error
# only where `mask` is still 0.
mask = np.zeros(EVENT_SIZE)
mask = np.logical_or (mask, first ).astype(int)
second = np.logical_and(1-mask, second_).astype(int)
mask = np.logical_or (mask, second_).astype(int)
third = np.logical_and (1-mask, third_).astype(int)
mask = np.logical_or (mask, third_).astype(int)
forth = np.logical_and (1-mask, forth_).astype(int)
mask = np.logical_or (mask, forth_)
fifth = np.logical_and (1-mask, fifth_).astype(int)
mask = np.logical_or (mask, fifth_)  # not read again in the code below
# Replace every flag by a [flag, number] pair; the number is drawn from 1..100
# (random.randint includes both ends) whether or not the flag is set.
first = [[i, random.randint(1, 100)] for i in first]
second = [[i, random.randint(1, 100)] for i in second]
third = [[i, random.randint(1, 100)] for i in third]
forth = [[i, random.randint(1, 100)] for i in forth]
fifth = [[i, random.randint(1, 100)] for i in fifth]
# Repeat every pair twice in a row along axis 0, doubling the number of rows.
first = np.repeat(first, [2], axis = 0)
second = np.repeat(second,[2], axis = 0)
third = np.repeat(third, [2], axis = 0)
forth = np.repeat(forth, [2], axis = 0)
fifth = np.repeat(fifth, [2], axis = 0)
# Time axis with one value per repeated row: 0, 0.5, 1.0, ...
t = 0.5 * np.arange(EVENT_SIZE*2)
###############
#Building plots
###############
# NOTE: a `plt.hold(True)` call used to follow `plt.subplots()`. pyplot.hold was
# deprecated in Matplotlib 2.0 and removed in 3.0 (plot calls always add to the
# current axes), so the call is dropped.
fig, ax = plt.subplots()
plt.subplots_adjust(left=0.15, bottom=0.25)
plt.yticks(Y_AXIS_VALUES, Y_AXIS_NAMES)
# NOTE: DrawGrid keeps only every tenth position, so with five row values only
# the first of Y_AXIS_VALUES gets a horizontal line.
DrawGrid('x', range(EVENT_SIZE+1), color='.5', linewidth=0.5)
DrawGrid('y', Y_AXIS_VALUES, color='.5', linewidth=0.5)
plt.title("Error decoder")
# The window title is set through the figure manager; the canvas-level
# set_window_title() was deprecated and has since been removed from Matplotlib.
fig.canvas.manager.set_window_title('Decoder')

# One step trace per component, lifted to the baseline of its row. Column 0 of
# each series holds the 0/1 flag.
component_series = (first, second, third, forth, fifth)
for series, baseline in zip(component_series, Y_AXIS_VALUES):
    plt.step(t, series[:, 0] + baseline, 'b', linewidth=1, where='post')
# Markers and numbers are added after all traces, as before.
for series, baseline in zip(component_series, Y_AXIS_VALUES):
    ShowErrorPoints(series, t, baseline)

plt.ylim([0, Y_AXIS_MAX_VALUE])
plt.xlim([0, EVENT_SIZE])

# Slider that pans a window of +/- viewwindow time units along the x axis.
viewwindow = 20
axcolor = 'lightgoldenrodyellow'
# `facecolor` replaces the `axisbg` keyword, which was deprecated in
# Matplotlib 2.0 and has since been removed.
axpos = plt.axes([0.15, 0.1, 0.65, 0.03], facecolor=axcolor)
axslider = Slider(axpos, '', t[0]+viewwindow, t[-1]-viewwindow, valinit=(t[-1]+t[0])/2.0)
update(axslider.val)  # apply the initial window before the first draw
axslider.on_changed(update)
plt.show()
-
It seems to work relatively smoothly for me... – Graipher, commented Aug 8, 2017 at 12:32
-
@Graipher If I increase EVENT_SIZE to 1000 the performance falls, and I would like to ask how to prevent that. – Andrey Mazur, commented Aug 9, 2017 at 8:34
1 Answer 1
when I change it's position I redraw everything
Well. You don't redraw everything; matplotlib does. It's smart enough for the main plot content not to cause problems when this is done; it's your annotations that are the real issue. Unfortunately, there is no text collection, so you're stuck with individual Text
artists, and either you have to selectively add and remove them as I demonstrate, or you have to get much trickier with blitting. Thankfully the dumb method of adding and removing artists offers more than adequate performance for these purposes.
Individually,
Numerics
I don't think that doubling your time series like
t = 0.5 * np.arange(EVENT_SIZE*2)
and calling repeat()
is a good idea. I demonstrate without that. One consequence is that the annotations are drawn at the time of transition (as they should be) rather than a half-step later.
The random
comprehension and the pile of calls to np.logical_or
(etc.) all need to go away. This is really just one call to get a random matrix of 5x50 followed by a ufunc
accumulation.
matplotlib
Don't draw a grid yourself! Just enable the in-built grid support.
Nearly all of your plt.()
calls need to be replaced with axis or figure member calls. The plt
functions are just wrappers. You actually did explicitly fig, ax = plt.subplots()
(that's good!) but then you mostly ignored ax
(that's bad).
In show_error_points
, don't plot()
. Since you really just want a point collection, use the higher-performance CircleCollection
.
Don't annotate
. As I wrote above, this is your biggest performance burden. Replace that with a list of pre-constructed Text()
artists, and don't add them until they come into view.
hold()
has been removed from the API; the program is fine without it. Similar for axisbg
.
Prefer some named colour like lightgrey
rather than .5
. In your call to step()
, write blue
instead of b
.
fig.canvas.set_window_title('Decoder')
was never a good idea, and now it's broken. It relied on a feature of a specific canvas backend that no longer exists (or perhaps I got unlucky and didn't default to the same backend as you). Just use the normal matplotlib set_title()
or suptitle()
.
Your verticalalignment='top'
is incorrect: notice how you have to add height to the offset to fix it. Use bottom
instead. Arguably horizontalalignment
is clearer as center
.
Don't leave your slider unlabelled.
Any time that you stack a bunch of text together with regular spatial intervals but irregular character counts, you're in for trouble: overlaps, etc. Rotating these helps.
Since your y-axis is categorical rather than numeric and you have some order to your component names, you should invert the axis. However, you can't just .invert_yaxis()
, because that affects the orientation of the individual component traces. Instead, you need to reverse Y_AXIS_VALUES
.
Python
You need functions and a __main__
guard. Especially: any time that you find yourself writing a section banner like
###############
#Building plots
###############
that needs to go away, and you need to write a function.
If Y_AXIS_NAMES
is going to keep those values, then use a list comprehension to generate those strings rather than hard-coding them.
Don't [i for i in range
; just use the range
directly.
Add typehints. For the ax
parameter to draw_grid
, it has a confusing name that many programmers will assume means it takes an actual matplotlib Axes
object, but it doesn't. Renaming this, and also adding a Literal
typehint, will solve the issue.
For PEP8 compatibility, rewrite function names like DrawGrid
as draw_grid
.
Your i[0][0]
has an opaque and non-obvious packing scheme. It needs to be split up into multiple variables.
Blocks like this:
plt.step(t, [i[0] + 1 for i in first] , 'b', linewidth = 1, where='post')
plt.step(t, [i[0] + 3 for i in second], 'b', linewidth = 1, where='post')
plt.step(t, [i[0] + 5 for i in third] , 'b', linewidth = 1, where='post')
plt.step(t, [i[0] + 7 for i in forth] , 'b', linewidth = 1, where='post')
plt.step(t, [i[0] + 9 for i in fifth] , 'b', linewidth = 1, where='post')
need to be replaced with loops.
Don't write randBinList
as a lambda. Just write a function.
Suggested
This works "fine" for 10,000 events. I estimate ~ 20 Hz but have not measured.
import functools
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import CircleCollection
from matplotlib.text import Text
from matplotlib.ticker import MultipleLocator
from matplotlib.widgets import Slider
# Number of device components (one plot row each) and of simulated events.
N_COMPONENTS = 5
N_EVENTS = 10_000

# Row labels 'NAME #1' .. 'NAME #<N_COMPONENTS>'.
Y_AXIS_NAMES = [f'NAME #{number}' for number in range(1, N_COMPONENTS + 1)]
# Row baselines as a descending range of odd numbers, ending at 1.
Y_AXIS_VALUES = range(2 * len(Y_AXIS_NAMES) - 1, 0, -2)
Y_AXIS_MIN_VALUE = 0
# Upper y limit: two units above the first (highest) baseline.
Y_AXIS_MAX_VALUE = 2 + Y_AXIS_VALUES[0]
def simulate_input_values(
    rand: np.random.Generator,
    n_components: 'int | None' = None,
    n_events: 'int | None' = None,
) -> tuple[
    np.ndarray,  # event time axis: (n_events,)
    np.ndarray,  # error flags: (n_components, n_events)
    np.ndarray,  # error codes: (n_components, n_events)
]:
    """
    Simulate a device log in which at most one component is in error per event.

    :param rand: NumPy random generator that supplies all randomness.
    :param n_components: number of component rows; defaults to N_COMPONENTS.
    :param n_events: number of events (columns); defaults to N_EVENTS.
    :return: the event time axis 0 .. n_events-1, the read-only uint8 error flags
        (0 or 1), and the read-only uint8 error codes in 1..100. A code is generated
        for every cell, whether or not its flag is set.
    """
    # Resolve the defaults at call time so that the module constants stay the default
    if n_components is None:
        n_components = N_COMPONENTS
    if n_events is None:
        n_events = N_EVENTS

    # Random binary component error flag values
    error_flags = rand.integers(
        low=0, high=1, endpoint=True, size=(n_components, n_events), dtype=np.uint8,
    )

    # An error in any earlier component suppresses the errors of all later components at
    # the same event. The mask is accumulated from the raw flags before any are cleared.
    exclusion_mask = np.bitwise_or.accumulate(error_flags[:-1, :], axis=0)
    error_flags[1:, :] &= ~exclusion_mask
    error_flags.flags.writeable = False  # lock the array

    # endpoint=True makes the code 100 attainable; without it `high` is exclusive and the
    # codes would stop at 99
    error_codes = rand.integers(
        low=1, high=100, endpoint=True, size=error_flags.shape, dtype=np.uint8,
    )
    error_codes.flags.writeable = False

    return np.arange(n_events), error_flags, error_codes
def make_error_points(
    ax: plt.Axes, t: np.ndarray, flags: np.ndarray, codes: np.ndarray,
) -> tuple[
    np.ndarray,  # times at which there is an error condition in any component
    list[Text],  # all annotation artists, constructed but not yet added to the axes
]:
    """
    Add the error markers to ``ax`` and prepare one text label per error.

    All markers go into a single CircleCollection that is added to the axes here. The
    Text labels are only constructed, not added. Both returned sequences run in the same
    order: by event first, then by component.
    """
    # Transposed so that axis 0 runs over events and axis 1 over components; boolean-mask
    # indexing below then yields its results ordered by event time
    is_error = flags.T.astype(bool)

    # A boolean selection cannot stay two-dimensional because it would be jagged, so each
    # quantity is broadcast to the full (events, components) grid and then flattened by
    # the mask.
    time_grid = np.broadcast_to(t[:, np.newaxis], is_error.shape)
    marker_y_grid = np.broadcast_to(1 + np.array(Y_AXIS_VALUES), is_error.shape)
    error_times = time_grid[is_error]
    marker_y = marker_y_grid[is_error]
    error_codes = codes.T[is_error]

    # All point markers for all components and error events in one collection
    markers = CircleCollection(
        sizes=np.full(shape=error_times.size, fill_value=7),
        facecolors='red', antialiaseds=False,
        offsets=np.stack(arrays=(error_times, marker_y), axis=-1),
        offset_transform=ax.transData,
    )
    ax.add_artist(markers)

    # One label per error. Adding them all to the axes here is very slow; instead they
    # are added and removed selectively during pan update.
    labels = [
        Text(
            text=code, x=when, y=y + 0.1,
            fontsize=8, rotation=90,
            horizontalalignment='center', verticalalignment='bottom',
        )
        for when, y, code in zip(error_times, marker_y, error_codes)
    ]

    return error_times, labels
def update(
    val: float,
    ax: plt.Axes,
    slider: Slider,
    view_window: float,
    error_times: np.ndarray,
    text_artists: list[Text],
    visible_artists: list[Text],  # will be mutated
) -> None:
    """
    Slider callback: pan the x axis and swap in the labels for the new window.

    Every label that is currently attached is removed, and the labels whose error time
    lies in the new window are attached; there is no attempt to keep the ones that stay
    in view. ``val`` is not used: the position is read from ``slider.val``.
    ``error_times`` is searched with ``searchsorted`` and the resulting indices slice
    ``text_artists``.
    """
    centre = slider.val
    t0, t1 = centre - view_window, centre + view_window

    # Detach everything that the previous call attached
    for stale in visible_artists:
        stale.remove()
    visible_artists.clear()

    # Index range of the errors with t0 <= time < t1
    start, end = error_times.searchsorted([t0, t1])
    visible_artists += text_artists[start:end]
    for fresh in visible_artists:
        ax.add_artist(fresh)

    ax.set_xbound(t0, t1)
def make_axes() -> tuple[
    plt.Figure,
    plt.Axes,  # for main data plot
]:
    """
    Create the figure and the main data axes, with title, grid, ticks and y limits set.

    :return: the new figure and its single axes.
    """
    fig, ax = plt.subplots()
    # Shift the left and bottom edges of the subplot area inwards
    fig.subplots_adjust(left=0.15, bottom=0.25)
    ax.set_title('Error decoder')
    # Built-in grid, drawn at the major ticks of both axes
    ax.grid(visible=True, which='major', axis='both', color='lightgrey', linewidth=0.5)
    # Major x ticks at multiples of 5, minor x ticks at multiples of 1
    ax.xaxis.set_major_locator(MultipleLocator(base=5))
    ax.xaxis.set_minor_locator(MultipleLocator(base=1))
    ax.set_ylim(bottom=Y_AXIS_MIN_VALUE, top=Y_AXIS_MAX_VALUE)
    # One labelled major y tick per entry of Y_AXIS_VALUES
    ax.set_yticks(ticks=Y_AXIS_VALUES, labels=Y_AXIS_NAMES, minor=False)
    return fig, ax
def make_plots(
    ax: plt.Axes, t: np.ndarray, flags: np.ndarray, codes: np.ndarray,
) -> None:
    """
    Draw one step trace per component on ``ax``.

    Each row of ``flags`` is plotted against ``t``, shifted up by the matching entry of
    Y_AXIS_VALUES. ``codes`` is accepted so that existing callers keep working, but it is
    not needed to draw the traces.
    """
    for component_flags, offset in zip(flags, Y_AXIS_VALUES):
        ax.step(
            t, component_flags + offset,
            color='blue', linewidth=0.7, where='post', antialiased=False,
        )
def build_slider(
    fig: plt.Figure, ax: plt.Axes, t: np.ndarray,
    error_times: np.ndarray, text_artists: list[Text],
    view_window: float = 20.,
) -> Slider:
    """
    Create the time-pan slider and connect it to update().

    :param fig: figure that receives the slider axes.
    :param ax: main data axes that the slider pans.
    :param t: event time axis; its first and last values bound the slider range.
    :param error_times: error times, passed through to update().
    :param text_artists: label artists, passed through to update().
    :param view_window: half-width of the visible x range.
    :return: the slider. The caller needs to keep a reference to it, or else it will be
        garbage-collected.
    """
    # add_axes is the documented call for placing axes at a (left, bottom, width, height)
    # rectangle in figure coordinates; add_subplot is meant for grid positions.
    ax_pos = fig.add_axes([0.15, 0.1, 0.65, 0.03])
    slider = Slider(
        ax=ax_pos, label='Time pan',
        valmin=t[0] + view_window, valmax=t[-1] - view_window,
        valinit=t[[0, -1]].mean(),
    )

    # Labels currently attached to the axes; shared with and mutated by update()
    visible_artists: list[Text] = []
    bound_update = functools.partial(
        update, ax=ax, slider=slider, view_window=view_window,
        error_times=error_times, text_artists=text_artists, visible_artists=visible_artists,
    )
    bound_update(slider.val)  # apply the initial window
    slider.on_changed(bound_update)

    return slider
def main() -> None:
    """Simulate a log, draw it with its error markers and pan slider, then show it."""
    rng = np.random.default_rng(seed=0)  # fixed seed: reproducible demonstration
    times, error_flags, error_codes = simulate_input_values(rng)

    fig, ax = make_axes()
    make_plots(ax=ax, t=times, flags=error_flags, codes=error_codes)
    error_times, labels = make_error_points(
        ax=ax, t=times, flags=error_flags, codes=error_codes,
    )

    # The slider must stay assigned to a name, or else it will be garbage-collected
    slider = build_slider(
        fig=fig, ax=ax, t=times, error_times=error_times, text_artists=labels,
    )
    plt.show()
# Run the demonstration only when executed as a script, not when imported
if __name__ == '__main__':
    main()
Explore related questions
See similar questions with these tags.