Super User's BSD Cross Reference: /FreeBSD/sys/contrib/openzfs/cmd/zed/zed_disk_event.c

1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6 * You can obtain a copy of the license from the top-level file
7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8 * You may not use this file except in compliance with the license.
9 *
10 * CDDL HEADER END
11 */
12
13 /*
14 * Copyright (c) 2016, 2017, Intel Corporation.
15 */
16
17#ifdef HAVE_LIBUDEV
18
19#include <errno.h>
20#include <fcntl.h>
21#include <libnvpair.h>
22#include <libudev.h>
23#include <libzfs.h>
24#include <libzutil.h>
25#include <pthread.h>
26#include <stdlib.h>
27#include <string.h>
28
29#include <sys/sysevent/eventdefs.h>
30#include <sys/sysevent/dev.h>
31
32#include "zed_log.h"
33#include "zed_disk_event.h"
34#include "agents/zfs_agents.h"
35
36 /*
37 * Portions of ZED need to see disk events for disks belonging to ZFS pools.
38 * A libudev monitor is established to monitor block device actions and pass
39 * them on to internal ZED logic modules. Initially, zfs_mod.c is the only
40 * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM
41 * module responsible for handling disk events for ZFS.
42 */
43
/* Thread running zed_udev_monitor(); created by zed_disk_event_init(). */
44 pthread_t g_mon_tid;
/* libudev context obtained from udev_new() in zed_disk_event_init(). */
45 struct udev *g_udev;
/* Netlink udev monitor; passed to the monitor thread as its argument. */
46 struct udev_monitor *g_mon;
47
48
/* NOTE(review): not referenced in the visible part of this file. */
49#define DEV_BYID_PATH "/dev/disk/by-id/"
50
51 /*
 * 64MB is minimum usable disk for ZFS. zed_udev_monitor() skips devices
 * that report fewer sectors than this (131072 * 512 bytes = 64MB,
 * presumably 512-byte sectors -- confirm against DEV_BSIZE).
 */
52#define MINIMUM_SECTORS 131072
53
54
55 /*
56 * Post disk event to SLM module
57 *
58 * occurs in the context of monitor thread
59 */
60 static void
61 zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
62{
63 char *strval;
64 uint64_t numval;
65
66 zed_log_msg(LOG_INFO, "zed_disk_event:");
67 zed_log_msg(LOG_INFO, "\tclass: %s", class);
68 zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass);
69 if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0)
70 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval);
71 if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0)
72 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
73 if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
74 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
75 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
76 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
77 if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
78 zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
79 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
80 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
81 if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
82 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
83
84 (void) zfs_agent_post_event(class, subclass, nvl);
85}
86
87 /*
88 * dev_event_nvlist: place event schema into an nv pair list
89 *
90 * NAME VALUE (example)
91 * -------------- --------------------------------------------------------
92 * DEV_NAME /dev/sdl
93 * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/...
94 * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC
95 * DEV_PHYS_PATH pci-.&checktime(0000,04,00,':').0-sas-0x4433221101000000-lun-0
96 * DEV_IS_PART ---
97 * DEV_SIZE 500107862016
98 * ZFS_EV_POOL_GUID 17523635698032189180
99 * ZFS_EV_VDEV_GUID 14663607734290803088
100 */
101 static nvlist_t *
102 dev_event_nvlist(struct udev_device *dev)
103{
104 nvlist_t *nvl;
105 char strval[128];
106 const char *value, *path;
107 uint64_t guid;
108
109 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
110 return (NULL);
111
112 if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0)
113 (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval);
114 if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0)
115 (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval);
116 if ((path = udev_device_get_devnode(dev)) != NULL)
117 (void) nvlist_add_string(nvl, DEV_NAME, path);
118 if ((value = udev_device_get_devpath(dev)) != NULL)
119 (void) nvlist_add_string(nvl, DEV_PATH, value);
120 value = udev_device_get_devtype(dev);
121 if ((value != NULL && strcmp("partition", value) == 0) ||
122 (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER")
123 != NULL)) {
124 (void) nvlist_add_boolean(nvl, DEV_IS_PART);
125 }
126 if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) {
127 uint64_t numval = DEV_BSIZE;
128
129 numval *= strtoull(value, NULL, 10);
130 (void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
131 }
132
133 /*
134 * Grab the pool and vdev guids from blkid cache
135 */
136 value = udev_device_get_property_value(dev, "ID_FS_UUID");
137 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
138 (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid);
139
140 value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB");
141 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
142 (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid);
143
144 /*
145 * Either a vdev guid or a devid must be present for matching
146 */
147 if (!nvlist_exists(nvl, DEV_IDENTIFIER) &&
148 !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) {
149 nvlist_free(nvl);
150 return (NULL);
151 }
152
153 return (nvl);
154}
155
156 /*
157 * Listen for block device uevents
158 */
159 static void *
160 zed_udev_monitor(void *arg)
161{
162 struct udev_monitor *mon = arg;
163 char *tmp, *tmp2;
164
165 zed_log_msg(LOG_INFO, "Waiting for new udev disk events...");
166
167 while (1) {
168 struct udev_device *dev;
169 const char *action, *type, *part, *sectors;
170 const char *bus, *uuid;
171 const char *class, *subclass;
172 nvlist_t *nvl;
173 boolean_t is_zfs = B_FALSE;
174
175 /* allow a cancellation while blocked (recvmsg) */
176 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
177
178 /* blocks at recvmsg until an event occurs */
179 if ((dev = udev_monitor_receive_device(mon)) == NULL) {
180 zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive "
181 "device error %d", errno);
182 continue;
183 }
184
185 /* allow all steps to complete before a cancellation */
186 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
187
188 /*
189 * Strongly typed device is the preferred filter
190 */
191 type = udev_device_get_property_value(dev, "ID_FS_TYPE");
192 if (type != NULL && type[0] != '0円') {
193 if (strcmp(type, "zfs_member") == 0) {
194 is_zfs = B_TRUE;
195 } else {
196 /* not ours, so skip */
197 zed_log_msg(LOG_INFO, "zed_udev_monitor: skip "
198 "%s (in use by %s)",
199 udev_device_get_devnode(dev), type);
200 udev_device_unref(dev);
201 continue;
202 }
203 }
204
205 /*
206 * if this is a disk and it is partitioned, then the
207 * zfs label will reside in a DEVTYPE=partition and
208 * we can skip passing this event
209 */
210 type = udev_device_get_property_value(dev, "DEVTYPE");
211 part = udev_device_get_property_value(dev,
212 "ID_PART_TABLE_TYPE");
213 if (type != NULL && type[0] != '0円' &&
214 strcmp(type, "disk") == 0 &&
215 part != NULL && part[0] != '0円') {
216 /* skip and wait for partition event */
217 udev_device_unref(dev);
218 continue;
219 }
220
221 /*
222 * ignore small partitions
223 */
224 sectors = udev_device_get_property_value(dev,
225 "ID_PART_ENTRY_SIZE");
226 if (sectors == NULL)
227 sectors = udev_device_get_sysattr_value(dev, "size");
228 if (sectors != NULL &&
229 strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
230 udev_device_unref(dev);
231 continue;
232 }
233
234 /*
235 * If the blkid probe didn't find ZFS, then a persistent
236 * device id string is required in the message schema
237 * for matching with vdevs. Preflight here for expected
238 * udev information.
239 */
240 bus = udev_device_get_property_value(dev, "ID_BUS");
241 uuid = udev_device_get_property_value(dev, "DM_UUID");
242 if (!is_zfs && (bus == NULL && uuid == NULL)) {
243 zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
244 "source", udev_device_get_devnode(dev));
245 udev_device_unref(dev);
246 continue;
247 }
248
249 action = udev_device_get_action(dev);
250 if (strcmp(action, "add") == 0) {
251 class = EC_DEV_ADD;
252 subclass = ESC_DISK;
253 } else if (strcmp(action, "remove") == 0) {
254 class = EC_DEV_REMOVE;
255 subclass = ESC_DISK;
256 } else if (strcmp(action, "change") == 0) {
257 class = EC_DEV_STATUS;
258 subclass = ESC_DEV_DLE;
259 } else {
260 zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown",
261 action);
262 udev_device_unref(dev);
263 continue;
264 }
265
266 /*
267 * Special case an EC_DEV_ADD for multipath devices
268 *
269 * When a multipath device is created, udev reports the
270 * following:
271 *
272 * 1. "add" event of the dm device for the multipath device
273 * (like /dev/dm-3).
274 * 2. "change" event to create the actual multipath device
275 * symlink (like /dev/mapper/mpatha). The event also
276 * passes back the relevant DM vars we care about, like
277 * DM_UUID.
278 * 3. Another "change" event identical to #2 (that we ignore).
279 *
280 * To get the behavior we want, we treat the "change" event
281 * in #2 as a "add" event; as if "/dev/mapper/mpatha" was
282 * a new disk being added.
283 */
284 if (strcmp(class, EC_DEV_STATUS) == 0 &&
285 udev_device_get_property_value(dev, "DM_UUID") &&
286 udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
287 tmp = (char *)udev_device_get_devnode(dev);
288 tmp2 = zfs_get_underlying_path(tmp);
289 if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
290 /*
291 * We have a real underlying device, which
292 * means that this multipath "change" event is
293 * an "add" event.
294 *
295 * If the multipath device and the underlying
296 * dev are the same name (i.e. /dev/dm-5), then
297 * there is no real underlying disk for this
298 * multipath device, and so this "change" event
299 * really is a multipath removal.
300 */
301 class = EC_DEV_ADD;
302 subclass = ESC_DISK;
303 } else {
304 tmp = (char *)
305 udev_device_get_property_value(dev,
306 "DM_NR_VALID_PATHS");
307 /* treat as a multipath remove */
308 if (tmp != NULL && strcmp(tmp, "0") == 0) {
309 class = EC_DEV_REMOVE;
310 subclass = ESC_DISK;
311 }
312 }
313 free(tmp2);
314 }
315
316 /*
317 * Special case an EC_DEV_ADD for scsi_debug devices
318 *
319 * These devices require a udevadm trigger command after
320 * creation in order to register the vdev_id scsidebug alias
321 * rule (adds a persistent path (phys_path) used for fault
322 * management automated tests in the ZFS test suite.
323 *
324 * After udevadm trigger command, event registers as a "change"
325 * event but needs to instead be handled as another "add" event
326 * to allow for disk labeling and partitioning to occur.
327 */
328 if (strcmp(class, EC_DEV_STATUS) == 0 &&
329 udev_device_get_property_value(dev, "ID_VDEV") &&
330 udev_device_get_property_value(dev, "ID_MODEL")) {
331 const char *id_model, *id_model_sd = "scsi_debug";
332
333 id_model = udev_device_get_property_value(dev,
334 "ID_MODEL");
335 if (strcmp(id_model, id_model_sd) == 0) {
336 class = EC_DEV_ADD;
337 subclass = ESC_DISK;
338 }
339 }
340
341 if ((nvl = dev_event_nvlist(dev)) != NULL) {
342 zed_udev_event(class, subclass, nvl);
343 nvlist_free(nvl);
344 }
345
346 udev_device_unref(dev);
347 }
348
349 return (NULL);
350}
351
352 int
353 zed_disk_event_init()
354{
355 int fd, fflags;
356
357 if ((g_udev = udev_new()) == NULL) {
358 zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno);
359 return (-1);
360 }
361
362 /* Set up a udev monitor for block devices */
363 g_mon = udev_monitor_new_from_netlink(g_udev, "udev");
364 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk");
365 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block",
366 "partition");
367 udev_monitor_enable_receiving(g_mon);
368
369 /* Make sure monitoring socket is blocking */
370 fd = udev_monitor_get_fd(g_mon);
371 if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK)
372 (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK);
373
374 /* spawn a thread to monitor events */
375 if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) {
376 udev_monitor_unref(g_mon);
377 udev_unref(g_udev);
378 zed_log_msg(LOG_WARNING, "pthread_create failed");
379 return (-1);
380 }
381
382 zed_log_msg(LOG_INFO, "zed_disk_event_init");
383
384 return (0);
385}
386
387 void
388 zed_disk_event_fini()
389{
390 /* cancel monitor thread at recvmsg() */
391 (void) pthread_cancel(g_mon_tid);
392 (void) pthread_join(g_mon_tid, NULL);
393
394 /* cleanup udev resources */
395 udev_monitor_unref(g_mon);
396 udev_unref(g_udev);
397
398 zed_log_msg(LOG_INFO, "zed_disk_event_fini");
399}
400
401#else
402
403#include "zed_disk_event.h"
404
/*
 * Stub used when built without libudev: there is nothing to set up, so
 * initialization always reports success.
 */
int
zed_disk_event_init(void)
{
	return (0);
}
410
/*
 * Stub used when built without libudev: nothing was set up, so there is
 * nothing to tear down.
 */
void
zed_disk_event_fini(void)
{
}
415
416#endif /* HAVE_LIBUDEV */
417 

AltStyle によって変換されたページ (->オリジナル) /