tech-net: Re: bpf/pcap performance

Subject: Re: bpf/pcap performance
To: Guy Harris <guy@alum.mit.edu>
From: Darren Reed <darrenr@reed.wattle.id.au>
List: tech-net
Date: 04/10/2004 07:30:15
In some email I received from Guy Harris, sie wrote:
> > * the application is threaded, one thread uses select over all the
> > NICs so it knows when to read data from BPF, the other writes to
> > disk.
> 
> The original BPF implementation didn't correctly support "select()" on 
> BPF devices if you had a timeout on the device - "select()" wouldn't 
> consider the BPF device readable until the hold buffer was non-empty, 
> but the store buffer wasn't rotated into the hold buffer until it 
> filled up, so "select()" would wait until the store buffer filled.
> 
> FreeBSD fixed that somewhere in the 4.x timeframe, and I *think* 
> OpenBSD also has it fixed; NetBSD still doesn't have it fixed, as far 
> as I know.
Ok, I went looking. I think the bug you are talking about here relates
to bpfread()? NetBSD has:
 while (d->bd_hbuf == 0) {
 if (d->bd_immediate) {
 if (d->bd_slen == 0) {
 splx(s);
 return (EWOULDBLOCK);
 }
FreeBSD:
 while (d->bd_hbuf == 0) {
 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
OpenBSD:
 while (d->bd_hbuf == 0) {
 if (d->bd_immediate && d->bd_slen != 0) {
FreeBSD also has a bunch of other changes involving the use of callouts
that, according to the commit comment, relate to threads:
http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/net/bpf.c
- search for rev 1.86.
A merge of the above change plus a copy of FreeBSD's changes from 1.86,
adapted for NetBSD, is below. I've not tested it yet beyond compiling
it up and making sure the kernel links cleanly :)
Some feedback from other NetBSD types about whether or not this is a good
patch to apply would be nice. If so, I'll commit it.
Cheers,
Darren
Index: bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.89
diff -c -r1.89 bpf.c
*** bpf.c	22 Jan 2004 00:32:41 -0000	1.89
--- bpf.c	9 Apr 2004 20:17:08 -0000
***************
*** 39,45 ****
 */
 
 #include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.89 2004/01/22 00:32:41 jonathan Exp $");
 
 #include "bpfilter.h"
 
--- 39,45 ----
 */
 
 #include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.90 2004/03/24 15:34:54 atatat Exp $");
 
 #include "bpfilter.h"
 
***************
*** 114,119 ****
--- 114,120 ----
 static void	bpf_attachd __P((struct bpf_d *, struct bpf_if *));
 static void	bpf_detachd __P((struct bpf_d *));
 static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
+ static void	bpf_timed_out __P((void *));
 static __inline void
 		bpf_wakeup __P((struct bpf_d *));
 static void	catchpacket __P((struct bpf_d *, u_char *, u_int, u_int,
***************
*** 380,385 ****
--- 381,387 ----
 	/* Mark "free" and do most initialization. */
 	memset((char *)d, 0, sizeof(*d));
 	d->bd_bufsize = bpf_bufsize;
+ 	callout_init(&d->bd_callout);
 
 	return (0);
 }
***************
*** 400,405 ****
--- 402,410 ----
 	int s;
 
 	s = splnet();
+ 	if (d->bd_state == BPF_WAITING)
+ 		callout_stop(&d->bd_callout);
+ 	d->bd_state = BPF_IDLE;
 	if (d->bd_bif)
 		bpf_detachd(d);
 	splx(s);
***************
*** 429,434 ****
--- 434,440 ----
 	int ioflag;
 {
 	struct bpf_d *d = &bpf_dtab[minor(dev)];
+ 	int timed_out;
 	int error;
 	int s;
 
***************
*** 440,456 ****
 		return (EINVAL);
 
 	s = splnet();
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
! 		if (d->bd_immediate) {
! 			if (d->bd_slen == 0) {
! 				splx(s);
! 				return (EWOULDBLOCK);
! 			}
 			/*
 			 * A packet(s) either arrived since the previous
 			 * read or arrived while we were asleep.
--- 446,462 ----
 		return (EINVAL);
 
 	s = splnet();
+ 	if (d->bd_state == BPF_WAITING)
+ 		callout_stop(&d->bd_callout);
+ 	timed_out = (d->bd_state == BPF_TIMED_OUT);
+ 	d->bd_state = BPF_IDLE;
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
! 		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 			/*
 			 * A packet(s) either arrived since the previous
 			 * read or arrived while we were asleep.
***************
*** 535,540 ****
--- 541,564 ----
 	d->bd_sel.sel_pid = 0;
 }
 
+ 
+ static void
+ bpf_timed_out(arg)
+ 	void *arg;
+ {
+ 	struct bpf_d *d = (struct bpf_d *)arg;
+ 	int s;
+ 
+ 	s = splnet();
+ 	if (d->bd_state == BPF_WAITING) {
+ 		d->bd_state = BPF_TIMED_OUT;
+ 		if (d->bd_slen != 0)
+ 			bpf_wakeup(d);
+ 	}
+ 	splx(s);
+ }
+ 
+ 
 int
 bpfwrite(dev, uio, ioflag)
 	dev_t dev;
***************
*** 631,636 ****
--- 655,666 ----
 	struct bpf_insn **p;
 #endif
 
+ 	s = splnet();
+ 	if (d->bd_state == BPF_WAITING)
+ 		callout_stop(&d->bd_callout);
+ 	d->bd_state = BPF_IDLE;
+ 	splx(s);
+ 
 	switch (cmd) {
 
 	default:
***************
*** 1040,1049 ****
--- 1070,1095 ----
 		/*
 		 * An imitation of the FIONREAD ioctl code.
 		 */
+ #if 0
 		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
 			revents |= events & (POLLIN | POLLRDNORM);
 		else
 			selrecord(p, &d->bd_sel);
+ #else
+ 		if (d->bd_hlen != 0 ||
+ 		 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+ 		 d->bd_slen != 0))
+ 			revents |= events & (POLLIN | POLLRDNORM);
+ 		else {
+ 			selrecord(p, &d->bd_sel);
+ 			/* Start the read timeout if necessary */
+ 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ 				callout_reset(&d->bd_callout, d->bd_rtout,
+ 					 bpf_timed_out, d);
+ 				d->bd_state = BPF_WAITING;
+ 			}
+ 		}
+ #endif
 	}
 
 	splx(s);
***************
*** 1177,1182 ****
--- 1223,1233 ----
 	for (m0 = m; m0 != 0; m0 = m0->m_next)
 		pktlen += m0->m_len;
 
+ 	if (pktlen == m->m_len) {
+ 		bpf_tap(arg, mtod(m, u_char *), pktlen);
+ 		return;
+ 	}
+ 
 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
***************
*** 1234,1240 ****
 		ROTATE_BUFFERS(d);
 		bpf_wakeup(d);
 		curlen = 0;
! 	}
 
 	/*
 	 * Append the bpf header.
--- 1285,1297 ----
 		ROTATE_BUFFERS(d);
 		bpf_wakeup(d);
 		curlen = 0;
! 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
! 		/*
! 		 * Immediate mode is set, or the read timeout has
! 		 * already expired during a select call. A packet
! 		 * arrived, so the reader should be woken up.
! 		 */
! 		bpf_wakeup(d);
 
 	/*
 	 * Append the bpf header.
***************
*** 1248,1261 ****
 	 */
 	(*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen));
 	d->bd_slen = curlen + totlen;
- 
- 	if (d->bd_immediate) {
- 		/*
- 		 * Immediate mode is set. A packet arrived so any
- 		 * reads should be woken up.
- 		 */
- 		bpf_wakeup(d);
- 	}
 }
 
 /*
--- 1305,1310 ----
Index: bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.16
diff -c -r1.16 bpfdesc.h
*** bpfdesc.h	7 Aug 2003 16:32:48 -0000	1.16
--- bpfdesc.h	9 Apr 2004 20:17:08 -0000
***************
*** 41,46 ****
--- 41,47 ----
 #ifndef _NET_BPFDESC_H_
 #define _NET_BPFDESC_H_
 
+ #include <sys/callout.h>
 #include <sys/select.h>
 
 /*
***************
*** 85,92 ****
--- 86,100 ----
 	u_char		bd_pad;		/* explicit alignment */
 	struct selinfo	bd_sel;		/* bsd select info */
 #endif
+ 	struct callout	bd_callout;	/* for BPF timeouts with select */
 };
 
+ 
+ /* Values for bd_state */
+ #define BPF_IDLE	0		/* no select in progress */
+ #define BPF_WAITING	1		/* waiting for read timeout in select */
+ #define BPF_TIMED_OUT	2		/* read timeout has expired in select */
+ 
 /*
 * Descriptor associated with each attached hardware interface.
 */

AltStyle によって変換されたページ (->オリジナル) /