Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8b58e3e

Browse files
Merge #265
Closes #265
2 parents dafdae1 + 8bc06a6 commit 8b58e3e

File tree

1 file changed

+68
-35
lines changed

1 file changed

+68
-35
lines changed

‎src/lib/efhw/af_xdp.c‎

Lines changed: 68 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ struct efhw_nic_af_xdp
169169
struct efhw_af_xdp_vi* vi;
170170
struct efhw_buddy_allocator vi_allocator;
171171
spinlock_t alloc_lock;
172+
struct xdp_mmap_offsets mmap_offsets;
172173
};
173174

174175
/*----------------------------------------------------------------------------
@@ -583,41 +584,39 @@ static int xdp_create_ring(struct socket* sock,
583584
return 0;
584585
}
585586

586-
static int xdp_create_rings(struct socket* sock,
587-
struct efhw_page_map* page_map, void* kern_mem_base,
588-
long rxq_capacity, long txq_capacity,
589-
struct efab_af_xdp_offsets_rings* kern_offsets,
590-
struct efab_af_xdp_offsets_rings* user_offsets,
591-
struct ring_map* ring_mapping)
587+
static int xdp_get_mmap_offsets(struct xdp_mmap_offsets* mmap_offsets)
592588
{
593-
int rc;
589+
struct socket* sock;
594590
struct sys_call_area rw_area;
595-
struct xdp_mmap_offsets* mmap_offsets;
591+
struct xdp_mmap_offsets* mmap_offsets_user;
596592
int* optlen;
593+
int rc;
597594

598-
EFHW_BUILD_ASSERT(EFAB_AF_XDP_DESC_BYTES == sizeof(struct xdp_desc));
595+
rc = sock_create(AF_XDP, SOCK_RAW, 0, &sock);
596+
if( rc < 0 )
597+
return rc;
599598

600599
/* We need a read-write area to call getsockopt(). We unmap it from UL
601600
* as soon as possible. */
602601
rc = sys_call_area_alloc(&rw_area);
603602
if( rc < 0 )
604-
return rc;
603+
goto out_release;
605604

606-
mmap_offsets = sys_call_area_ptr(&rw_area);
607-
optlen = (void*)(mmap_offsets + 1);
608-
*optlen = sizeof(*mmap_offsets);
605+
mmap_offsets_user = sys_call_area_ptr(&rw_area);
606+
optlen = (void*)(mmap_offsets_user + 1);
607+
*optlen = sizeof(*mmap_offsets_user);
609608

610609
/* For linux<=5.7 you can use kernel_getsockopt(),
611610
* but newer versions do not have this function, so we have all these
612611
* sys_call_area_*() calls. */
613612
rc = sock->ops->getsockopt(sock, SOL_XDP, XDP_MMAP_OFFSETS,
614613
(void*)sys_call_area_user_addr(&rw_area,
615-
mmap_offsets),
614+
mmap_offsets_user),
616615
(void*)sys_call_area_user_addr(&rw_area, optlen));
617616

618-
/* Security consideration: mmap_offsets is located in untrusted user
617+
/* Security consideration: mmap_offsets_user is located in untrusted user
619618
* memory. I.e. the process can overwrite all this data.
620-
* However this is the process which can create an AF_XDP Onload stack,
619+
* However this is the process which can load an XDP program,
621620
* so it runs with the root account, and it already can do
622621
* anything bad: reboot, execute arbitrary code, etc.
623622
*
@@ -626,44 +625,60 @@ static int xdp_create_rings(struct socket* sock,
626625
sys_call_area_unmap(&rw_area);
627626
if( rc < 0 ) {
628627
EFHW_ERR("%s: getsockopt(XDP_MMAP_OFFSETS) rc=%d", __func__, rc);
629-
goto out;
628+
goto out_unpin;
630629
}
631-
EFHW_ASSERT(*optlen == sizeof(*mmap_offsets));
630+
EFHW_ASSERT(*optlen == sizeof(*mmap_offsets_user));
631+
632+
memcpy(mmap_offsets, mmap_offsets_user, sizeof(*mmap_offsets_user));
633+
rc = 0;
634+
635+
out_unpin:
636+
sys_call_area_unpin(&rw_area);
637+
out_release:
638+
sock_release(sock);
639+
return rc;
640+
}
641+
642+
static int xdp_create_rings(struct socket* sock, struct efhw_nic_af_xdp* xdp,
643+
struct efhw_page_map* page_map, void* kern_mem_base,
644+
long rxq_capacity, long txq_capacity,
645+
struct efab_af_xdp_offsets_rings* kern_offsets,
646+
struct efab_af_xdp_offsets_rings* user_offsets,
647+
struct ring_map* ring_mapping)
648+
{
649+
int rc;
650+
651+
EFHW_BUILD_ASSERT(EFAB_AF_XDP_DESC_BYTES == sizeof(struct xdp_desc));
632652

633653
rc = xdp_create_ring(sock, page_map, kern_mem_base,
634654
rxq_capacity, sizeof(struct xdp_desc),
635655
XDP_RX_RING, XDP_PGOFF_RX_RING,
636-
&mmap_offsets->rx, &kern_offsets->rx, &user_offsets->rx,
656+
&xdp->mmap_offsets.rx, &kern_offsets->rx, &user_offsets->rx,
637657
ring_mapping++);
638658
if( rc < 0 )
639-
goto out;
659+
return rc;
640660

641661
rc = xdp_create_ring(sock, page_map, kern_mem_base,
642662
txq_capacity, sizeof(struct xdp_desc),
643663
XDP_TX_RING, XDP_PGOFF_TX_RING,
644-
&mmap_offsets->tx, &kern_offsets->tx, &user_offsets->tx,
664+
&xdp->mmap_offsets.tx, &kern_offsets->tx, &user_offsets->tx,
645665
ring_mapping++);
646666
if( rc < 0 )
647-
goto out;
667+
return rc;
648668

649669
rc = xdp_create_ring(sock, page_map, kern_mem_base,
650670
rxq_capacity, sizeof(uint64_t),
651671
XDP_UMEM_FILL_RING, XDP_UMEM_PGOFF_FILL_RING,
652-
&mmap_offsets->fr, &kern_offsets->fr, &user_offsets->fr,
672+
&xdp->mmap_offsets.fr, &kern_offsets->fr, &user_offsets->fr,
653673
ring_mapping++);
654674
if( rc < 0 )
655-
goto out;
675+
return rc;
656676

657677
rc = xdp_create_ring(sock, page_map, kern_mem_base,
658678
txq_capacity, sizeof(uint64_t),
659679
XDP_UMEM_COMPLETION_RING, XDP_UMEM_PGOFF_COMPLETION_RING,
660-
&mmap_offsets->cr, &kern_offsets->cr, &user_offsets->cr,
680+
&xdp->mmap_offsets.cr, &kern_offsets->cr, &user_offsets->cr,
661681
ring_mapping);
662-
if( rc < 0 )
663-
goto out;
664-
665-
out:
666-
sys_call_area_unpin(&rw_area);
667682
return rc;
668683
}
669684

@@ -730,6 +745,8 @@ static int af_xdp_init(struct efhw_nic* nic, int instance,
730745
struct socket* sock;
731746
struct file* file;
732747
struct efab_af_xdp_offsets* user_offsets;
748+
const struct cred *old_cred;
749+
struct cred *cred;
733750

734751
if( chunk_size == 0 ||
735752
chunk_size < headroom ||
@@ -749,18 +766,24 @@ static int af_xdp_init(struct efhw_nic* nic, int instance,
749766
if( sw_bt == NULL )
750767
return -EINVAL;
751768

769+
cred = prepare_kernel_cred(&init_task);
770+
if( cred == NULL )
771+
return -ENOMEM;
772+
old_cred = override_creds(cred);
773+
752774
/* We need to use network namespace of network device so that
753775
* ifindex passed in bpf syscalls makes sense
754776
* TODO AF_XDP: there is a race here with device changing netns
755-
* TODO AF_XDP: this fails unless the user namespace has CAP_NET_RAW
756777
*/
757778
rc = __sock_create(dev_net(nic->net_dev), AF_XDP, SOCK_RAW, 0, &sock, 0);
758779
if( rc < 0 )
759-
return rc;
780+
goto fail_cred;
760781

761782
file = sock_alloc_file(sock, 0, NULL);
762-
if( IS_ERR(file) )
763-
return PTR_ERR(file);
783+
if( IS_ERR(file) ) {
784+
rc = PTR_ERR(file);
785+
goto fail_cred;
786+
}
764787
vi->sock = sock;
765788

766789
rc = efhw_page_alloc_zeroed(&vi->user_offsets_page);
@@ -776,7 +799,7 @@ static int af_xdp_init(struct efhw_nic* nic, int instance,
776799
if( rc < 0 )
777800
goto fail;
778801

779-
rc = xdp_create_rings(sock, page_map, &vi->kernel_offsets,
802+
rc = xdp_create_rings(sock, nic->arch_extra, page_map, &vi->kernel_offsets,
780803
vi->rxq_capacity, vi->txq_capacity,
781804
&vi->kernel_offsets.rings, &user_offsets->rings,
782805
vi->ring_mapping);
@@ -815,11 +838,17 @@ static int af_xdp_init(struct efhw_nic* nic, int instance,
815838
add_wait_queue(sk_sleep(vi->sock->sk), &vi->waiter.wait);
816839

817840
user_offsets->mmap_bytes = efhw_page_map_bytes(page_map);
841+
842+
revert_creds(old_cred);
843+
put_cred(cred);
818844
return 0;
819845

820846
fail:
821847
vi->waiter.wait.func = NULL;
822848
xdp_release_vi(nic, vi);
849+
fail_cred:
850+
revert_creds(old_cred);
851+
put_cred(cred);
823852
return rc;
824853
}
825854

@@ -927,6 +956,10 @@ __af_xdp_nic_init_hardware(struct efhw_nic *nic,
927956

928957
spin_lock_init(&xdp->alloc_lock);
929958

959+
rc = xdp_get_mmap_offsets(&xdp->mmap_offsets);
960+
if( rc < 0 )
961+
goto fail_map;
962+
930963
rc = af_xdp_vi_allocator_ctor(xdp, nic->vi_min, nic->vi_lim);
931964
if( rc < 0 )
932965
goto fail_map;

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /