[PATCH] driver: indicate all available packets in the ring-buffer to NDIS in two NBL chains, one chain for IPv4 and one for IPv6.
Alexandru Brinduse
alexandru.brinduse at pango.co
Mon Apr 3 15:23:43 UTC 2023
NdisMIndicateReceiveNetBufferLists() allows batch processing of chained NBLs.
From network performance measurements, it can be seen that indicating NBLs in batches brings a dramatic increase in download speed.
No limit is imposed on the chain size because the ring buffer itself is already bounded. In the worst case (it is unclear whether this can actually occur in practice), the NBL chain will be as large as the ring buffer.
---
driver/wintun.c | 210 +++++++++++++++++++++++++++++++++---------------
1 file changed, 147 insertions(+), 63 deletions(-)
diff --git a/driver/wintun.c b/driver/wintun.c
index ad4b16b..64f8e1b 100644
--- a/driver/wintun.c
+++ b/driver/wintun.c
@@ -246,6 +246,35 @@ TunNblIsCompleted(_In_ NET_BUFFER_LIST *Nbl)
return (ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & 1;
}
+static VOID /* Frees every NBL of a chain, together with the MDL attached to each NBL's first NET_BUFFER. */
+TunFreeNblChain(_In_ NET_BUFFER_LIST* Nbl) /* NOTE(review): the cleanup path in this patch can pass a NULL head, so _In_opt_ may be the more accurate annotation - confirm */
+{
+ while (Nbl) /* a NULL head simply skips the loop */
+ {
+ NET_BUFFER_LIST* nextNbl = NET_BUFFER_LIST_NEXT_NBL(Nbl); /* read the link before Nbl itself is freed */
+ IoFreeMdl(NET_BUFFER_LIST_FIRST_NB(Nbl)->MdlChain); /* the MDL is released explicitly, before the NBL that references it */
+ NdisFreeNetBufferList(Nbl);
+ Nbl = nextNbl;
+ }
+}
+
+static VOID /* Appends the NBL run Head..Tail to the end of Ctx->Device.Receive.ActiveNbls. */
+TunAddNewNblsToReceiveActiveNblsUnsafe( /* takes no lock itself; the caller in this patch invokes it while holding Ctx->Device.Receive.Lock */
+ _Inout_ TUN_CTX *Ctx,
+ _In_ NET_BUFFER_LIST* Head, /* first NBL of the run being appended */
+ _In_ NET_BUFFER_LIST* Tail /* last NBL of the run; stored as the new ActiveNbls.Tail (assumes Head..Tail are already linked via NET_BUFFER_LIST_NEXT_NBL_EX, as the chain-building loop in this patch does) */
+)
+{
+ if (Ctx->Device.Receive.ActiveNbls.Head)
+ NET_BUFFER_LIST_NEXT_NBL_EX(Ctx->Device.Receive.ActiveNbls.Tail) = Head; /* list is non-empty: hook the run after the current tail */
+ else
+ {
+ KeClearEvent(&Ctx->Device.Receive.ActiveNbls.Empty); /* list is about to become non-empty, so reset the Empty event */
+ Ctx->Device.Receive.ActiveNbls.Head = Head;
+ }
+ Ctx->Device.Receive.ActiveNbls.Tail = Tail;
+}
+
static MINIPORT_SEND_NET_BUFFER_LISTS TunSendNetBufferLists;
_Use_decl_annotations_
static VOID
@@ -491,89 +520,144 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
KeClearEvent(Ctx->Device.Receive.TailMoved);
}
}
+
if (RingTail >= RingCapacity)
break;
- ULONG RingContent = TUN_RING_WRAP(RingTail - RingHead, RingCapacity);
- if (RingContent < sizeof(TUN_PACKET))
+ ULONG RingContentToConsume = TUN_RING_WRAP(RingTail - RingHead, RingCapacity);
+ if (RingContentToConsume < sizeof(TUN_PACKET))
break;
+ ULONG ConsumedRingContent = 0;
- TUN_PACKET *Packet = (TUN_PACKET *)(Ring->Data + RingHead);
- ULONG PacketSize = *(volatile ULONG *)&Packet->Size;
- if (PacketSize > TUN_MAX_IP_PACKET_SIZE)
- break;
+ NET_BUFFER_LIST* NblChainV4 = NULL, *NblChainV6 = NULL;
+ ULONG NumberOfNblsV4 = 0, NumberOfNblsV6 = 0;
- ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
- if (AlignedPacketSize > RingContent)
- break;
+ /* We'll build two NBL chains for IPv4 and IPv6 with all available packets
+ (they are separated so we can use NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE)
+ Network performance measurements indicate that the download speed is drastically increased
+ if multiple NBLs are indicated to NDIS in a single call */
+ while (ConsumedRingContent < RingContentToConsume)
+ {
+ TUN_PACKET* Packet = (TUN_PACKET*)(Ring->Data + RingHead);
+ ULONG PacketSize = *(volatile ULONG*)&Packet->Size;
+ if (PacketSize > TUN_MAX_IP_PACKET_SIZE)
+ goto fatalError;
- RingHead = TUN_RING_WRAP(RingHead + AlignedPacketSize, RingCapacity);
+ ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
+ if (AlignedPacketSize > RingContentToConsume)
+ goto fatalError;
- ULONG NblFlags;
- USHORT NblProto;
- if (PacketSize >= 20 && Packet->Data[0] >> 4 == 4)
- {
- NblFlags = NDIS_NBL_FLAGS_IS_IPV4;
- NblProto = HTONS(NDIS_ETH_TYPE_IPV4);
- }
- else if (PacketSize >= 40 && Packet->Data[0] >> 4 == 6)
- {
- NblFlags = NDIS_NBL_FLAGS_IS_IPV6;
- NblProto = HTONS(NDIS_ETH_TYPE_IPV6);
- }
- else
- goto skipNbl;
-
- VOID *PacketAddr =
- (UCHAR *)MmGetMdlVirtualAddress(Ctx->Device.Receive.Mdl) + (ULONG)(Packet->Data - (UCHAR *)Ring);
- MDL *Mdl = IoAllocateMdl(PacketAddr, PacketSize, FALSE, FALSE, NULL);
- if (!Mdl)
- goto skipNbl;
- IoBuildPartialMdl(Ctx->Device.Receive.Mdl, Mdl, PacketAddr, PacketSize);
- NET_BUFFER_LIST *Nbl = NdisAllocateNetBufferAndNetBufferList(Ctx->NblPool, 0, 0, Mdl, 0, PacketSize);
- if (!Nbl)
- goto cleanupMdl;
- Nbl->SourceHandle = Ctx->MiniportAdapterHandle;
- NdisSetNblFlag(Nbl, NblFlags);
- NET_BUFFER_LIST_INFO(Nbl, NetBufferListFrameType) = (PVOID)NblProto;
- NET_BUFFER_LIST_STATUS(Nbl) = NDIS_STATUS_SUCCESS;
- TunNblSetOffsetAndMarkActive(Nbl, RingHead);
+ RingHead = TUN_RING_WRAP(RingHead + AlignedPacketSize, RingCapacity);
+
+ NET_BUFFER_LIST** TargetChain;
+ ULONG* NumberOfNblsInTargetChain;
+ ULONG NblFlags;
+ USHORT NblProto;
+ if (PacketSize >= 20 && Packet->Data[0] >> 4 == 4)
+ {
+ NblFlags = NDIS_NBL_FLAGS_IS_IPV4;
+ NblProto = HTONS(NDIS_ETH_TYPE_IPV4);
+ TargetChain = &NblChainV4;
+ NumberOfNblsInTargetChain = &NumberOfNblsV4;
+ }
+ else if (PacketSize >= 40 && Packet->Data[0] >> 4 == 6)
+ {
+ NblFlags = NDIS_NBL_FLAGS_IS_IPV6;
+ NblProto = HTONS(NDIS_ETH_TYPE_IPV6);
+ TargetChain = &NblChainV6;
+ NumberOfNblsInTargetChain = &NumberOfNblsV6;
+ }
+ else
+ break;
+
+ VOID* PacketAddr =
+ (UCHAR*)MmGetMdlVirtualAddress(Ctx->Device.Receive.Mdl) + (ULONG)(Packet->Data - (UCHAR*)Ring);
+ MDL* Mdl = IoAllocateMdl(PacketAddr, PacketSize, FALSE, FALSE, NULL);
+ if (!Mdl)
+ break;
+ IoBuildPartialMdl(Ctx->Device.Receive.Mdl, Mdl, PacketAddr, PacketSize);
+ NET_BUFFER_LIST* Nbl = NdisAllocateNetBufferAndNetBufferList(Ctx->NblPool, 0, 0, Mdl, 0, PacketSize);
+ if (!Nbl)
+ goto skipPacket;
+ Nbl->SourceHandle = Ctx->MiniportAdapterHandle;
+ NdisSetNblFlag(Nbl, NblFlags);
+ NET_BUFFER_LIST_INFO(Nbl, NetBufferListFrameType) = (PVOID)NblProto;
+ NET_BUFFER_LIST_STATUS(Nbl) = NDIS_STATUS_SUCCESS;
+ TunNblSetOffsetAndMarkActive(Nbl, RingHead);
+ if (*TargetChain)
+ {
+ NET_BUFFER_LIST_NEXT_NBL((NET_BUFFER_LIST*)(*TargetChain)->Scratch) = Nbl;
+ NET_BUFFER_LIST_NEXT_NBL_EX((NET_BUFFER_LIST*)(*TargetChain)->Scratch) = Nbl;
+ }
+ else
+ {
+ *TargetChain = Nbl;
+ }
+
+ (*TargetChain)->Scratch = Nbl; // Set LAST_NBL
+ ++(*NumberOfNblsInTargetChain);
+ ConsumedRingContent += AlignedPacketSize;
+ continue;
+
+ skipPacket:
+ IoFreeMdl(Mdl);
+ break;
+ }
+
KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
if (!ReadAcquire(&Ctx->Running))
- goto cleanupNbl;
+ {
+ ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
+ goto notRunning;
+ }
KLOCK_QUEUE_HANDLE LockHandle;
KeAcquireInStackQueuedSpinLock(&Ctx->Device.Receive.Lock, &LockHandle);
- if (Ctx->Device.Receive.ActiveNbls.Head)
- NET_BUFFER_LIST_NEXT_NBL_EX(Ctx->Device.Receive.ActiveNbls.Tail) = Nbl;
- else
- {
- KeClearEvent(&Ctx->Device.Receive.ActiveNbls.Empty);
- Ctx->Device.Receive.ActiveNbls.Head = Nbl;
- }
- Ctx->Device.Receive.ActiveNbls.Tail = Nbl;
+ if(NblChainV4)
+ TunAddNewNblsToReceiveActiveNblsUnsafe(Ctx, NblChainV4, NblChainV4->Scratch);
+ if(NblChainV6)
+ TunAddNewNblsToReceiveActiveNblsUnsafe(Ctx, NblChainV6, NblChainV6->Scratch);
KeReleaseInStackQueuedSpinLock(&LockHandle);
- NdisMIndicateReceiveNetBufferLists(
- Ctx->MiniportAdapterHandle,
- Nbl,
- NDIS_DEFAULT_PORT_NUMBER,
- 1,
- NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+ if (NblChainV4)
+ {
+ NdisMIndicateReceiveNetBufferLists(
+ Ctx->MiniportAdapterHandle,
+ NblChainV4,
+ NDIS_DEFAULT_PORT_NUMBER,
+ NumberOfNblsV4,
+ NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+ }
+ if (NblChainV6)
+ {
+ NdisMIndicateReceiveNetBufferLists(
+ Ctx->MiniportAdapterHandle,
+ NblChainV6,
+ NDIS_DEFAULT_PORT_NUMBER,
+ NumberOfNblsV6,
+ NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+ }
+
ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
+
+ if (ConsumedRingContent < RingContentToConsume)
+ {
+ InterlockedIncrementNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards);
+ KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
+ WriteULongRelease(&Ring->Head, RingHead);
+ }
continue;
- cleanupNbl:
- ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
- NdisFreeNetBufferList(Nbl);
- cleanupMdl:
- IoFreeMdl(Mdl);
- skipNbl:
- InterlockedIncrementNoFence64((LONG64 *)&Ctx->Statistics.ifInDiscards);
- KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
- WriteULongRelease(&Ring->Head, RingHead);
+ fatalError:
+ InterlockedIncrementNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards);
+ notRunning:
+ TunFreeNblChain(NblChainV4);
+ InterlockedAddNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards, NumberOfNblsV4);
+ TunFreeNblChain(NblChainV6);
+ InterlockedAddNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards, NumberOfNblsV6);
+ break;
}
/* Wait for all NBLs to return: 1. To prevent race between proceeding and invalidating ring head. 2. To have
--
2.36.0.windows.1
More information about the WireGuard
mailing list