[PATCH] driver: indicate all available packets in the ring-buffer to NDIS in two NBL chains, one chain for IPv4 and one for IPv6.

Alexandru Brinduse alexandru.brinduse at pango.co
Mon Apr 3 15:23:43 UTC 2023


NdisMIndicateReceiveNetBufferLists() allows batch processing of chained NBLs.
Network performance measurements show that this dramatically increases download speed.
No chain-size limit is imposed because the ring buffer is already bounded. In the worst case (it is unclear whether this can be reached in practice), the NBL chain will have the same size as the ring buffer.
---
 driver/wintun.c | 210 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 147 insertions(+), 63 deletions(-)

diff --git a/driver/wintun.c b/driver/wintun.c
index ad4b16b..64f8e1b 100644
--- a/driver/wintun.c
+++ b/driver/wintun.c
@@ -246,6 +246,35 @@ TunNblIsCompleted(_In_ NET_BUFFER_LIST *Nbl)
     return (ULONG_PTR)(NET_BUFFER_LIST_MINIPORT_RESERVED(Nbl)[0]) & 1;
 }
 
+/* Frees every NBL of a chain linked via NET_BUFFER_LIST_NEXT_NBL, plus the MDL chain of each NBL's first NET_BUFFER. */
+static VOID
+TunFreeNblChain(_In_ NET_BUFFER_LIST *Nbl)
+{
+    for (NET_BUFFER_LIST *Next; Nbl; Nbl = Next)
+    {
+        Next = NET_BUFFER_LIST_NEXT_NBL(Nbl); /* Fetch the link before the NBL is freed. */
+        IoFreeMdl(NET_BUFFER_LIST_FIRST_NB(Nbl)->MdlChain);
+        NdisFreeNetBufferList(Nbl);
+    }
+}
+
+/* Appends the chain Head..Tail to Ctx's active receive NBL list; the caller in this patch holds Device.Receive.Lock. */
+static VOID
+TunAddNewNblsToReceiveActiveNblsUnsafe(_Inout_ TUN_CTX *Ctx, _In_ NET_BUFFER_LIST *Head, _In_ NET_BUFFER_LIST *Tail)
+{
+    if (!Ctx->Device.Receive.ActiveNbls.Head)
+    {
+        /* The list has no head yet: clear the Empty event and start the list with the new chain. */
+        KeClearEvent(&Ctx->Device.Receive.ActiveNbls.Empty);
+        Ctx->Device.Receive.ActiveNbls.Head = Head;
+    }
+    else
+    {
+        NET_BUFFER_LIST_NEXT_NBL_EX(Ctx->Device.Receive.ActiveNbls.Tail) = Head;
+    }
+    Ctx->Device.Receive.ActiveNbls.Tail = Tail;
+}
+
 static MINIPORT_SEND_NET_BUFFER_LISTS TunSendNetBufferLists;
 _Use_decl_annotations_
 static VOID
@@ -491,89 +520,144 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
                 KeClearEvent(Ctx->Device.Receive.TailMoved);
             }
         }
+
         if (RingTail >= RingCapacity)
             break;
 
-        ULONG RingContent = TUN_RING_WRAP(RingTail - RingHead, RingCapacity);
-        if (RingContent < sizeof(TUN_PACKET))
+        ULONG RingContentToConsume = TUN_RING_WRAP(RingTail - RingHead, RingCapacity);
+        if (RingContentToConsume < sizeof(TUN_PACKET))
             break;
+        ULONG ConsumedRingContent = 0;
 
-        TUN_PACKET *Packet = (TUN_PACKET *)(Ring->Data + RingHead);
-        ULONG PacketSize = *(volatile ULONG *)&Packet->Size;
-        if (PacketSize > TUN_MAX_IP_PACKET_SIZE)
-            break;
+        NET_BUFFER_LIST* NblChainV4 = NULL, *NblChainV6 = NULL;
+        ULONG NumberOfNblsV4 = 0, NumberOfNblsV6 = 0;
 
-        ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
-        if (AlignedPacketSize > RingContent)
-            break;
+        /* We'll build two NBL chains, one for IPv4 and one for IPv6, from all available packets
+         * (they are kept separate so we can use NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE).
+         * Network performance measurements indicate that the download speed increases drastically
+         * if multiple NBLs are indicated to NDIS in a single call. */
+        while (ConsumedRingContent < RingContentToConsume)
+        {
+            TUN_PACKET* Packet = (TUN_PACKET*)(Ring->Data + RingHead);
+            ULONG PacketSize = *(volatile ULONG*)&Packet->Size;
+            if (PacketSize > TUN_MAX_IP_PACKET_SIZE)
+                goto fatalError;
 
-        RingHead = TUN_RING_WRAP(RingHead + AlignedPacketSize, RingCapacity);
+            ULONG AlignedPacketSize = TUN_ALIGN(sizeof(TUN_PACKET) + PacketSize);
+            if (AlignedPacketSize > RingContentToConsume)
+                goto fatalError;
 
-        ULONG NblFlags;
-        USHORT NblProto;
-        if (PacketSize >= 20 && Packet->Data[0] >> 4 == 4)
-        {
-            NblFlags = NDIS_NBL_FLAGS_IS_IPV4;
-            NblProto = HTONS(NDIS_ETH_TYPE_IPV4);
-        }
-        else if (PacketSize >= 40 && Packet->Data[0] >> 4 == 6)
-        {
-            NblFlags = NDIS_NBL_FLAGS_IS_IPV6;
-            NblProto = HTONS(NDIS_ETH_TYPE_IPV6);
-        }
-        else
-            goto skipNbl;
-
-        VOID *PacketAddr =
-            (UCHAR *)MmGetMdlVirtualAddress(Ctx->Device.Receive.Mdl) + (ULONG)(Packet->Data - (UCHAR *)Ring);
-        MDL *Mdl = IoAllocateMdl(PacketAddr, PacketSize, FALSE, FALSE, NULL);
-        if (!Mdl)
-            goto skipNbl;
-        IoBuildPartialMdl(Ctx->Device.Receive.Mdl, Mdl, PacketAddr, PacketSize);
-        NET_BUFFER_LIST *Nbl = NdisAllocateNetBufferAndNetBufferList(Ctx->NblPool, 0, 0, Mdl, 0, PacketSize);
-        if (!Nbl)
-            goto cleanupMdl;
-        Nbl->SourceHandle = Ctx->MiniportAdapterHandle;
-        NdisSetNblFlag(Nbl, NblFlags);
-        NET_BUFFER_LIST_INFO(Nbl, NetBufferListFrameType) = (PVOID)NblProto;
-        NET_BUFFER_LIST_STATUS(Nbl) = NDIS_STATUS_SUCCESS;
-        TunNblSetOffsetAndMarkActive(Nbl, RingHead);
+            RingHead = TUN_RING_WRAP(RingHead + AlignedPacketSize, RingCapacity);
+
+            NET_BUFFER_LIST** TargetChain;
+            ULONG* NumberOfNblsInTargetChain;
+            ULONG NblFlags;
+            USHORT NblProto;
+            if (PacketSize >= 20 && Packet->Data[0] >> 4 == 4)
+            {
+                NblFlags = NDIS_NBL_FLAGS_IS_IPV4;
+                NblProto = HTONS(NDIS_ETH_TYPE_IPV4);
+                TargetChain = &NblChainV4;
+                NumberOfNblsInTargetChain = &NumberOfNblsV4;
+            }
+            else if (PacketSize >= 40 && Packet->Data[0] >> 4 == 6)
+            {
+                NblFlags = NDIS_NBL_FLAGS_IS_IPV6;
+                NblProto = HTONS(NDIS_ETH_TYPE_IPV6);
+                TargetChain = &NblChainV6;
+                NumberOfNblsInTargetChain = &NumberOfNblsV6;
+            }
+            else
+                break;
+
+            VOID* PacketAddr =
+                (UCHAR*)MmGetMdlVirtualAddress(Ctx->Device.Receive.Mdl) + (ULONG)(Packet->Data - (UCHAR*)Ring);
+            MDL* Mdl = IoAllocateMdl(PacketAddr, PacketSize, FALSE, FALSE, NULL);
+            if (!Mdl)
+                break;
+            IoBuildPartialMdl(Ctx->Device.Receive.Mdl, Mdl, PacketAddr, PacketSize);
+            NET_BUFFER_LIST* Nbl = NdisAllocateNetBufferAndNetBufferList(Ctx->NblPool, 0, 0, Mdl, 0, PacketSize);
+            if (!Nbl)
+                goto skipPacket;
+            Nbl->SourceHandle = Ctx->MiniportAdapterHandle;
+            NdisSetNblFlag(Nbl, NblFlags);
+            NET_BUFFER_LIST_INFO(Nbl, NetBufferListFrameType) = (PVOID)NblProto;
+            NET_BUFFER_LIST_STATUS(Nbl) = NDIS_STATUS_SUCCESS;
+            TunNblSetOffsetAndMarkActive(Nbl, RingHead);
 
+            if (*TargetChain)
+            {
+                NET_BUFFER_LIST_NEXT_NBL((NET_BUFFER_LIST*)(*TargetChain)->Scratch) = Nbl;
+                NET_BUFFER_LIST_NEXT_NBL_EX((NET_BUFFER_LIST*)(*TargetChain)->Scratch) = Nbl;
+            }
+            else
+            {
+                *TargetChain = Nbl;
+            }
+
+            (*TargetChain)->Scratch = Nbl; // Set LAST_NBL
+            ++(*NumberOfNblsInTargetChain);
+            ConsumedRingContent += AlignedPacketSize;
+            continue;
+
+        skipPacket:
+            IoFreeMdl(Mdl);
+            break;
+        }
+       
         KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
         if (!ReadAcquire(&Ctx->Running))
-            goto cleanupNbl;
+        {
+            ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
+            goto notRunning;
+        }
 
         KLOCK_QUEUE_HANDLE LockHandle;
         KeAcquireInStackQueuedSpinLock(&Ctx->Device.Receive.Lock, &LockHandle);
-        if (Ctx->Device.Receive.ActiveNbls.Head)
-            NET_BUFFER_LIST_NEXT_NBL_EX(Ctx->Device.Receive.ActiveNbls.Tail) = Nbl;
-        else
-        {
-            KeClearEvent(&Ctx->Device.Receive.ActiveNbls.Empty);
-            Ctx->Device.Receive.ActiveNbls.Head = Nbl;
-        }
-        Ctx->Device.Receive.ActiveNbls.Tail = Nbl;
+        if(NblChainV4)
+            TunAddNewNblsToReceiveActiveNblsUnsafe(Ctx, NblChainV4, NblChainV4->Scratch);
+        if(NblChainV6)
+            TunAddNewNblsToReceiveActiveNblsUnsafe(Ctx, NblChainV6, NblChainV6->Scratch);
         KeReleaseInStackQueuedSpinLock(&LockHandle);
 
-        NdisMIndicateReceiveNetBufferLists(
-            Ctx->MiniportAdapterHandle,
-            Nbl,
-            NDIS_DEFAULT_PORT_NUMBER,
-            1,
-            NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+        if (NblChainV4)
+        {
+            NdisMIndicateReceiveNetBufferLists(
+                Ctx->MiniportAdapterHandle,
+                NblChainV4,
+                NDIS_DEFAULT_PORT_NUMBER,
+                NumberOfNblsV4,
+                NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+        }
 
+        if (NblChainV6)
+        {
+            NdisMIndicateReceiveNetBufferLists(
+                Ctx->MiniportAdapterHandle,
+                NblChainV6,
+                NDIS_DEFAULT_PORT_NUMBER,
+                NumberOfNblsV6,
+                NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL | NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE);
+        }
+        
         ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
+
+        if (ConsumedRingContent < RingContentToConsume)
+        {
+            InterlockedIncrementNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards);
+            KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
+            WriteULongRelease(&Ring->Head, RingHead);
+        }
         continue;
 
-    cleanupNbl:
-        ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
-        NdisFreeNetBufferList(Nbl);
-    cleanupMdl:
-        IoFreeMdl(Mdl);
-    skipNbl:
-        InterlockedIncrementNoFence64((LONG64 *)&Ctx->Statistics.ifInDiscards);
-        KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
-        WriteULongRelease(&Ring->Head, RingHead);
+    fatalError:
+        InterlockedIncrementNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards);
+    notRunning:
+        TunFreeNblChain(NblChainV4);
+        InterlockedAddNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards, NumberOfNblsV4);
+        TunFreeNblChain(NblChainV6);
+        InterlockedAddNoFence64((LONG64*)&Ctx->Statistics.ifInDiscards, NumberOfNblsV6);
+        break;
     }
 
     /* Wait for all NBLs to return: 1. To prevent race between proceeding and invalidating ring head. 2. To have
-- 
2.36.0.windows.1



More information about the WireGuard mailing list