Code Diff
diff --git a/pkg/proxy/winkernel/hns.go b/pkg/proxy/winkernel/hns.go
index 4ce7194ba2d77..44f00d471e752 100644
--- a/pkg/proxy/winkernel/hns.go
+++ b/pkg/proxy/winkernel/hns.go
@@ -31,7 +31,10 @@ import (
type HostNetworkService interface {
getNetworkByName(name string) (*hnsNetworkInfo, error)
- getAllEndpointsByNetwork(networkName string) (map[string]*endpointInfo, error)
+ // Returns a map of endpoints keyed by both endpoint ID and IP address for all endpoints on the specified network, and a map of remote endpoints with duplicate IPs to be deleted.
+ getAllEndpointsByNetwork(networkName string) (map[string]*endpointInfo, map[string]bool, error)
+ // deleteAllRemoteEndpointsWithDupIP deletes all remote endpoints with duplicate IPs that were found in getAllEndpointsByNetwork. This is needed to clean up stale remote endpoints that can be left behind due to a Windows bug.
+ deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP map[string]bool)
getEndpointByID(id string) (*endpointInfo, error)
getEndpointByIpAddress(ip string, networkName string) (*endpointInfo, error)
getEndpointByName(id string) (*endpointInfo, error)
@@ -114,17 +117,20 @@ func (hns hns) getNetworkByName(name string) (*hnsNetworkInfo, error) {
}, nil
}
-func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpointInfo), error) {
+func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpointInfo), map[string]bool, error) {
hcnnetwork, err := hns.hcn.GetNetworkByName(networkName)
if err != nil {
klog.ErrorS(err, "failed to get HNS network by name", "name", networkName)
- return nil, err
+ return nil, nil, err
}
endpoints, err := hns.hcn.ListEndpointsOfNetwork(hcnnetwork.Id)
if err != nil {
- return nil, fmt.Errorf("failed to list endpoints: %w", err)
+ return nil, nil, fmt.Errorf("failed to list endpoints: %w", err)
}
+
endpointInfos := make(map[string]*(endpointInfo))
+ remoteEPsWithDupIP := make(map[string]bool)
+
for _, ep := range endpoints {
if len(ep.IpConfigurations) == 0 {
@@ -142,14 +148,22 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi
break
}
- isLocal := uint32(ep.Flags&hcn.EndpointFlagsRemoteEndpoint) == 0
-
- if existingEp, ok := endpointInfos[ipConfig.IpAddress]; ok && isLocal {
- // If the endpoint is already part of the queried endpoints map and is local,
- // then we should not add it again to the map
- // This is to avoid overwriting the remote endpoint info with a local endpoint.
- klog.V(3).InfoS("Endpoint already exists in queried endpoints map; skipping.", "newLocalEndpoint", ep, "ipConfig", ipConfig, "existingEndpoint", existingEp)
- continue
+ curEpIsLocal := uint32(ep.Flags&hcn.EndpointFlagsRemoteEndpoint) == 0
+
+ if existingEp, ok := endpointInfos[ipConfig.IpAddress]; ok {
+ if curEpIsLocal && !existingEp.isLocal {
+ // Local found, stale remote in map → delete remote from HNS, overwrite
+ remoteEPsWithDupIP[existingEp.hnsID] = true
+ delete(endpointInfos, existingEp.hnsID)
+ delete(endpointInfos, existingEp.ip)
+ // fall through to add local
+ } else if !curEpIsLocal && existingEp.isLocal {
+ // Local already in map, remote arriving → delete remote from HNS, skip
+ remoteEPsWithDupIP[ep.Id] = true
+ continue
+ } else {
+ continue // same type, keep existing
+ }
}
// Add to map with key endpoint ID or IP address
@@ -157,7 +171,7 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi
// TODO: Store by IP only and remove any lookups by endpoint ID.
epInfo := &endpointInfo{
ip: ipConfig.IpAddress,
- isLocal: isLocal,
+ isLocal: curEpIsLocal,
macAddress: ep.MacAddress,
hnsID: ep.Id,
hns: hns,
@@ -172,7 +186,17 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi
}
klog.V(3).InfoS("Queried endpoints from network", "network", networkName, "count", len(endpointInfos))
klog.V(5).InfoS("Queried endpoints details", "network", networkName, "endpointInfos", endpointInfos)
- return endpointInfos, nil
+ return endpointInfos, remoteEPsWithDupIP, nil
+}
+
+func (hns hns) deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP map[string]bool) {
+ for hnsID := range remoteEPsWithDupIP {
+ klog.V(3).InfoS("Deleting stale remote endpoint with duplicate IP", "hnsID", hnsID)
+ err := hns.deleteEndpoint(hnsID)
+ if err != nil {
+ klog.ErrorS(err, "Failed to delete stale remote endpoint with duplicate IP", "hnsID", hnsID)
+ }
+ }
}
func (hns hns) getEndpointByID(id string) (*endpointInfo, error) {
diff --git a/pkg/proxy/winkernel/hns_test.go b/pkg/proxy/winkernel/hns_test.go
index 4b8c798750a4f..069125477b08d 100644
--- a/pkg/proxy/winkernel/hns_test.go
+++ b/pkg/proxy/winkernel/hns_test.go
@@ -94,7 +94,7 @@ func TestGetAllEndpointsByNetwork(t *testing.T) {
t.Error(err)
}
- mapEndpointsInfo, err := hns.getAllEndpointsByNetwork(Network.Name)
+ mapEndpointsInfo, _, err := hns.getAllEndpointsByNetwork(Network.Name)
if err != nil {
t.Error(err)
}
@@ -156,24 +156,25 @@ func TestGetAllEndpointsByNetworkWithDupEP(t *testing.T) {
t.Error(err)
}
- mapEndpointsInfo, err := hns.getAllEndpointsByNetwork(Network.Name)
+ mapEndpointsInfo, remoteEPsWithDupIP, err := hns.getAllEndpointsByNetwork(Network.Name)
if err != nil {
t.Error(err)
}
+ hns.deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP)
endpointIpv4, ipv4EpPresent := mapEndpointsInfo[ipv4Config.IpAddress]
assert.True(t, ipv4EpPresent, "IPV4 endpoint is missing in Dualstack mode")
assert.Equal(t, endpointIpv4.ip, epIpAddress, "IPV4 IP is missing in Dualstack mode")
- assert.Equal(t, endpointIpv4.hnsID, remoteEndpoint.Id, "HNS ID is not matching with remote endpoint")
+ assert.Equal(t, endpointIpv4.hnsID, dupLocalEndpoint.Id, "HNS ID is not matching with local endpoint")
endpointIpv6, ipv6EpPresent := mapEndpointsInfo[ipv6Config.IpAddress]
assert.True(t, ipv6EpPresent, "IPV6 endpoint is missing in Dualstack mode")
assert.Equal(t, endpointIpv6.ip, epIpv6Address, "IPV6 IP is missing in Dualstack mode")
- assert.Equal(t, endpointIpv6.hnsID, remoteEndpoint.Id, "HNS ID is not matching with remote endpoint")
+ assert.Equal(t, endpointIpv6.hnsID, dupLocalEndpoint.Id, "HNS ID is not matching with local endpoint")
- err = hns.hcn.DeleteEndpoint(remoteEndpoint)
- if err != nil {
- t.Error(err)
- }
+ remoteEpExists, _ := hns.hcn.GetEndpointByID(remoteEndpoint.Id)
+ assert.Nil(t, remoteEpExists, "Remote endpoint with duplicate IP should have been deleted")
+
+ // Clean up the duplicate local endpoint
err = hns.hcn.DeleteEndpoint(dupLocalEndpoint)
if err != nil {
t.Error(err)
diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go
index 1fb2c25a433ad..98fddb2650848 100644
--- a/pkg/proxy/winkernel/proxier.go
+++ b/pkg/proxy/winkernel/proxier.go
@@ -155,7 +155,8 @@ type remoteSubnetInfo struct {
}
const (
- NETWORK_TYPE_OVERLAY = "overlay"
+ NETWORK_TYPE_OVERLAY = "overlay"
+ NETWORK_TYPE_L2BRIDGE = "L2Bridge"
// MAX_COUNT_STALE_LOADBALANCERS is the maximum number of stale loadbalancers which cleanedup in single syncproxyrules.
// If there are more stale loadbalancers to clean, it will go to next iteration of syncproxyrules.
MAX_COUNT_STALE_LOADBALANCERS = 20
@@ -1170,7 +1171,9 @@ func (proxier *Proxier) syncProxyRules() (retryError error) {
_ = proxier.endpointsMap.Update(proxier.endpointsChanges)
// Query HNS for endpoints and load balancers
- queriedEndpoints, err := hns.getAllEndpointsByNetwork(hnsNetworkName)
+ queriedEndpoints, remoteEPsWithDupIP, err := hns.getAllEndpointsByNetwork(hnsNetworkName)
+ defer hns.deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP)
+
if err != nil {
klog.ErrorS(err, "Querying HNS for endpoints failed")
return
@@ -1715,23 +1718,30 @@ func (proxier *Proxier) syncProxyRules() (retryError error) {
}
// remove stale endpoint refcount entries
+ proxier.deleteTerminatedEndpoints(queriedEndpoints)
+
+ // This will cleanup stale load balancers which are pending delete
+ // in last iteration
+ proxier.cleanupStaleLoadbalancers()
+ return
+}
+
+func (proxier *Proxier) deleteTerminatedEndpoints(queriedEndpoints map[string]*(endpointInfo)) {
for epIP := range proxier.terminatedEndpoints {
klog.V(5).InfoS("Terminated endpoints ready for deletion", "epIP", epIP)
if epToDelete := queriedEndpoints[epIP]; epToDelete != nil && epToDelete.hnsID != "" && !epToDelete.IsLocal() {
- if refCount := proxier.endPointsRefCount.getRefCount(epToDelete.hnsID); refCount == nil || *refCount == 0 {
- err := proxier.hns.deleteEndpoint(epToDelete.hnsID)
- if err != nil {
+ refCount := proxier.endPointsRefCount.getRefCount(epToDelete.hnsID)
+ if refCount == nil || *refCount == 0 {
+ if err := proxier.hns.deleteEndpoint(epToDelete.hnsID); err != nil {
klog.ErrorS(err, "Deleting unreferenced remote endpoint failed", "hnsID", epToDelete.hnsID)
} else {
klog.V(3).InfoS("Deleting unreferenced remote endpoint succeeded", "hnsID", epToDelete.hnsID, "IP", epToDelete.ip)
}
+ } else {
+ klog.V(3).InfoS("Not deleting remote endpoint as it is still referenced", "hnsID", epToDelete.hnsID, "IP", epToDelete.ip, "refCount", refCount)
}
}
}
- // This will cleanup stale load balancers which are pending delete
- // in last iteration
- proxier.cleanupStaleLoadbalancers()
- return
}
// deleteExistingLoadBalancer checks whether loadbalancer delete is needed or not.
diff --git a/pkg/proxy/winkernel/proxier_test.go b/pkg/proxy/winkernel/proxier_test.go
index 5a4e9887ca279..dace6e9fb7cd7 100644
--- a/pkg/proxy/winkernel/proxier_test.go
+++ b/pkg/proxy/winkernel/proxier_test.go
@@ -2003,3 +2003,674 @@ type testHostMacProvider struct {
func (r *testHostMacProvider) GetHostMac(nodeIP net.IP) string {
return r.macAddress
}
+
+// TestRemoteAndLocalEndpointsSameIP demonstrates a reference counting issue
+// when two services share an endpoint with the same IP address, where one
+// service treats it as local (NodeName matches proxy hostname) and the other
+// treats it as remote (NodeName doesn't match). The remote proxy endpoint
+// resolves to the local HNS endpoint, causing its refCount to never be
+// incremented via the shared endPointsRefCount map.
+func TestRemoteAndLocalEndpointsSameIP(t *testing.T) {
+ proxier := NewFakeProxier(t, testNodeName, netutils.ParseIPSloppy("10.0.0.1"), NETWORK_TYPE_L2BRIDGE, false)
+ if proxier == nil {
+ t.Error("Failed to create proxier")
+ }
+
+ sharedEPIP := epIpAddressLocal1 // "192.168.4.4" — same IP for both services
+
+ svcIP1 := "10.20.30.41"
+ svcPort1 := 80
+ svcPortName1 := proxy.ServicePortName{
+ NamespacedName: makeNSN("ns1", "svc1"),
+ Port: "p80",
+ Protocol: v1.ProtocolTCP,
+ }
+
+ svcIP2 := "10.20.30.42"
+ svcPort2 := 80
+ svcPortName2 := proxy.ServicePortName{
+ NamespacedName: makeNSN("ns1", "svc2"),
+ Port: "p80",
+ Protocol: v1.ProtocolTCP,
+ }
+
+ makeServiceMap(proxier,
+ // svc1 uses the endpoint as LOCAL
+ makeTestService(svcPortName1.Namespace, svcPortName1.Name, func(svc *v1.Service) {
+ svc.Spec.Type = v1.ServiceTypeClusterIP
+ svc.Spec.ClusterIP = svcIP1
+ svc.Spec.Ports = []v1.ServicePort{{
+ Name: svcPortName1.Port,
+ Port: int32(svcPort1),
+ Protocol: v1.ProtocolTCP,
+ }}
+ }),
+ // svc2 uses the endpoint as REMOTE
+ makeTestService(svcPortName2.Namespace, svcPortName2.Name, func(svc *v1.Service) {
+ svc.Spec.Type = v1.ServiceTypeClusterIP
+ svc.Spec.ClusterIP = svcIP2
+ svc.Spec.Ports = []v1.ServicePort{{
+ Name: svcPortName2.Port,
+ Port: int32(svcPort2),
+ Protocol: v1.ProtocolTCP,
+ }}
+ }),
+ )
+
+ populateEndpointSlices(proxier,
+ // svc1's endpoint: local (NodeName = "testhost" matches proxy hostname)
+ makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) {
+ eps.AddressType = discovery.AddressTypeIPv4
+ eps.Endpoints = []discovery.Endpoint{{
+ Addresses: []string{sharedEPIP},
+ NodeName: ptr.To(testNodeName),
+ }}
+ eps.Ports = []discovery.EndpointPort{{
+ Name: ptr.To(svcPortName1.Port),
+ Port: ptr.To(int32(svcPort1)),
+ Protocol: ptr.To(v1.ProtocolTCP),
+ }}
+ }),
+ // svc2's endpoint: remote (NodeName = "testhost2" doesn't match proxy hostname)
+ makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) {
+ eps.AddressType = discovery.AddressTypeIPv4
+ eps.Endpoints = []discovery.Endpoint{{
+ Addresses: []string{sharedEPIP},
+ NodeName: ptr.To("testhost2"),
+ }}
+ eps.Ports = []discovery.EndpointPort{{
+ Name: ptr.To(svcPortName2.Port),
+ Port: ptr.To(int32(svcPort2)),
+ Protocol: ptr.To(v1.ProtocolTCP),
+ }}
+ }),
+ )
+
+ // Pre-populate the local HNS endpoint at sharedEPIP (as CNI would create it)
+ hcnMock := (proxier.hcn).(*fakehcn.HcnMock)
+ hcnMock.PopulateQueriedEndpoints(endpointLocal1, networkId, sharedEPIP, macAddressLocal1, prefixLen)
+
+ proxier.setInitialized(true)
+ proxier.syncProxyRules()
+
+ // Find each service's endpoint
+ var localEp, remoteEp *endpointInfo
+ for _, ep := range proxier.endpointsMap[svcPortName1] {
+ if epI, ok := ep.(*endpointInfo); ok && epI.ip == sharedEPIP {
+ localEp = epI
+ }
+ }
+ for _, ep := range proxier.endpointsMap[svcPortName2] {
+ if epI, ok := ep.(*endpointInfo); ok && epI.ip == sharedEPIP {
+ remoteEp = epI
+ }
+ }
+
+ assert.NotNil(t, localEp, "Expected to find local endpoint for svc1")
+ assert.NotNil(t, remoteEp, "Expected to find remote endpoint for svc2")
+
+ // Both should resolve to the same local HNS endpoint
+ assert.Equal(t, endpointLocal1, localEp.hnsID,
+ "Local ep should have the pre-populated HNS endpoint ID")
+ assert.Equal(t, endpointLocal1, remoteEp.hnsID,
+ "Remote ep should resolve to the same local HNS endpoint ID")
+
+ // Verify the endpoint locality as seen by the proxy layer
+ assert.True(t, localEp.IsLocal(), "svc1's endpoint should be local")
+ assert.False(t, remoteEp.IsLocal(), "svc2's endpoint should be remote")
+
+ // The remote ep's refCount was never incremented via endPointsRefCount
+ // because the resolved HNS endpoint is local (newHnsEndpoint.IsLocal()=true),
+ // so the code took the hnsLocalEndpoints branch instead of incrementing the
+ // shared refCount. The remote ep retains its private refCount (value 0).
+ assert.NotNil(t, remoteEp.refCount, "Remote ep refCount pointer should not be nil")
+ assert.Equal(t, uint16(0), *remoteEp.refCount,
+ "Remote ep refCount should be 0 — it was never incremented because "+
+ "the HNS endpoint is local, exposing a refCount tracking gap")
+
+ // The shared endPointsRefCount map should not have an entry for this
+ // HNS endpoint (or if it does from som
... [truncated]