Commit b1b9aa27 authored by Natanael Copa's avatar Natanael Copa

main/linux-grsec: ipsec improvement patches

backport of net-next to improve ipsec send performance
parent 83243e0d
From 5ea3677e1f26dd343ed139d2bdad23ae2f1393db Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Mon, 12 Apr 2010 13:43:01 +0000
Subject: [PATCH 01/18] grsec: revert conflicting flow cache changes
---
net/core/flow.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/core/flow.c b/net/core/flow.c
index 5b27992..9601587 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -39,7 +39,7 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0);
static u32 flow_hash_shift;
#define flow_hash_size (1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables);
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
@@ -52,7 +52,7 @@ struct flow_percpu_info {
u32 hash_rnd;
int count;
};
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info);
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
#define flow_hash_rnd_recalc(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
@@ -69,7 +69,7 @@ struct flow_flush_info {
atomic_t cpuleft;
struct completion completion;
};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets);
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
--
1.7.0.2
From 9082391046940c410eac3bad065c8701998b5cab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 3 Mar 2010 04:01:13 +0000
Subject: [PATCH 02/18] gre: fix hard header destination address checking
ipgre_header() can be called with zero daddr when the gre device is
configured as multipoint tunnel and still has the NOARP flag set (which is
typically cleared by the userspace arp daemon). If the NOARP packets are
not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= NBMA IP) and
use that for route look up (and may lead to bogus xfrm acquires).
The multicast address check is removed as sending to multicast group should
be ok. In fact, if gre device has a multicast address as destination
ipgre_header is always called with multicast address.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3)
---
net/ipv4/ip_gre.c | 7 ++-----
1 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1433338..ac88ce5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1137,12 +1137,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (saddr)
memcpy(&iph->saddr, saddr, 4);
-
- if (daddr) {
+ if (daddr)
memcpy(&iph->daddr, daddr, 4);
- return t->hlen;
- }
- if (iph->daddr && !ipv4_is_multicast(iph->daddr))
+ if (iph->daddr)
return t->hlen;
return -t->hlen;
--
1.7.0.2
--- linux-2.6.32/net/ipv4/ip_gre.c.orig
+++ linux-2.6.32/net/ipv4/ip_gre.c
@@ -803,11 +803,13 @@
From cd0e9d08480e1e0648e17d099ecf50f6fd8714e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Sat, 20 Mar 2010 02:27:58 +0000
Subject: [PATCH 03/18] ip_gre: include route header_len in max_headroom calculation
Taking route's header_len into account, and updating gre device
needed_headroom will give better hints on upper bound of required
headroom. This is useful if the gre traffic is xfrm'ed.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 243aad830e8a4cdda261626fbaeddde16b08d04a)
---
net/ipv4/ip_gre.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ac88ce5..7f1ff73 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -803,11 +803,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
tunnel->err_count = 0;
}
......@@ -15,3 +34,6 @@
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
--
1.7.0.2
From 8a0e3ea4924059a7268446177d6869e3399adbb2 Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Mon, 12 Apr 2010 13:46:45 +0000
Subject: [PATCH 04/18] arp: flush arp cache on device change
If IFF_NOARP is changed, we must flush the arp cache.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
---
net/ipv4/arp.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93..71ab56f 100644
index 4e80f33..580bfc3 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1200,6 +1200,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
......@@ -12,3 +24,6 @@ index c95cd93..71ab56f 100644
default:
break;
}
--
1.7.0.2
From 78f1cd02457252e1ffbc6caa44a17424a45286b8 Mon Sep 17 00:00:00 2001
From 89f350c4ec426b4c1db6ef269546940365d918e1 Mon Sep 17 00:00:00 2001
From: Francois Romieu <romieu@fr.zoreil.com>
Date: Sat, 27 Mar 2010 19:35:46 -0700
Subject: [PATCH] r8169: fix broken register writes
Subject: [PATCH 05/18] r8169: fix broken register writes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is quite similar to b39fe41f481d20c201012e4483e76c203802dda7
......@@ -14,19 +14,20 @@ level before being merged into a 64 bit logical entity.
Credits go to Ben Hutchings <ben@decadent.org.uk> for the MAR
registers (aka "multicast is broken for ages on ARM) and to
Timo Teräs <timo.teras@iki.fi> for the MAC registers.
Timo Teräs <timo.teras@iki.fi> for the MAC registers.
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 78f1cd02457252e1ffbc6caa44a17424a45286b8)
---
drivers/net/r8169.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index b93fd23..7193afc 100644
index 0fe2fc9..24599b5 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2820,8 +2820,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
@@ -2827,8 +2827,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
spin_lock_irq(&tp->lock);
RTL_W8(Cfg9346, Cfg9346_Unlock);
......@@ -36,7 +37,7 @@ index b93fd23..7193afc 100644
RTL_W8(Cfg9346, Cfg9346_Lock);
spin_unlock_irq(&tp->lock);
@@ -4747,8 +4747,8 @@ static void rtl_set_rx_mode(struct net_device *dev)
@@ -4795,8 +4795,8 @@ static void rtl_set_rx_mode(struct net_device *dev)
mc_filter[1] = swab32(data);
}
......@@ -47,5 +48,5 @@ index b93fd23..7193afc 100644
RTL_W32(RxConfig, tmp);
--
1.7.0.3
1.7.0.2
From c0cd884af045338476b8e69a61fceb3f34ff22f1 Mon Sep 17 00:00:00 2001
From a60cfaf3df9cd0cddbc24695434ed5bfa917d505 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Mon, 29 Mar 2010 13:16:02 -0700
Subject: [PATCH] r8169: offical fix for CVE-2009-4537 (overlength frame DMAs)
Subject: [PATCH 06/18] r8169: offical fix for CVE-2009-4537 (overlength frame DMAs)
Official patch to fix the r8169 frame length check error.
......@@ -48,15 +48,16 @@ such that performance is restored easily.
Signed-off-by: Neil Horman <nhorman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit c0cd884af045338476b8e69a61fceb3f34ff22f1)
---
drivers/net/r8169.c | 29 ++++++++++++++++++++++++-----
1 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 7193afc..9674005 100644
index 24599b5..1484528 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -186,7 +186,12 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = {
@@ -186,7 +186,12 @@ static struct pci_device_id rtl8169_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
......@@ -70,7 +71,7 @@ index 7193afc..9674005 100644
static int use_dac;
static struct {
u32 msg_enable;
@@ -3217,9 +3222,13 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
@@ -3245,9 +3250,13 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
}
static void rtl8169_set_rxbufsize(struct rtl8169_private *tp,
......@@ -86,7 +87,7 @@ index 7193afc..9674005 100644
tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE;
}
@@ -3231,7 +3240,17 @@ static int rtl8169_open(struct net_device *dev)
@@ -3259,7 +3268,17 @@ static int rtl8169_open(struct net_device *dev)
int retval = -ENOMEM;
......@@ -105,7 +106,7 @@ index 7193afc..9674005 100644
/*
* Rx and Tx desscriptors needs 256 bytes alignment.
@@ -3884,7 +3903,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -3912,7 +3931,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
rtl8169_down(dev);
......@@ -115,5 +116,5 @@ index 7193afc..9674005 100644
ret = rtl8169_init_ring(dev);
if (ret < 0)
--
1.7.0.3
1.7.0.2
From 26654a966adb674afc30d285f7e79535d03c2492 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 31 Mar 2010 02:08:31 +0000
Subject: [PATCH 07/18] r8169: Fix rtl8169_rx_interrupt()
In case a reset is performed, rtl8169_rx_interrupt() is called from
process context instead of softirq context. Special care must be taken
to call appropriate network core services (netif_rx() instead of
netif_receive_skb()). VLAN handling also corrected.
Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Diagnosed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 630b943c182d1aed69f244405131902fbcba7ec6)
---
drivers/net/r8169.c | 22 +++++++++++++++++-----
1 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 1484528..bed1d47 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1047,14 +1047,14 @@ static void rtl8169_vlan_rx_register(struct net_device *dev,
}
static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
- struct sk_buff *skb)
+ struct sk_buff *skb, int polling)
{
u32 opts2 = le32_to_cpu(desc->opts2);
struct vlan_group *vlgrp = tp->vlgrp;
int ret;
if (vlgrp && (opts2 & RxVlanTag)) {
- vlan_hwaccel_receive_skb(skb, vlgrp, swab16(opts2 & 0xffff));
+ __vlan_hwaccel_rx(skb, vlgrp, swab16(opts2 & 0xffff), polling);
ret = 0;
} else
ret = -1;
@@ -1071,7 +1071,7 @@ static inline u32 rtl8169_tx_vlan_tag(struct rtl8169_private *tp,
}
static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
- struct sk_buff *skb)
+ struct sk_buff *skb, int polling)
{
return -1;
}
@@ -4480,12 +4480,20 @@ out:
return done;
}
+/*
+ * Warning : rtl8169_rx_interrupt() might be called :
+ * 1) from NAPI (softirq) context
+ * (polling = 1 : we should call netif_receive_skb())
+ * 2) from process context (rtl8169_reset_task())
+ * (polling = 0 : we must call netif_rx() instead)
+ */
static int rtl8169_rx_interrupt(struct net_device *dev,
struct rtl8169_private *tp,
void __iomem *ioaddr, u32 budget)
{
unsigned int cur_rx, rx_left;
unsigned int delta, count;
+ int polling = (budget != ~(u32)0) ? 1 : 0;
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
@@ -4550,8 +4558,12 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
skb_put(skb, pkt_size);
skb->protocol = eth_type_trans(skb, dev);
- if (rtl8169_rx_vlan_skb(tp, desc, skb) < 0)
- netif_receive_skb(skb);
+ if (rtl8169_rx_vlan_skb(tp, desc, skb, polling) < 0) {
+ if (likely(polling))
+ netif_receive_skb(skb);
+ else
+ netif_rx(skb);
+ }
dev->stats.rx_bytes += pkt_size;
dev->stats.rx_packets++;
--
1.7.0.2
From d1c9ac562923fa0b1738fceb4c7bafac3ab936ba Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 1 Apr 2010 07:30:07 +0000
Subject: [PATCH 08/18] r8169: clean up my printk uglyness
Fix formatting on r8169 printk
Brandon Philips noted that I had a spacing issue in my printk for the
last r8169 patch that made it quite ugly. Fix that up and add the PFX
macro to it as well so it looks like the other r8169 printks
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 93f4d91d879acfcb0ba9c2725e3133fcff2dfd1e)
---
drivers/net/r8169.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index bed1d47..790555e 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -3255,8 +3255,8 @@ static void rtl8169_set_rxbufsize(struct rtl8169_private *tp,
unsigned int max_frame = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
if (max_frame != 16383)
- printk(KERN_WARNING "WARNING! Changing of MTU on this NIC"
- "May lead to frame reception errors!\n");
+ printk(KERN_WARNING PFX "WARNING! Changing of MTU on this "
+ "NIC may lead to frame reception errors!\n");
tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE;
}
--
1.7.0.2
From 21ee14f92ef1b6d4ca965c9b59135f3462919631 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 2 Mar 2010 02:51:56 +0000
Subject: [PATCH 09/18] ipsec: Fix bogus bundle flowi
When I merged the bundle creation code, I introduced a bogus
flowi value in the bundle. Instead of getting from the caller,
it was instead set to the flow in the route object, which is
totally different.
The end result is that the bundles we created never match, and
we instead end up with an ever growing bundle list.
Thanks to Jamal for find this problem.
Reported-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9)
---
include/net/xfrm.h | 3 ++-
net/ipv4/xfrm4_policy.c | 5 +++--
net/ipv6/xfrm6_policy.c | 3 ++-
net/xfrm/xfrm_policy.c | 7 ++++---
4 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 223e90a..6960be2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -273,7 +273,8 @@ struct xfrm_policy_afinfo {
struct dst_entry *dst,
int nfheader_len);
int (*fill_dst)(struct xfrm_dst *xdst,
- struct net_device *dev);
+ struct net_device *dev,
+ struct flowi *fl);
};
extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 74fb2eb..7009886 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,11 +92,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rtable *rt = (struct rtable *)xdst->route;
- xdst->u.rt.fl = rt->fl;
+ xdst->u.rt.fl = *fl;
xdst->u.dst.dev = dev;
dev_hold(dev);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8ec3d45..3f89ab7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,7 +117,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rt6_info *rt = (struct rt6_info*)xdst->route;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cb81ca3..d75047c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1341,7 +1341,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return err;
}
-static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct xfrm_policy_afinfo *afinfo =
xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1350,7 +1351,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
if (!afinfo)
return -EINVAL;
- err = afinfo->fill_dst(xdst, dev);
+ err = afinfo->fill_dst(xdst, dev, fl);
xfrm_policy_put_afinfo(afinfo);
@@ -1454,7 +1455,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
- err = xfrm_fill_dst(xdst, dev);
+ err = xfrm_fill_dst(xdst, dev, fl);
if (err)
goto free_dst;
--
1.7.0.2
From f2c59932757a06851bb740dc757ce2ba1961fc08 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 31 Mar 2010 01:19:49 +0000
Subject: [PATCH 10/18] xfrm: Remove xfrm_state_genid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The xfrm state genid only needs to be matched against the copy
saved in xfrm_dst. So we don't need a global genid at all. In
fact, we don't even need to initialise it.
Based on observation by Timo Teräs.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 34996cb91dd72f0b0456d8fd3fef4aaee62232f2)
---
net/xfrm/xfrm_state.c | 5 +----
1 files changed, 1 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f2f7c63..8ee733f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -34,7 +34,6 @@
static DEFINE_SPINLOCK(xfrm_state_lock);
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -903,8 +902,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
struct net *net = xs_net(x);
unsigned int h;
- x->genid = ++xfrm_state_genid;
-
list_add(&x->km.all, &net->xfrm.state_all);
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -948,7 +945,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
x->props.reqid == reqid &&
!xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
!xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
- x->genid = xfrm_state_genid;
+ x->genid++;
}
}
--
1.7.0.2
From 5b3e87bccb0e48f2f8b78695e949c015a3695f8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:04 +0000
Subject: [PATCH 11/18] xfrm_user: verify policy direction at XFRM_MSG_POLEXPIRE handler
Add missing check for policy direction verification. This is
especially important since without this xfrm_user may end up
deleting per-socket policy which is not allowed.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit c8bf4d04f970fafb3430d332533e1cf103f2a018)
---
net/xfrm/xfrm_user.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b95a2d6..d1e9ee3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1589,6 +1589,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
+ err = verify_policy_dir(p->dir);
+ if (err)
+ return err;
+
if (p->index)
xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err);
else {
--
1.7.0.2
From 7a400eb025dd53883c3560d0fdb069542f7ad3db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:05 +0000
Subject: [PATCH 12/18] xfrm: remove policy lock when accessing policy->walk.dead
All of the code considers ->dead as a hint that the cached policy
needs to get refreshed. The read side can just drop the read lock
without any side effects.
The write side needs to make sure that it's written only exactly
once. Only possible race is at xfrm_policy_kill(). This is fixed
by checking result of __xfrm_policy_unlink() when needed. It will
always succeed if the policy object is looked up from the hash
list (so some checks are removed), but it needs to be checked if
we are trying to unlink policy via a reference (appropriate
checks added).
Since policy->walk.dead is written exactly once, it no longer
needs to be protected with a write lock.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(backported from commit ea2dea9dacc256fe927857feb423872051642ae7)
---
net/xfrm/xfrm_policy.c | 20 +++++---------------
net/xfrm/xfrm_user.c | 6 +-----
2 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d75047c..110184f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
read_lock(&xp->lock);
- if (xp->walk.dead)
+ if (unlikely(xp->walk.dead))
goto out;
dir = xfrm_policy_id2dir(xp->index);
@@ -297,17 +297,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
- int dead;
-
- write_lock_bh(&policy->lock);
- dead = policy->walk.dead;
policy->walk.dead = 1;
- write_unlock_bh(&policy->lock);
-
- if (unlikely(dead)) {
- WARN_ON(1);
- return;
- }
spin_lock_bh(&xfrm_policy_gc_lock);
hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
@@ -1115,6 +1105,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
if (old_pol)
+ /* Unlinking succeeds always. This is the only function
+ * allowed to delete or replace socket policy.
+ */
__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
write_unlock_bh(&xfrm_policy_lock);
@@ -1705,11 +1698,8 @@ restart:
goto error;
}
- for (pi = 0; pi < npols; pi++) {
- read_lock_bh(&pols[pi]->lock);
+ for (pi = 0; pi < npols; pi++)
pol_dead |= pols[pi]->walk.dead;
- read_unlock_bh(&pols[pi]->lock);
- }
write_lock_bh(&policy->lock);
if (unlikely(pol_dead || stale_bundle(dst))) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d1e9ee3..f9c56e9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1617,13 +1617,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (xp == NULL)
return -ENOENT;
- read_lock(&xp->lock);
- if (xp->walk.dead) {
- read_unlock(&xp->lock);
+ if (unlikely(xp->walk.dead))
goto out;
- }
- read_unlock(&xp->lock);
err = 0;
if (up->hard) {
uid_t loginuid = NETLINK_CB(skb).loginuid;
--
1.7.0.2
This diff is collapsed.
From 4c53c9239069f48ec9a86f8e596c163b72e8bc4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:06 +0000
Subject: [PATCH 16/18] xfrm: remove policy garbage collection
Policies are now properly reference counted and destroyed from
all code paths. The delayed gc is just an overhead now and can