Commit 5681d392 authored by Natanael Copa

main/linux-grsec: upgrade to grsecurity-2.2.0-2.6.35.7-201010232009

parent 650c08ba
From 5ea3677e1f26dd343ed139d2bdad23ae2f1393db Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Mon, 12 Apr 2010 13:43:01 +0000
Subject: [PATCH 01/18] grsec: revert conflicting flow cache changes
---
net/core/flow.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/core/flow.c b/net/core/flow.c
index 5b27992..9601587 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -39,7 +39,7 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0);
static u32 flow_hash_shift;
#define flow_hash_size (1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables);
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
@@ -52,7 +52,7 @@ struct flow_percpu_info {
u32 hash_rnd;
int count;
};
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info);
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
#define flow_hash_rnd_recalc(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
@@ -69,7 +69,7 @@ struct flow_flush_info {
atomic_t cpuleft;
struct completion completion;
};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets);
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
--
1.7.0.2
From 9082391046940c410eac3bad065c8701998b5cab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 3 Mar 2010 04:01:13 +0000
Subject: [PATCH 02/18] gre: fix hard header destination address checking
ipgre_header() can be called with a zero daddr when the gre device is
configured as a multipoint tunnel but still has the NOARP flag set (it is
typically cleared by the userspace arp daemon). If the NOARP packets are
not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= the NBMA IP)
and use that for the route lookup, which may lead to bogus xfrm acquires.
The multicast address check is removed, as sending to a multicast group
should be OK. In fact, if the gre device has a multicast destination
address, ipgre_header() is always called with a multicast address.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3)
---
net/ipv4/ip_gre.c | 7 ++-----
1 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1433338..ac88ce5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1137,12 +1137,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (saddr)
memcpy(&iph->saddr, saddr, 4);
-
- if (daddr) {
+ if (daddr)
memcpy(&iph->daddr, daddr, 4);
- return t->hlen;
- }
- if (iph->daddr && !ipv4_is_multicast(iph->daddr))
+ if (iph->daddr)
return t->hlen;
return -t->hlen;
--
1.7.0.2
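
After the change, ipgre_header() follows a single convention: any non-zero
destination - whether passed in by the caller or preconfigured on the
tunnel, multicast included - makes the cached header complete and returns
t->hlen; only a still-unknown destination returns -t->hlen so the caller
resolves it. A minimal userspace sketch of that convention (the types are
simplified stand-ins, not the kernel structs):

    #include <stdint.h>
    #include <string.h>

    struct tun_sketch { int hlen; };   /* hypothetical stand-in */

    /* Return hlen when the cached header is complete, -hlen when the
     * caller must still resolve the destination (the NBMA case). */
    static int gre_header_sketch(struct tun_sketch *t, uint32_t *hdr_daddr,
                                 const uint32_t *daddr)
    {
        if (daddr)
            memcpy(hdr_daddr, daddr, 4);
        if (*hdr_daddr)
            return t->hlen;
        return -t->hlen;
    }
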
From cd0e9d08480e1e0648e17d099ecf50f6fd8714e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Sat, 20 Mar 2010 02:27:58 +0000
Subject: [PATCH 03/18] ip_gre: include route header_len in max_headroom calculation
Taking the route's header_len into account and updating the gre device's
needed_headroom gives a better hint on the upper bound of the required
headroom. This is useful if the gre traffic is xfrm'ed.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 243aad830e8a4cdda261626fbaeddde16b08d04a)
---
net/ipv4/ip_gre.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ac88ce5..7f1ff73 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -803,11 +803,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
tunnel->err_count = 0;
}
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
+ max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
--
1.7.0.2
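
The fix itself is headroom arithmetic: the worst case is the lower
device's link-layer reserve, plus the GRE header, plus whatever the
(possibly xfrm-transformed) route adds. A standalone sketch with
illustrative numbers (the real values come from the device and the
resolved route):

    #include <stdio.h>

    int main(void)
    {
        unsigned int ll_reserve = 16;  /* LL_RESERVED_SPACE(tdev) */
        unsigned int gre_hlen = 24;    /* outer IP + GRE header */
        unsigned int dst_hlen = 32;    /* rt->u.dst.header_len, e.g. ESP */
        unsigned int have = 48;        /* headroom the skb arrived with */
        unsigned int need = ll_reserve + gre_hlen + dst_hlen;

        /* Publishing 'need' as dev->needed_headroom lets later skbs be
         * allocated large enough, avoiding skb_realloc_headroom(). */
        printf("need %u, have %u -> %s\n", need, have,
               have < need ? "realloc" : "ok");
        return 0;
    }
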
From 89f350c4ec426b4c1db6ef269546940365d918e1 Mon Sep 17 00:00:00 2001
From: Francois Romieu <romieu@fr.zoreil.com>
Date: Sat, 27 Mar 2010 19:35:46 -0700
Subject: [PATCH 05/18] r8169: fix broken register writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is quite similar to b39fe41f481d20c201012e4483e76c203802dda7,
though said registers are not even documented as 64-bit registers
- as opposed to the initial TxDescStartAddress ones - but as single
bytes which must be combined into 32 bits at the MMIO read/write
level before being merged into a 64-bit logical entity.
Credits go to Ben Hutchings <ben@decadent.org.uk> for the MAR
registers (aka "multicast is broken for ages on ARM") and to
Timo Teräs <timo.teras@iki.fi> for the MAC registers.
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 78f1cd02457252e1ffbc6caa44a17424a45286b8)
---
drivers/net/r8169.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 0fe2fc9..24599b5 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2827,8 +2827,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
spin_lock_irq(&tp->lock);
RTL_W8(Cfg9346, Cfg9346_Unlock);
- RTL_W32(MAC0, low);
RTL_W32(MAC4, high);
+ RTL_W32(MAC0, low);
RTL_W8(Cfg9346, Cfg9346_Lock);
spin_unlock_irq(&tp->lock);
@@ -4795,8 +4795,8 @@ static void rtl_set_rx_mode(struct net_device *dev)
mc_filter[1] = swab32(data);
}
- RTL_W32(MAR0 + 0, mc_filter[0]);
RTL_W32(MAR0 + 4, mc_filter[1]);
+ RTL_W32(MAR0 + 0, mc_filter[0]);
RTL_W32(RxConfig, tmp);
--
1.7.0.2
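
The order matters if the hardware latches the full 64-bit value only on
the low-dword write. That model is an assumption for illustration - the
commit message only says the byte registers are combined at the MMIO
level - but it shows why high-then-low is the safe order:

    #include <stdint.h>

    /* Hypothetical device model: two 32-bit windows onto a 64-bit
     * register, where the LOW-word write is the commit point. */
    static volatile uint32_t reg_lo, reg_hi;
    static uint64_t latched;

    static void w32_hi(uint32_t v) { reg_hi = v; }
    static void w32_lo(uint32_t v)
    {
        reg_lo = v;
        latched = ((uint64_t)reg_hi << 32) | reg_lo;  /* commit */
    }

    static void set_mac64(uint64_t mac)
    {
        w32_hi((uint32_t)(mac >> 32));  /* stage the high word... */
        w32_lo((uint32_t)mac);          /* ...then commit via the low one */
    }

Writing low first - the old order - would latch the new low word combined
with the stale high word.
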
From 26654a966adb674afc30d285f7e79535d03c2492 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 31 Mar 2010 02:08:31 +0000
Subject: [PATCH 07/18] r8169: Fix rtl8169_rx_interrupt()
In case a reset is performed, rtl8169_rx_interrupt() is called from
process context instead of softirq context. Special care must be taken
to call the appropriate network core services (netif_rx() instead of
netif_receive_skb()). VLAN handling is also corrected.
Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Diagnosed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 630b943c182d1aed69f244405131902fbcba7ec6)
---
drivers/net/r8169.c | 22 +++++++++++++++++-----
1 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 1484528..bed1d47 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1047,14 +1047,14 @@ static void rtl8169_vlan_rx_register(struct net_device *dev,
}
static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
- struct sk_buff *skb)
+ struct sk_buff *skb, int polling)
{
u32 opts2 = le32_to_cpu(desc->opts2);
struct vlan_group *vlgrp = tp->vlgrp;
int ret;
if (vlgrp && (opts2 & RxVlanTag)) {
- vlan_hwaccel_receive_skb(skb, vlgrp, swab16(opts2 & 0xffff));
+ __vlan_hwaccel_rx(skb, vlgrp, swab16(opts2 & 0xffff), polling);
ret = 0;
} else
ret = -1;
@@ -1071,7 +1071,7 @@ static inline u32 rtl8169_tx_vlan_tag(struct rtl8169_private *tp,
}
static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
- struct sk_buff *skb)
+ struct sk_buff *skb, int polling)
{
return -1;
}
@@ -4480,12 +4480,20 @@ out:
return done;
}
+/*
+ * Warning : rtl8169_rx_interrupt() might be called :
+ * 1) from NAPI (softirq) context
+ * (polling = 1 : we should call netif_receive_skb())
+ * 2) from process context (rtl8169_reset_task())
+ * (polling = 0 : we must call netif_rx() instead)
+ */
static int rtl8169_rx_interrupt(struct net_device *dev,
struct rtl8169_private *tp,
void __iomem *ioaddr, u32 budget)
{
unsigned int cur_rx, rx_left;
unsigned int delta, count;
+ int polling = (budget != ~(u32)0) ? 1 : 0;
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
@@ -4550,8 +4558,12 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
skb_put(skb, pkt_size);
skb->protocol = eth_type_trans(skb, dev);
- if (rtl8169_rx_vlan_skb(tp, desc, skb) < 0)
- netif_receive_skb(skb);
+ if (rtl8169_rx_vlan_skb(tp, desc, skb, polling) < 0) {
+ if (likely(polling))
+ netif_receive_skb(skb);
+ else
+ netif_rx(skb);
+ }
dev->stats.rx_bytes += pkt_size;
dev->stats.rx_packets++;
--
1.7.0.2
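
The context is derived from the budget argument: the reset path invokes
the RX handler with ~(u32)0 as a "no budget" marker, while NAPI passes a
real budget. A kernel-context sketch (not standalone code) of the
dispatch the hunks above add:

    static void rx_deliver_sketch(struct sk_buff *skb, u32 budget)
    {
        int polling = (budget != ~(u32)0) ? 1 : 0;

        if (likely(polling))
            netif_receive_skb(skb);  /* NAPI softirq context */
        else
            netif_rx(skb);           /* process context: defer to softirq */
    }
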
From 21ee14f92ef1b6d4ca965c9b59135f3462919631 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 2 Mar 2010 02:51:56 +0000
Subject: [PATCH 09/18] ipsec: Fix bogus bundle flowi
When I merged the bundle creation code, I introduced a bogus
flowi value in the bundle. Instead of getting it from the caller,
it was set to the flow in the route object, which is totally
different.
The end result is that the bundles we created never match, and
we instead end up with an ever-growing bundle list.
Thanks to Jamal for finding this problem.
Reported-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9)
---
include/net/xfrm.h | 3 ++-
net/ipv4/xfrm4_policy.c | 5 +++--
net/ipv6/xfrm6_policy.c | 3 ++-
net/xfrm/xfrm_policy.c | 7 ++++---
4 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 223e90a..6960be2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -273,7 +273,8 @@ struct xfrm_policy_afinfo {
struct dst_entry *dst,
int nfheader_len);
int (*fill_dst)(struct xfrm_dst *xdst,
- struct net_device *dev);
+ struct net_device *dev,
+ struct flowi *fl);
};
extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 74fb2eb..7009886 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,11 +92,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rtable *rt = (struct rtable *)xdst->route;
- xdst->u.rt.fl = rt->fl;
+ xdst->u.rt.fl = *fl;
xdst->u.dst.dev = dev;
dev_hold(dev);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8ec3d45..3f89ab7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,7 +117,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rt6_info *rt = (struct rt6_info*)xdst->route;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cb81ca3..d75047c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1341,7 +1341,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return err;
}
-static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct xfrm_policy_afinfo *afinfo =
xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1350,7 +1351,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
if (!afinfo)
return -EINVAL;
- err = afinfo->fill_dst(xdst, dev);
+ err = afinfo->fill_dst(xdst, dev, fl);
xfrm_policy_put_afinfo(afinfo);
@@ -1454,7 +1455,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
- err = xfrm_fill_dst(xdst, dev);
+ err = xfrm_fill_dst(xdst, dev, fl);
if (err)
goto free_dst;
--
1.7.0.2
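
The bug is a general callback pitfall: the bundle cached a flow key
copied from the route object rather than the key the caller actually
looked up, so later lookups could never match it. A small userspace
sketch of the corrected pattern, with hypothetical names:

    struct flowi_sketch  { int key; };
    struct bundle_sketch { struct flowi_sketch fl; };

    /* Before: b->fl was copied from the route's own flow (wrong key).
     * After: the caller's flow is threaded through explicitly. */
    static void fill_bundle(struct bundle_sketch *b,
                            const struct flowi_sketch *fl)
    {
        b->fl = *fl;  /* cache exactly the key future lookups compare */
    }
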
From f2c59932757a06851bb740dc757ce2ba1961fc08 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 31 Mar 2010 01:19:49 +0000
Subject: [PATCH 10/18] xfrm: Remove xfrm_state_genid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The xfrm state genid only needs to be matched against the copy
saved in xfrm_dst. So we don't need a global genid at all. In
fact, we don't even need to initialise it.
Based on an observation by Timo Teräs.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 34996cb91dd72f0b0456d8fd3fef4aaee62232f2)
---
net/xfrm/xfrm_state.c | 5 +----
1 files changed, 1 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f2f7c63..8ee733f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -34,7 +34,6 @@
static DEFINE_SPINLOCK(xfrm_state_lock);
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -903,8 +902,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
struct net *net = xs_net(x);
unsigned int h;
- x->genid = ++xfrm_state_genid;
-
list_add(&x->km.all, &net->xfrm.state_all);
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -948,7 +945,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
x->props.reqid == reqid &&
!xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
!xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
- x->genid = xfrm_state_genid;
+ x->genid++;
}
}
--
1.7.0.2
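
The observation generalizes: a generation counter that is only ever
compared against a saved snapshot needs neither a global source nor a
defined initial value - any change relative to the snapshot invalidates
it. A minimal sketch:

    struct genstate { unsigned int genid; };       /* per xfrm_state */
    struct cached   { unsigned int genid_snap; };  /* saved in xfrm_dst */

    static int cache_stale(const struct cached *c, const struct genstate *s)
    {
        return c->genid_snap != s->genid;  /* only inequality matters */
    }

    static void state_bump(struct genstate *s)
    {
        s->genid++;  /* replaces x->genid = ++xfrm_state_genid */
    }
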
From 7a400eb025dd53883c3560d0fdb069542f7ad3db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:05 +0000
Subject: [PATCH 12/18] xfrm: remove policy lock when accessing policy->walk.dead
All of the code treats ->dead as a hint that the cached policy
needs to be refreshed. The read side can just drop the read lock
without any side effects.
The write side needs to make sure that it is written exactly
once. The only possible race is in xfrm_policy_kill(). This is fixed
by checking the result of __xfrm_policy_unlink() when needed. It will
always succeed if the policy object is looked up from the hash
list (so some checks are removed), but it needs to be checked if
we are trying to unlink the policy via a reference (appropriate
checks added).
Since policy->walk.dead is written exactly once, it no longer
needs to be protected with a write lock.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
(backported from commit ea2dea9dacc256fe927857feb423872051642ae7)
---
net/xfrm/xfrm_policy.c | 20 +++++---------------
net/xfrm/xfrm_user.c | 6 +-----
2 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d75047c..110184f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
read_lock(&xp->lock);
- if (xp->walk.dead)
+ if (unlikely(xp->walk.dead))
goto out;
dir = xfrm_policy_id2dir(xp->index);
@@ -297,17 +297,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
- int dead;
-
- write_lock_bh(&policy->lock);
- dead = policy->walk.dead;
policy->walk.dead = 1;
- write_unlock_bh(&policy->lock);
-
- if (unlikely(dead)) {
- WARN_ON(1);
- return;
- }
spin_lock_bh(&xfrm_policy_gc_lock);
hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
@@ -1115,6 +1105,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
if (old_pol)
+ /* Unlinking succeeds always. This is the only function
+ * allowed to delete or replace socket policy.
+ */
__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
write_unlock_bh(&xfrm_policy_lock);
@@ -1705,11 +1698,8 @@ restart:
goto error;
}
- for (pi = 0; pi < npols; pi++) {
- read_lock_bh(&pols[pi]->lock);
+ for (pi = 0; pi < npols; pi++)
pol_dead |= pols[pi]->walk.dead;
- read_unlock_bh(&pols[pi]->lock);
- }
write_lock_bh(&policy->lock);
if (unlikely(pol_dead || stale_bundle(dst))) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d1e9ee3..f9c56e9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1617,13 +1617,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (xp == NULL)
return -ENOENT;
- read_lock(&xp->lock);
- if (xp->walk.dead) {
- read_unlock(&xp->lock);
+ if (unlikely(xp->walk.dead))
goto out;
- }
- read_unlock(&xp->lock);
err = 0;
if (up->hard) {
uid_t loginuid = NETLINK_CB(skb).loginuid;
--
1.7.0.2
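
The pattern relied on here: a flag with a single writer and monotonic
meaning can be read locklessly, because a stale read merely delays the
refresh the reader would perform anyway. A sketch, assuming the single
write is serialized by __xfrm_policy_unlink() succeeding exactly once:

    struct pol_sketch { int dead; };  /* hypothetical stand-in */

    static void pol_kill(struct pol_sketch *p)
    {
        /* The caller has won the unlink race; this store happens once. */
        p->dead = 1;
    }

    static int pol_usable(const struct pol_sketch *p)
    {
        return !p->dead;  /* a hint: a stale 0 only delays a refresh */
    }
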
From 4c53c9239069f48ec9a86f8e596c163b72e8bc4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:06 +0000
Subject: [PATCH 16/18] xfrm: remove policy garbage collection
Policies are now properly reference counted and destroyed from
all code paths. The delayed gc is just overhead now and can
be removed.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 285ead175c5dd5075cab5b6c94f35a3e6c0a3ae6)
---
net/xfrm/xfrm_policy.c | 39 +++++----------------------------------
1 files changed, 5 insertions(+), 34 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0379d82..5606841 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,9 +46,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
static struct kmem_cache *xfrm_dst_cache __read_mostly;
-static HLIST_HEAD(xfrm_policy_gc_list);
-static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
-
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
@@ -288,32 +285,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
}
EXPORT_SYMBOL(xfrm_policy_destroy);
-static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
-{
- atomic_inc(&policy->genid);
-
- if (del_timer(&policy->timer))
- atomic_dec(&policy->refcnt);
-
- xfrm_pol_put(policy);
-}
-
-static void xfrm_policy_gc_task(struct work_struct *work)
-{
- struct xfrm_policy *policy;
- struct hlist_node *entry, *tmp;
- struct hlist_head gc_list;
-
- spin_lock_bh(&xfrm_policy_gc_lock);
- gc_list.first = xfrm_policy_gc_list.first;
- INIT_HLIST_HEAD(&xfrm_policy_gc_list);
- spin_unlock_bh(&xfrm_policy_gc_lock);
-
- hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
- xfrm_policy_gc_kill(policy);
-}
-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
-
/* Rule must be locked. Release descentant resources, announce
* entry dead. The rule must be unlinked from lists to the moment.
*/
@@ -322,11 +293,12 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
{
policy->walk.dead = 1;
- spin_lock_bh(&xfrm_policy_gc_lock);
- hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
- spin_unlock_bh(&xfrm_policy_gc_lock);
+ atomic_inc(&policy->genid);
- schedule_work(&xfrm_policy_gc_work);
+ if (del_timer(&policy->timer))
+ xfrm_pol_put(policy);
+
+ xfrm_pol_put(policy);
}
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
@@ -2535,7 +2507,6 @@ static void xfrm_policy_fini(struct net *net)
audit_info.sessionid = -1;
audit_info.secid = 0;
xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
- flush_work(&xfrm_policy_gc_work);
WARN_ON(!list_empty(&net->xfrm.policy_all));
--
1.7.0.2
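
The new kill path from the hunk above, restated with the reference
accounting spelled out (the calls match the diff; the comments are
added):

    static void xfrm_policy_kill(struct xfrm_policy *policy)
    {
        policy->walk.dead = 1;          /* write-once dead mark */

        atomic_inc(&policy->genid);     /* invalidate cached bundles */

        if (del_timer(&policy->timer))  /* nonzero: timer still pending, */
            xfrm_pol_put(policy);       /* so its reference is ours to drop */

        xfrm_pol_put(policy);           /* drop the unlinked entry's ref */
    }
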
From fede05e99e2d860e97bc877b8b77fb9e63f55cc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:07 +0000
Subject: [PATCH 17/18] flow: delayed deletion of flow cache entries
Speed up lookups by freeing flow cache entries later. After
virtualizing the flow cache entry operations, the flow cache may now
end up calling a policy or bundle destructor, which can be slowish.
As gc_list is more efficient with a doubly linked list, the flow cache
is converted to use the common hlist and list macros where appropriate.
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 8e4795605d1e1b39113818ad7c147b8a867a1f6a)
---
net/core/flow.c | 100 ++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 69 insertions(+), 31 deletions(-)
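
The core of the change, visible in the hunks below, is the classic
splice-then-free pattern: dead entries queue on a spinlock-protected
global list, and a work item moves the whole list aside in O(1) under
the lock, then runs the potentially slow destructors with the lock
dropped. Sketched in kernel style:

    /* Kernel-context sketch; mirrors flow_cache_gc_task() below. */
    static void flow_cache_gc_sketch(struct work_struct *work)
    {
        struct flow_cache_entry *fce, *n;
        LIST_HEAD(gc_list);

        spin_lock_bh(&flow_cache_gc_lock);
        list_splice_tail_init(&flow_cache_gc_list, &gc_list);  /* O(1) */
        spin_unlock_bh(&flow_cache_gc_lock);

        /* The slow part (policy/bundle destructors) runs unlocked. */
        list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
            flow_entry_kill(fce);
    }
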
diff --git a/net/core/flow.c b/net/core/flow.c
index 521df52..1619006 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,7 +26,10 @@
#include <linux/security.h>
struct flow_cache_entry {
- struct flow_cache_entry *next;
+ union {
+ struct hlist_node hlist;
+ struct list_head gc_list;
+ } u;
u16 family;
u8 dir;
u32 genid;
@@ -35,7 +38,7 @@ struct flow_cache_entry {
};
struct flow_cache_percpu {
- struct flow_cache_entry **hash_table;
+ struct hlist_head *hash_table;
int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
@@ -62,6 +65,9 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0);
static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep;
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
+
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
@@ -86,38 +92,66 @@ static int flow_entry_valid(struct flow_cache_entry *fle)
return 1;
}
-static void flow_entry_kill(struct flow_cache *fc,
- struct flow_cache_percpu *fcp,
- struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
{
if (fle->object)
fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
- fcp->hash_count--;
+}
+
+static void flow_cache_gc_task(struct work_struct *work)
+{
+ struct list_head gc_list;
+ struct flow_cache_entry *fce, *n;
+
+ INIT_LIST_HEAD(&gc_list);
+ spin_lock_bh(&flow_cache_gc_lock);
+ list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&flow_cache_gc_lock);
+
+ list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)