赞
踩
基于 kernel 4.19
xfrm相关的头文件如下:
./include/net/xfrm.h 和网络相关的结构定义: xfrm_state, xfrm_policy
./include/net/netns/xfrm.h 和namespace相关定义: netns_xfrm
./include/uapi/linux/xfrm.h
XFRM相关的代码在 net/xfrm/ 目录下
- struct net {
- ...
- #ifdef CONFIG_XFRM
- struct netns_xfrm xfrm;
- #endif
- ...
- }
-
- void __init xfrm_init(void)
- {
- register_pernet_subsys(&xfrm_net_ops);
- xfrm_dev_init();
- seqcount_init(&xfrm_policy_hash_generation);
- xfrm_input_init();
-
- RCU_INIT_POINTER(xfrm_if_cb, NULL);
- synchronize_rcu();
- }
-
- void __init xfrm4_init(void)
- {
- xfrm4_state_init();
- xfrm4_policy_init();
- xfrm4_protocol_init();
- register_pernet_subsys(&xfrm4_net_ops);
- }
-
- static struct pernet_operations __net_initdata xfrm_net_ops = {
- .init = xfrm_net_init,
- .exit = xfrm_net_exit,
- };
-
- static int __net_init xfrm_net_init(struct net *net)
- {
- ...
- rv = xfrm_statistics_init(net);
- if (rv < 0)
- goto out_statistics;
- rv = xfrm_state_init(net);
- if (rv < 0)
- goto out_state;
- rv = xfrm_policy_init(net);
- if (rv < 0)
- goto out_policy;
- ...
- }
-
- int __net_init xfrm_state_init(struct net *net)
- {
- ...
- net->xfrm.state_bydst = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bydst)
- goto out_bydst;
- net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bysrc)
- goto out_bysrc;
- net->xfrm.state_byspi = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_byspi)
- goto out_byspi;
- net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
- ...
- }

内核 XFRM框架初始化:
void __init xfrm4_init(void)
{
xfrm4_state_init(); // 将 xfrm4_state_afinfo 添加到 *xfrm_state_afinfo[NPROTO];
xfrm4_policy_init(); //将 xfrm4_policy_afinfo添加到 *xfrm_policy_afinfo[AF_INET6 + 1]
xfrm4_protocol_init(); //xfrm4_input_afinfo static 添加到*xfrm_input_afinfo[AF_INET6 + 1];
register_pernet_subsys(&xfrm4_net_ops);
}
ip_rcv -> ip_rcv_finish -> ip_rcv_finish_core(查找路由) -> dst_input
ip_rcv_finish_core ->ip_route_input_noref -> ip_route_input_rcu -> ip_route_input_slow -> fib_lookup -> res->type :
a. RTN_BROADCAST
b. RTN_LOCAL -> rt_dst_alloc
```C
1630 rt->dst.output = ip_output;
1631 if (flags & RTCF_LOCAL)
1632 rt->dst.input = ip_local_deliver;
```
1. input: ip_local_deliver
2. output: ip_output
XFRM 初始化:route.c: ip_rt_init
```C
3246 #ifdef CONFIG_XFRM
3247 xfrm_init();
3248 xfrm4_init();
3249 #endif
```
3.1 xfrm4_init();
379 void __init xfrm4_init(void)
380 {
381 xfrm4_state_init();//init xfrm_state_afinfo
382 xfrm4_policy_init();//init xfrm_policy_afinfo
383 xfrm4_protocol_init();//set the xfrm 协议回调函数
384 register_pernet_subsys(&xfrm4_net_ops);
385 }
static const struct xfrm_input_afinfo xfrm4_input_afinfo = {
.family = AF_INET,
.callback = xfrm4_rcv_cb,
};
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
.get_tos = xfrm4_get_tos,
.init_path = xfrm4_init_path,
.fill_dst = xfrm4_fill_dst,
.blackhole_route = ipv4_blackhole_route,
};
74 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
75 .family = AF_INET,
76 .proto = IPPROTO_IPIP,
77 .eth_proto = htons(ETH_P_IP),
78 .owner = THIS_MODULE,
79 .init_flags = xfrm4_init_flags,
80 .init_tempsel = __xfrm4_init_tempsel,
81 .init_temprop = xfrm4_init_temprop,
82 .output = xfrm4_output,
83 .output_finish = xfrm4_output_finish,
84 .extract_input = xfrm4_extract_input,
85 .extract_output = xfrm4_extract_output,
86 .transport_finish = xfrm4_transport_finish,
87 .local_error = xfrm4_local_error,
88 };
- 126 static struct xfrm_mode xfrm4_tunnel_mode = {
- 127 .input2 = xfrm4_mode_tunnel_input,
- 128 .input = xfrm_prepare_input,
- 129 .output2 = xfrm4_mode_tunnel_output,
- 130 .output = xfrm4_prepare_output,
- 131 .gso_segment = xfrm4_mode_tunnel_gso_segment,
- 132 .xmit = xfrm4_mode_tunnel_xmit,
- 133 .owner = THIS_MODULE,
- 134 .encap = XFRM_MODE_TUNNEL,
- 135 .flags = XFRM_MODE_FLAG_TUNNEL,
- 136 };
## IPSEC接收处理
- static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
- {
- ...
- rcu_read_lock();
- {
- int protocol = ip_hdr(skb)->protocol;
- const struct net_protocol *ipprot;
- int raw;
-
- resubmit:
- raw = raw_local_deliver(skb, protocol);
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot) {
- int ret;
-
- if (!ipprot->no_policy) {
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
- }
- nf_reset(skb);
- }
-
- ret = ipprot->handler(skb);
- if (ret < 0) {
- protocol = -ret;
- goto resubmit;
- }
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
- } else {
- if (!raw) {
- if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_PROT_UNREACH, 0);
- }
- kfree_skb(skb);
- } else {
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
- consume_skb(skb);
- }
- }
- }
- out:
- rcu_read_unlock();
-
- return 0;
- }

xfrm4_policy_check -> __xfrm_policy_check2 -> __xfrm_policy_check
检查SP 策略
inet_protos 中挂载了各类协议(udp/tcp/icmp/esp/ah)的操作集,对于IPSEC的AH或者ESP协议来说,ipprot->handler(skb) 处理函数就是 xfrm4_rcv; 如果是NAT-T情况,报文外层是UDP封装,接收处理函数则是 udp_rcv:
- static struct xfrm4_protocol esp4_protocol = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
- .cb_handler = esp4_rcv_cb,
- .err_handler = esp4_err,
- .priority = 0,
- };
- static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
- .netns_ok = 1,
- };
数据包内部就是被IPSEC封装的报文,处理顺序是:
xfrm4_rcv -> xfrm4_rcv_spi -> xfrm_input
xfrm_input 处理
1. secpath_set(skb); 创建IPSEC的安全路径
2. xfrm_parse_spi(skb, nexthdr, &spi, &seq): 根据AH/ESP头解析报文,得到报文的 SPI 和 seq
- struct ip_auth_hdr {
- __u8 nexthdr;
- __u8 hdrlen; /* This one is measured in 32 bit units! */
- __be16 reserved;
- __be32 spi;
- __be32 seq_no; /* Sequence number */
- __u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! */
- };
-
- struct ip_esp_hdr {
- __be32 spi;
- __be32 seq_no; /* Sequence number */
- __u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */
- };
3. xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
根据 daddr、spi、nexthdr(协议类型:ah/esp)查找 SA, 并对SA做比如状态、封装类型、抗重放和SA超时等检查;
nexthdr = x->type->input(x, skb);
调用与SA相对应的协议类型的 input 处理函数;
- static const struct xfrm_type ah_type =
- {
- .description = "AH4",
- .owner = THIS_MODULE,
- .proto = IPPROTO_AH,
- .flags = XFRM_TYPE_REPLAY_PROT,
- .init_state = ah_init_state,
- .destructor = ah_destroy,
- .input = ah_input,
- .output = ah_output
- };
-
- static const struct xfrm_type esp_type =
- {
- .description = "ESP4",
- .owner = THIS_MODULE,
- .proto = IPPROTO_ESP,
- .flags = XFRM_TYPE_REPLAY_PROT,
- .init_state = esp_init_state,
- .destructor = esp_destroy,
- .get_mtu = esp4_get_mtu,
- .input = esp_input,
- .output = esp_output,
- };
-
- static const struct xfrm_type ipcomp_type = {
- .description = "IPCOMP4",
- .owner = THIS_MODULE,
- .proto = IPPROTO_COMP,
- .init_state = ipcomp4_init_state,
- .destructor = ipcomp_destroy,
- .input = ipcomp_input,
- .output = ipcomp_output
- };

4. esp_input: 根据协议ah/esp类型,调用相应的输入处理函数
skb_cow_data(skb, 0, &trailer); 得到报文中的ESP尾部指针的地址 trailer;
然后找到加密区域 aead,
ESP_SKB_CB(skb)->tmp = tmp;
seqhi = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
esp_input_set_header(skb, seqhi);
sg_init_table(sg, nfrags);
err = skb_to_sgvec(skb, sg, 0, skb->len);
if (unlikely(err < 0)) {
kfree(tmp);
goto out;
}
skb->ip_summed = CHECKSUM_NONE;
if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
else
aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
err = crypto_aead_decrypt(req); //解密
esp_input_done2(skb, err);
5. esp_input_done2
解密完成后,根据IPSEC的模式:传输模式和隧道模式,对数据报文进行处理
esp_remove_trailer(skb); //移除报文末尾的ESP尾部;
如果是NAT-T,封装的是UDP协议
- if (x->encap) {
- struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
-
- /*
- * 1) if the NAT-T peer's IP or port changed then
- * advertize the change to the keying daemon.
- * This is an inbound SA, so just compare
- * SRC ports.
- */
- if (iph->saddr != x->props.saddr.a4 ||
- uh->source != encap->encap_sport) {
- xfrm_address_t ipaddr;
-
- ipaddr.a4 = iph->saddr;
- km_new_mapping(x, &ipaddr, uh->source);
-
- /* XXX: perhaps add an extra
- * policy check here, to see
- * if we should allow or
- * reject a packet from a
- * different source
- * address/port.
- */
- }
-
- /*
- * 2) ignore UDP/TCP checksums in case
- * of NAT-T in Transport Mode, or
- * perform other post-processing fixes
- * as per draft-ietf-ipsec-udp-encaps-06,
- * section 3.1.2
- */
- if (x->props.mode == XFRM_MODE_TRANSPORT)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }

得到SKB的IP头部以及长度等,设置一些SKB的其他参数,然后返回xfrm_input函数继续往下执行;
- inner_mode = x->inner_mode;
-
- if (x->sel.family == AF_UNSPEC) {
- inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (inner_mode == NULL) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
- }
-
- if (inner_mode->input(x, skb)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
-
- if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
- decaps = 1;
- break;
- }

inner_mode = x->inner_mode;
inner_mode 就是 xfrm4_tunnel_mode / xfrm4_transport_mode, 去掉外层的IP头部,并设置一些SKB的数据,隧道模式下得到内层的IP头部;
inner_mode->input(x, skb) : xfrm_prepare_input 或者 xfrm4_transport_input
xfrm_prepare_input 函数处理:
x->outer_mode->afinfo->extract_input(x, skb); //xfrm4_extract_input
inner_mode->input2(x, skb); // xfrm4_mode_tunnel_input
隧道模式下,重设 SKB包的 protocol、IP头、 网络层头部地址、MAC头等,然后重新把SKB放入到 netif_rx 函数,即SKB重新入协议栈
gro_cells_receive
传输模式下,调用xfrm4_transport_input 后,从NF_INET_PRE_ROUTING点重新入协议栈,虽然前面已经经过PREROUTING 和 INPUT点,但解密数据后新的协议和端口号仍然可能做NAT,之后重新进行路由选择,调用路由的input函数 skb_dst(skb)->input(skb)
x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
xfrm4_transport_finish
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
xfrm4_rcv_encap_finish);
xfrm4_rcv_encap_finish -> ip_route_input_noref -> xfrm4_rcv_encap_finish2 -> dst_input -> skb_dst(skb)->input(skb);
可能是ip_local_deliver()或ip_forward(),完成后面的协议栈。
## IPSEC 发送
xfrm4_output
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, skb, NULL, skb_dst(skb)->dev,
__xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
__xfrm4_output
x->outer_mode->afinfo->output_finish(sk, skb);
xfrm4_output_finish -> xfrm_output -> xfrm_output2 -> xfrm_output_resume -> xfrm_output_one -> x->outer_mode->output(x, skb)
x->outer_mode->output(x, skb) <-> xfrm4_prepare_output -> x->outer_mode->output2(x, skb);
x->outer_mode->output2(x, skb); <-> xfrm4_mode_tunnel_output
## IPSEC 转发
路由查找:
ip_queue_xmit -> __ip_queue_xmit -> ip_route_output_ports -> ip_route_output_flow -> xfrm_lookup_route -> xfrm_lookup -> xfrm_lookup_with_ifid :
/* Finds/creates a bundle for given flow and if_id */
xfrm_lookup_with_ifid -> 查找
1. xfrm_sk_policy_lookup -> xfrm_selector_match -> security_xfrm_policy_lookup
2. xfrm_policy_lookup_bytype
xfrm_resolve_and_create_bundle //创建
xfrm_bundle_lookup //如果已经创建了,查找bundle
数据封装发送:
ip_queue_xmit -> __ip_queue_xmit -> ip_local_out -> __ip_local_out -> dst_output -> xfrm4_output -> __xfrm4_output -> xfrm4_output_finish -> xfrm_output -> xfrm_output2 -> xfrm_output_resume -> xfrm_output_one -> x->outer_mode->output(x, skb)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。