当前位置:   article > 正文

linux内核协议栈 icmp 报文收发流程_linux kernel icmp

linux kernel icmp

目录

1 ICMP报文接收

1.1  icmp_rcv() 实现

1.2 type类型对应处理函数定义 icmp_pointers[NR_ICMP_TYPES + 1]

1.3 处理 ping 请求处理 icmp_echo()

1.4 时间戳请求处理 icmp_timestamp()

1.5 Unreach 数据处理 icmp_unreach()

1.5.1 调用传输层接口差错报文处理 icmp_socket_deliver()

1.6 redirect 数据处理 icmp_redirect()

1.7 ICMP报文应答 icmp_reply()

1.7.1 速率控制函数 icmpv4_xrlim_allow()

1.7.2 数据发送 icmp_push_reply()

2 ICMP报文发送

2.1 icmp_send() 实现


1 ICMP报文接收

1.1  icmp_rcv() 实现

在ip层判断是icmp报文之后,会调用 icmp_rcv() 来处理 icmp 类型的报文

  1. 对数据包进行合理性检查
  2. 根据icmp的类型,分类处理
  1. /*
  2. * Deal with incoming ICMP packets.
  3. */
  4. int icmp_rcv(struct sk_buff *skb)
  5. {
  6. struct icmphdr *icmph;
  7. struct rtable *rt = skb_rtable(skb);
  8. struct net *net = dev_net(rt->dst.dev);
  9. // 基于策略的高扩展性的网络安全架构,对于这个内核子架构不清楚此处分析不了,跳过。
  10. if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
  11. struct sec_path *sp = skb_sec_path(skb);
  12. int nh;
  13. if (!(sp && sp->xvec[sp->len - 1]->props.flags &
  14. XFRM_STATE_ICMP))
  15. goto drop;
  16. if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
  17. goto drop;
  18. nh = skb_network_offset(skb);
  19. skb_set_network_header(skb, sizeof(*icmph));
  20. if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
  21. goto drop;
  22. skb_set_network_header(skb, nh);
  23. }
  24. ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
  25. //验证校验和信息
  26. switch (skb->ip_summed) {
  27. case CHECKSUM_COMPLETE:
  28. if (!csum_fold(skb->csum))
  29. break;
  30. /* fall through */
  31. case CHECKSUM_NONE:
  32. skb->csum = 0;
  33. if (__skb_checksum_complete(skb))
  34. goto csum_error;
  35. }
  36. if (!pskb_pull(skb, sizeof(*icmph)))
  37. goto error;
  38. //获取icmp头部
  39. icmph = icmp_hdr(skb);
  40. ICMPMSGIN_INC_STATS_BH(net, icmph->type);
  41. /*
  42. * 18 is the highest 'known' ICMP type. Anything else is a mystery
  43. *
  44. * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
  45. * discarded.
  46. */
  47. //type类型错误直接丢掉
  48. if (icmph->type > NR_ICMP_TYPES)
  49. goto error;
  50. /*
  51. * Parse the ICMP message
  52. */
  53. //判断是否丢弃掉多播类型的icmp数据包
  54. //只处理echo、timestamp、address_mask_request、address_mask_reply类型的多播icmp数据包
  55. if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
  56. /*
  57. * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
  58. * silently ignored (we let user decide with a sysctl).
  59. * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
  60. * discarded if to broadcast/multicast.
  61. */
  62. if ((icmph->type == ICMP_ECHO ||
  63. icmph->type == ICMP_TIMESTAMP) &&
  64. net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
  65. goto error;
  66. }
  67. if (icmph->type != ICMP_ECHO &&
  68. icmph->type != ICMP_TIMESTAMP &&
  69. icmph->type != ICMP_ADDRESS &&
  70. icmph->type != ICMP_ADDRESSREPLY) {
  71. goto error;
  72. }
  73. }
  74. //根据icmp数据包类型,调用相应的处理函数
  75. icmp_pointers[icmph->type].handler(skb);
  76. drop:
  77. kfree_skb(skb);
  78. return 0;
  79. csum_error:
  80. ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
  81. error:
  82. ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
  83. goto drop;
  84. }

1.2 type类型对应处理函数定义 icmp_pointers[NR_ICMP_TYPES + 1]

  1. static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
  2. [ICMP_ECHOREPLY] = {
  3. .handler = ping_rcv,
  4. },
  5. [1] = {
  6. .handler = icmp_discard,
  7. .error = 1,
  8. },
  9. [2] = {
  10. .handler = icmp_discard,
  11. .error = 1,
  12. },
  13. [ICMP_DEST_UNREACH] = {
  14. .handler = icmp_unreach,
  15. .error = 1,
  16. },
  17. [ICMP_SOURCE_QUENCH] = {
  18. .handler = icmp_unreach,
  19. .error = 1,
  20. },
  21. [ICMP_REDIRECT] = {
  22. .handler = icmp_redirect,
  23. .error = 1,
  24. },
  25. [6] = {
  26. .handler = icmp_discard,
  27. .error = 1,
  28. },
  29. [7] = {
  30. .handler = icmp_discard,
  31. .error = 1,
  32. },
  33. [ICMP_ECHO] = {
  34. .handler = icmp_echo,
  35. },
  36. [9] = {
  37. .handler = icmp_discard,
  38. .error = 1,
  39. },
  40. [10] = {
  41. .handler = icmp_discard,
  42. .error = 1,
  43. },
  44. [ICMP_TIME_EXCEEDED] = {
  45. .handler = icmp_unreach,
  46. .error = 1,
  47. },
  48. [ICMP_PARAMETERPROB] = {
  49. .handler = icmp_unreach,
  50. .error = 1,
  51. },
  52. [ICMP_TIMESTAMP] = {
  53. .handler = icmp_timestamp,
  54. },
  55. [ICMP_TIMESTAMPREPLY] = {
  56. .handler = icmp_discard,
  57. },
  58. [ICMP_INFO_REQUEST] = {
  59. .handler = icmp_discard,
  60. },
  61. [ICMP_INFO_REPLY] = {
  62. .handler = icmp_discard,
  63. },
  64. [ICMP_ADDRESS] = {
  65. .handler = icmp_discard,
  66. },
  67. [ICMP_ADDRESSREPLY] = {
  68. .handler = icmp_discard,
  69. },
  70. };

1.3 处理 ping 请求处理 icmp_echo()

该函数用于处理远端的ping请求报文,即收到type=8的 icmp 报文,核心步骤如下:

  1. 将 icmp 的 type 设置为 ICMP_ECHOREPLY(0)
  2. 调用 icmp_reply() 将该数据包发送出去。
  1. /*
  2. * Handle ICMP_ECHO ("ping") requests.
  3. *
  4. * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  5. * requests.
  6. * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be
  7. * included in the reply.
  8. * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
  9. * echo requests, MUST have default=NOT.
  10. * See also WRT handling of options once they are done and working.
  11. */
  12. static void icmp_echo(struct sk_buff *skb)
  13. {
  14. struct net *net;
  15. net = dev_net(skb_dst(skb)->dev);
  16. if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
  17. struct icmp_bxm icmp_param;
  18. icmp_param.data.icmph = *icmp_hdr(skb);
  19. icmp_param.data.icmph.type = ICMP_ECHOREPLY;
  20. icmp_param.skb = skb;
  21. icmp_param.offset = 0;
  22. icmp_param.data_len = skb->len;
  23. icmp_param.head_len = sizeof(struct icmphdr);
  24. icmp_reply(&icmp_param, skb);
  25. }
  26. }

1.4 时间戳请求处理 icmp_timestamp()

收到远端发送的时间戳请求报文,即 type=13 的 icmp 报文,核心步骤如下:

  1. 获取当前时间戳
  2. 将 icmp 的 type 设置为 ICMP_TIMESTAMPREPLY(14)
  3. 调用 icmp_reply() 将该数据包发送出去。
  1. /*
  2. * Handle ICMP Timestamp requests.
  3. * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests.
  4. * SHOULD be in the kernel for minimum random latency.
  5. * MUST be accurate to a few minutes.
  6. * MUST be updated at least at 15Hz.
  7. */
  8. static void icmp_timestamp(struct sk_buff *skb)
  9. {
  10. struct timespec tv;
  11. struct icmp_bxm icmp_param;
  12. /*
  13. * Too short.
  14. */
  15. if (skb->len < 4)
  16. goto out_err;
  17. /*
  18. * Fill in the current time as ms since midnight UT:
  19. */
  20. getnstimeofday(&tv);
  21. icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC +
  22. tv.tv_nsec / NSEC_PER_MSEC);
  23. icmp_param.data.times[2] = icmp_param.data.times[1];
  24. if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
  25. BUG();
  26. icmp_param.data.icmph = *icmp_hdr(skb);
  27. icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
  28. icmp_param.data.icmph.code = 0;
  29. icmp_param.skb = skb;
  30. icmp_param.offset = 0;
  31. icmp_param.data_len = 0;
  32. icmp_param.head_len = sizeof(struct icmphdr) + 12;
  33. icmp_reply(&icmp_param, skb);
  34. out:
  35. return;
  36. out_err:
  37. ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
  38. goto out;
  39. }

1.5 Unreach 数据处理 icmp_unreach()

收到远端发过来的目的不可达报文,即 type=3(ICMP_DEST_UNREACH)的 icmp 报文。此外,根据 icmp_pointers[] 的定义,type=4(ICMP_SOURCE_QUENCH)、type=11(ICMP_TIME_EXCEEDED)、type=12(ICMP_PARAMETERPROB)的报文也由该接口处理。icmp_unreach() 的核心逻辑是:根据 icmp 有效载荷中携带的原始 ip 头部信息,调用对应传输层协议的错误处理函数进行处理。

  1. /*
  2. * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_SOURCE_QUENCH.
  3. */
  4. static void icmp_unreach(struct sk_buff *skb)
  5. {
  6. const struct iphdr *iph;
  7. struct icmphdr *icmph;
  8. struct net *net;
  9. u32 info = 0;
  10. net = dev_net(skb_dst(skb)->dev);
  11. /*
  12. * Incomplete header ?
  13. * Only checks for the IP header, there should be an
  14. * additional check for longer headers in upper levels.
  15. */
  16. if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  17. goto out_err;
  18. //获取icmp首部
  19. icmph = icmp_hdr(skb);
  20. iph = (const struct iphdr *)skb->data;
  21. //判断ip首部是否完整
  22. if (iph->ihl < 5) /* Mangled header, drop. */
  23. goto out_err;
  24. /*仅处理type类型为3或者12的数据包
  25. 1、当类型为3时,仅处理code为frag needed的报文
  26. a)当系统不支持pmtu时,丢弃该数据包
  27. b)当系统支持pmtu时,调用ip_rt_frag_needed修改pmtu的值
  28. 2、当type类型为12时,则通过icmph->un.gateway获取出错偏移值(相对于数据包)
  29. */
  30. if (icmph->type == ICMP_DEST_UNREACH) {
  31. switch (icmph->code & 15) {
  32. case ICMP_NET_UNREACH:
  33. case ICMP_HOST_UNREACH:
  34. case ICMP_PROT_UNREACH:
  35. case ICMP_PORT_UNREACH:
  36. break;
  37. case ICMP_FRAG_NEEDED:
  38. if (ipv4_config.no_pmtu_disc) {
  39. LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
  40. &iph->daddr);
  41. } else {
  42. info = ntohs(icmph->un.frag.mtu);
  43. if (!info)
  44. goto out;
  45. }
  46. break;
  47. case ICMP_SR_FAILED:
  48. LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"),
  49. &iph->daddr);
  50. break;
  51. default:
  52. break;
  53. }
  54. if (icmph->code > NR_ICMP_UNREACH)
  55. goto out;
  56. } else if (icmph->type == ICMP_PARAMETERPROB)
  57. info = ntohl(icmph->un.gateway) >> 24;
  58. /*
  59. * Throw it at our lower layers
  60. *
  61. * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed
  62. * header.
  63. * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the
  64. * transport layer.
  65. * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to
  66. * transport layer.
  67. */
  68. /*
  69. * Check the other end isn't violating RFC 1122. Some routers send
  70. * bogus responses to broadcast frames. If you see this message
  71. * first check your netmask matches at both ends, if it does then
  72. * get the other vendor to fix their kit.
  73. */
  74. //对于目的地址是广播的icmp数据包,且需要忽略时,则打印错误并忽略该数据包
  75. if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
  76. inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
  77. net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
  78. &ip_hdr(skb)->saddr,
  79. icmph->type, icmph->code,
  80. &iph->daddr, skb->dev->name);
  81. goto out;
  82. }
  83. icmp_socket_deliver(skb, info);
  84. out:
  85. return;
  86. out_err:
  87. ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
  88. goto out;
  89. }

1.5.1 调用传输层接口差错报文处理 icmp_socket_deliver()

  1. static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
  2. {
  3. //此时的iph,是icmp有效载荷中的ip头部信息,在icmp_rcv中已经将skb->data指向icmp报文的有效载荷部分了
  4. const struct iphdr *iph = (const struct iphdr *) skb->data;
  5. const struct net_protocol *ipprot;
  6. //获取传输层协议值
  7. int protocol = iph->protocol;
  8. /* Checkin full IP header plus 8 bytes of protocol to
  9. * avoid additional coding at protocol handlers.
  10. */
  11. /*检测icmp报文中有效载荷部分内容长度是否大于等于ip头部信息加上8字节
  12. 在发送icmp差错报文时,会将icmp数据部分的值设置为ip头部信息+ ip有效载荷的前8个字节,
  13. 这样就可以判断是传输层的那个应用数据发送出错*/
  14. if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
  15. return;
  16. //首先调用raw_icmp_error,将差错信息发送给感兴趣的raw socket
  17. raw_icmp_error(skb, protocol, info);
  18. rcu_read_lock();
  19. //根据protocol值,查找符合条件的4层接收处理hash数组inet_protos,
  20. //调用其错误处理函数进行后续处理
  21. ipprot = rcu_dereference(inet_protos[protocol]);
  22. if (ipprot && ipprot->err_handler)
  23. ipprot->err_handler(skb, info);
  24. rcu_read_unlock();
  25. }

1.6 redirect 数据处理 icmp_redirect()

收到远端发过来的重定向报文,即 type=5(ICMP_REDIRECT)的 icmp 报文,函数如下:

  1. /*
  2. * Handle ICMP_REDIRECT.
  3. */
  4. static bool icmp_redirect(struct sk_buff *skb)
  5. {
  6. if (skb->len < sizeof(struct iphdr)) {
  7. __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
  8. return false;
  9. }
  10. if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
  11. /* there aught to be a stat */
  12. return false;
  13. }
  14. icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
  15. return true;
  16. }

1.7 ICMP报文应答 icmp_reply()

在前面介绍 icmp echo 的应答以及 icmp timestamp 的应答时,函数都是调用 icmp_reply() 发送数据的,该函数核心功能如下:

  1. 查找路由,若查找失败,直接返回;查找成功执行第二步
  2. 调用速率限制函数 icmpv4_xrlim_allow() 进行速率限制,当允许发送时,执行第三步,否则返回
  3. 调用 icmp_push_reply() 发送数据
  1. /*
  2. * Driving logic for building and sending ICMP messages.
  3. */
  4. static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
  5. {
  6. struct ipcm_cookie ipc;
  7. struct rtable *rt = skb_rtable(skb);
  8. struct net *net = dev_net(rt->dst.dev);
  9. struct flowi4 fl4;
  10. struct sock *sk;
  11. struct inet_sock *inet;
  12. __be32 daddr, saddr;
  13. if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
  14. return;
  15. sk = icmp_xmit_lock(net);
  16. if (sk == NULL)
  17. return;
  18. inet = inet_sk(sk);
  19. icmp_param->data.icmph.checksum = 0;
  20. inet->tos = ip_hdr(skb)->tos;
  21. daddr = ipc.addr = ip_hdr(skb)->saddr;
  22. saddr = fib_compute_spec_dst(skb);
  23. ipc.opt = NULL;
  24. ipc.tx_flags = 0;
  25. if (icmp_param->replyopts.opt.opt.optlen) {
  26. ipc.opt = &icmp_param->replyopts.opt;
  27. if (ipc.opt->opt.srr)
  28. daddr = icmp_param->replyopts.opt.opt.faddr;
  29. }
  30. memset(&fl4, 0, sizeof(fl4));
  31. fl4.daddr = daddr;
  32. fl4.saddr = saddr;
  33. fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
  34. fl4.flowi4_proto = IPPROTO_ICMP;
  35. security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
  36. rt = ip_route_output_key(net, &fl4);
  37. if (IS_ERR(rt))
  38. goto out_unlock;
  39. if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
  40. icmp_param->data.icmph.code))
  41. icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
  42. ip_rt_put(rt);
  43. out_unlock:
  44. icmp_xmit_unlock(sk);
  45. }

1.7.1 速率控制函数 icmpv4_xrlim_allow()

功能:判断是否允许发送 icmp 报文。对于不需要限速的报文,直接返回 true(允许发送);否则调用 inet_peer_xrlim_allow() 进行限速判断,由其返回值决定是否允许发送。

  1.  对于不支持的icmp type类型,返回允许发送
  2.  对于type类型为ICMP_DEST_UNREACH code为ICMP_FRAG_NEEDED的数据包,允许发送
  3. 对于目的设备为回环设备的,返回允许发送
  4. 对于其他类型的icmp报文,只有 ipv4.sysctl_icmp_ratemask 中对应位为1的数据包才会进行限速,对于其他类型的数据包,直接返回允许发送(即不限速)
  1. /*
  2. * Send an ICMP frame.
  3. */
  4. static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
  5. struct flowi4 *fl4, int type, int code)
  6. {
  7. struct dst_entry *dst = &rt->dst;
  8. bool rc = true;
  9. if (type > NR_ICMP_TYPES)
  10. goto out;
  11. /* Don't limit PMTU discovery. */
  12. if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
  13. goto out;
  14. /* No rate limit on loopback */
  15. if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
  16. goto out;
  17. /* Limit if icmp type is enabled in ratemask. */
  18. if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
  19. struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
  20. rc = inet_peer_xrlim_allow(peer,
  21. net->ipv4.sysctl_icmp_ratelimit);
  22. if (peer)
  23. inet_putpeer(peer);
  24. }
  25. out:
  26. return rc;
  27. }

1.7.2 数据发送 icmp_push_reply()

该函数是将 icmp 报文数据写入到 ip 层队列中,准备发送。

  1. 调用 ip_append_data(),将数据缓存起来
  2. 缓存成功时,计算 icmp 校验和并调用 ip_push_pending_frames() 将数据发送出去;缓存失败时,调用 ip_flush_pending_frames() 丢弃已缓存的数据
  1. static void icmp_push_reply(struct icmp_bxm *icmp_param,
  2. struct flowi4 *fl4,
  3. struct ipcm_cookie *ipc, struct rtable **rt)
  4. {
  5. struct sock *sk;
  6. struct sk_buff *skb;
  7. //获取当前执行CPU 所有的sock,主要用于发送ICMP数据包
  8. sk = icmp_sk(dev_net((*rt)->dst.dev));
  9. /*调用ip_append_data,将要发送的数据缓存到sk->sk_write_queue
  10. 并调用ip_push_pending_frames,将数据发送出去*/
  11. if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
  12. icmp_param->data_len+icmp_param->head_len,
  13. icmp_param->head_len,
  14. ipc, rt, MSG_DONTWAIT) < 0) {
  15. ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
  16. ip_flush_pending_frames(sk);
  17. } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
  18. struct icmphdr *icmph = icmp_hdr(skb);
  19. __wsum csum = 0;
  20. struct sk_buff *skb1;
  21. skb_queue_walk(&sk->sk_write_queue, skb1) {
  22. csum = csum_add(csum, skb1->csum);
  23. }
  24. csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
  25. (char *)icmph,
  26. icmp_param->head_len, csum);
  27. icmph->checksum = csum_fold(csum);
  28. skb->ip_summed = CHECKSUM_NONE;
  29. ip_push_pending_frames(sk, fl4);
  30. }
  31. }

2 ICMP报文发送

2.1 icmp_send() 实现

当入口数据包处理失败时,上层协议会调用 icmp_send() 发送 icmp 差错报文(例如 udp 在收到目的端口没有被监听的报文时,会在 __udp4_lib_rcv() 中调用该函数发送端口不可达信息)。该函数发送一个 icmp error 数据包,核心逻辑如下:
不能发送icmp error 数据包的条件:

  1. 入口数据包是广播或多播数据包(链路层地址或 ip 地址为广播/多播地址)时,不发送 icmp error 数据包
  2. 入口数据包是 ip 分片且不是首个分片时,不发送 icmp error 数据包(即仅对首个分片发送)
  3. 入口数据包本身是 icmp error 类型的,不发送针对该入口数据包的 icmp error

若入口数据包不满足上述条件,则需要发送针对该数据包的 icmp error 类型数据

  1. 查找路由
  2. 当路由查找成功后,则会调用 icmp_push_reply() 将数据发送出去
  1. /*
  2. * Send an ICMP message in response to a situation
  3. *
  4. * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header.
  5. * MAY send more (we do).
  6. * MUST NOT change this header information.
  7. * MUST NOT reply to a multicast/broadcast IP address.
  8. * MUST NOT reply to a multicast/broadcast MAC address.
  9. * MUST reply to only the first fragment.
  10. */
  11. void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
  12. {
  13. struct iphdr *iph;
  14. int room;
  15. struct icmp_bxm icmp_param;
  16. struct rtable *rt = skb_rtable(skb_in);
  17. struct ipcm_cookie ipc;
  18. struct flowi4 fl4;
  19. __be32 saddr;
  20. u8 tos;
  21. struct net *net;
  22. struct sock *sk;
  23. if (!rt)
  24. goto out;
  25. net = dev_net(rt->dst.dev);
  26. /*
  27. * Find the original header. It is expected to be valid, of course.
  28. * Check this, icmp_send is called from the most obscure devices
  29. * sometimes.
  30. */
  31. iph = ip_hdr(skb_in);
  32. //对sk_buff做合理性检查,保证ipheader在sk_buff->head与sk_buff->tail之间的范围内
  33. if ((u8 *)iph < skb_in->head ||
  34. (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
  35. goto out;
  36. /*
  37. * No replies to physical multicast/broadcast
  38. */
  39. //判断入口数据包的数据链路层的地址是否是广播或组播地址,若是则退出
  40. if (skb_in->pkt_type != PACKET_HOST)
  41. goto out;
  42. /*
  43. * Now check at the protocol level
  44. */
  45. //检查入口数据包是否广播、组播数据
  46. if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
  47. goto out;
  48. /*
  49. * Only reply to fragment 0. We byte re-order the constant
  50. * mask for efficiency.
  51. */
  52. //对于IP分段数据,仅对首个分段数据包发送ICMP错误信息
  53. if (iph->frag_off & htons(IP_OFFSET))
  54. goto out;
  55. /*
  56. * If we send an ICMP error to an ICMP error a mess would result..
  57. */
  58. //判断接收的数据包是否是一个ICMP 错误信息数据包,若是则不对该数据包回复ICMP错误信息
  59. if (icmp_pointers[type].error) {
  60. /*
  61. * We are an error, check if we are replying to an
  62. * ICMP error
  63. */
  64. if (iph->protocol == IPPROTO_ICMP) {
  65. u8 _inner_type, *itp;
  66. itp = skb_header_pointer(skb_in,
  67. skb_network_header(skb_in) +
  68. (iph->ihl << 2) +
  69. offsetof(struct icmphdr,
  70. type) -
  71. skb_in->data,
  72. sizeof(_inner_type),
  73. &_inner_type);
  74. if (itp == NULL)
  75. goto out;
  76. /*
  77. * Assume any unknown ICMP type is an error. This
  78. * isn't specified by the RFC, but think about it..
  79. */
  80. if (*itp > NR_ICMP_TYPES ||
  81. icmp_pointers[*itp].error)
  82. goto out;
  83. }
  84. }
  85. //关闭软中断,并为该socket添加自旋锁,确保同一时刻只有一个icmp报文被发送出去
  86. sk = icmp_xmit_lock(net);
  87. if (sk == NULL)
  88. return;
  89. /*
  90. * Construct source address and options.
  91. */
  92. /*
  93. *对于目的地址为本地的入口数据包,则将本地地址作为icmp包的源ip地址
  94. *对于目的地址非本地的入口数据包,则根据 sysctl_icmp_errors_use_inbound_ifaddr
  95. 的值来设置源ip地址
  96. */
  97. saddr = iph->daddr;
  98. if (!(rt->rt_flags & RTCF_LOCAL)) {
  99. struct net_device *dev = NULL;
  100. rcu_read_lock();
  101. if (rt_is_input_route(rt) &&
  102. net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
  103. dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
  104. if (dev)
  105. saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
  106. else
  107. saddr = 0;
  108. rcu_read_unlock();
  109. }
  110. //设置tos值
  111. tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
  112. IPTOS_PREC_INTERNETCONTROL) :
  113. iph->tos;
  114. if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
  115. goto out_unlock;
  116. /*
  117. * Prepare data for ICMP header.
  118. */
  119. //设置icmp的头部信息
  120. icmp_param.data.icmph.type = type;
  121. icmp_param.data.icmph.code = code;
  122. icmp_param.data.icmph.un.gateway = info;
  123. icmp_param.data.icmph.checksum = 0;
  124. icmp_param.skb = skb_in;
  125. icmp_param.offset = skb_network_offset(skb_in);
  126. inet_sk(sk)->tos = tos;
  127. ipc.addr = iph->saddr;
  128. ipc.opt = &icmp_param.replyopts.opt;
  129. ipc.tx_flags = 0;
  130. //获取路由
  131. rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
  132. type, code, &icmp_param);
  133. if (IS_ERR(rt))
  134. goto out_unlock;
  135. //限速
  136. if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
  137. goto ende;
  138. /* RFC says return as much as we can without exceeding 576 bytes. */
  139. room = dst_mtu(&rt->dst);
  140. if (room > 576)
  141. room = 576;
  142. room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
  143. room -= sizeof(struct icmphdr);
  144. icmp_param.data_len = skb_in->len - icmp_param.offset;
  145. if (icmp_param.data_len > room)
  146. icmp_param.data_len = room;
  147. icmp_param.head_len = sizeof(struct icmphdr);
  148. //发送icmp报文
  149. icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
  150. ende:
  151. ip_rt_put(rt);
  152. out_unlock:
  153. icmp_xmit_unlock(sk);
  154. out:;
  155. }
  156. EXPORT_SYMBOL(icmp_send);

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/252978
推荐阅读
相关标签
  

闽ICP备14008679号