7 月 | 2010 | 老沙的博客

IP:分片与重组

1. 引言
为了与互联网上的任意计算机通信,每个应用TCP/IP的计算机必须具有分片和重组的代码,一个设计良好的应用软件,会生成足够小的数据报,
因此主机并不需要经常执行分片任务.

2. 数据报的分片
分片发生在选择路由之后,以及放入接口队列之前.IP把数据报长度与MTU进行比较,确定是否需要分片.
如需分片,IP首先生成多个数据报,并将每个数据报中的分片位置1,将源数据报中的数据按顺序分片,并将它们装入这些数据报中.
它还在同一源数据报产生的所有数据报片中将MF位置为1,末尾片除外.IP一边分片,一边将它们传递给网络接口发送出去.

2.1 为一个数据报片再次分片
对于MF未置1的源片,处理方式与上面所说的相同:除最后一个分片外,其余分片的MF全部置1.但对于MF为1的源片(即它本身不是原数据报的末尾片),再次分片后的所有分片(包括最后一个)MF全部置1.

3. 分片的实现

/* ipputp.c – ipputp */

#include <conf.h>
#include <kernel.h>
#include <network.h>

/*------------------------------------------------------------------------
 * ipputp - send a packet to an interface's output queue, fragmenting
 *          the datagram first when it is longer than the interface MTU
 *
 * ifn - index of the outgoing interface in nif[]
 * nh  - next-hop address, stored in ep_nexthop of every frame sent
 * pep - frame that holds the IP datagram to send
 *
 * Returns SYSERR when the interface is down or a fragment could not be
 * sent, OK when fragmentation is required but IP_DF is set (an ICMP
 * error is issued instead), and otherwise the value of netwrite() for
 * the last (or only) frame.
 *------------------------------------------------------------------------
 */
int
ipputp(unsigned ifn, IPaddr nh, struct ep *pep)
{
    struct netif *pni = &nif[ifn];      /* outgoing interface          */
    struct ip    *pip;                  /* IP header inside pep        */
    int          hlen, maxdlen, tosend, offset, offindg;

    if (pni->ni_state == NIS_DOWN) {    /* interface down: drop        */
        freebuf(pep);
        return SYSERR;
    }
    pip = (struct ip *)pep->ep_data;
    if (pip->ip_len <= pni->ni_mtu) {   /* fits in one frame: send as is */
        pep->ep_nexthop = nh;
        pip->ip_cksum = 0;
        iph2net(pip);
        pep->ep_order &= ~EPO_IP;
        pip->ip_cksum = cksum((WORD *)pip, IP_HLEN(pip));
        return netwrite(pni, pep, EP_HLEN+net2hs(pip->ip_len));
    }
    /* otherwise the datagram must be fragmented */

    if (pip->ip_fragoff & IP_DF) {      /* "don't fragment" is set     */
        IpFragFails++;
        icmp(ICT_DESTUR, ICC_FNADF, pip->ip_src, pep, 0);
        return OK;
    }
    /* data octets per fragment: MTU minus the header length, rounded  */
    /* DOWN to a multiple of 8 (the offset field counts 8-octet units) */
    maxdlen = (pni->ni_mtu - IP_HLEN(pip)) &~ 7;
    offset = 0;                         /* octets of pep's data already sent */
    /* offset of pep's data within the original datagram, in octets;   */
    /* non-zero when pep is itself a fragment                          */
    offindg = (pip->ip_fragoff & IP_FRAGOFF)<<3;
    tosend = pip->ip_len - IP_HLEN(pip);    /* data octets left to send */

    /* send every fragment except the last one in a freshly allocated  */
    /* frame; ipfsend() always sets IP_MF on these                     */
    while (tosend > maxdlen) {
        if (ipfsend(pni,nh,pep,offset,maxdlen,offindg) != OK) {
            IpOutDiscards++;
            freebuf(pep);
            return SYSERR;
        }
        IpFragCreates++;
        tosend -= maxdlen;
        offset += maxdlen;
        offindg += maxdlen;
    }
    IpFragOKs++;
    IpFragCreates++;

    /* Reuse the original frame for the last fragment: rebuild its     */
    /* header in place, then slide the remaining data down behind it.  */
    /* NOTE(review): hlen may be smaller than IP_HLEN(pip) when options */
    /* are dropped, but nothing visible here rewrites the header-length */
    /* field - confirm whether that is handled elsewhere.               */
    hlen = ipfhcopy(pep, pep, offindg);
    pip = (struct ip *)pep->ep_data;
    memcpy(&pep->ep_data[hlen], &pep->ep_data[IP_HLEN(pip)+offset],
        tosend);
    /* keep the source's MF bit: if pep was a non-final fragment, its  */
    /* last piece must still announce that more fragments follow       */
    pip->ip_fragoff = (pip->ip_fragoff & IP_MF)|(offindg>>3);
    pip->ip_len = tosend + hlen;
    pip->ip_cksum = 0;
    iph2net(pip);
    pep->ep_order &= ~EPO_IP;
    pip->ip_cksum = cksum((WORD *)pip, hlen);
    pep->ep_nexthop = nh;
    return netwrite(pni, pep, EP_HLEN+net2hs(pip->ip_len));
}

3.1 发送一个数据报片

/* ipfsend.c – ipfsend */

#include <conf.h>
#include <kernel.h>
#include <network.h>

int ipfhcopy(struct ep *, struct ep *, unsigned);

/*------------------------------------------------------------------------
 * ipfsend - build one non-final fragment of an IP datagram and send it
 *
 * pni     - interface to write the fragment to
 * nexthop - next-hop address recorded in the new frame
 * pep     - frame holding the datagram being fragmented (not modified)
 * offset  - position of this fragment's data within pep's data, octets
 * maxdlen - number of data octets to place in the fragment
 * offindg - position of the data within the original datagram, octets
 *
 * Returns SYSERR when no buffer is available, otherwise the value
 * returned by netwrite().
 *------------------------------------------------------------------------
 */
int
ipfsend(struct netif *pni, IPaddr nexthop, struct ep *pep,
    unsigned offset, unsigned maxdlen, unsigned offindg)
{
    struct ep *frag;                    /* frame for the new fragment  */
    struct ip *srchdr, *fraghdr;        /* source and fragment headers */
    int       fhlen, fraglen;

    frag = (struct ep *)getbuf(Net.netpool);
    if (frag == (struct ep *)SYSERR)
        return SYSERR;

    frag->ep_order = ~0;
    fhlen = ipfhcopy(frag, pep, offindg);   /* frame + IP header       */
    srchdr = (struct ip *)pep->ep_data;
    fraghdr = (struct ip *)frag->ep_data;

    /* payload: maxdlen octets taken from the source datagram */
    memcpy(&frag->ep_data[fhlen],
        &pep->ep_data[IP_HLEN(srchdr)+offset], maxdlen);

    /* header fields that differ from the source; IP_MF is always set  */
    /* because this routine is only used for non-final fragments       */
    fraglen = maxdlen + fhlen;
    fraghdr->ip_fragoff = IP_MF | (offindg>>3);
    fraghdr->ip_len = fraglen;
    fraghdr->ip_cksum = 0;

    iph2net(fraghdr);
    frag->ep_order &= ~EPO_IP;          /* clear the IP byte-order flag */
    fraghdr->ip_cksum = cksum((WORD *)fraghdr, fhlen);

    frag->ep_nexthop = nexthop;
    return netwrite(pni, frag, EP_HLEN+fraglen);
}

3.2 复制数据报首部

/* ipfhcopy.c – ipfhcopy */

#include <conf.h>
#include <kernel.h>
#include <network.h>

/*------------------------------------------------------------------------
 * ipfhcopy - copy the hardware, IP header, and options for a fragment
 *
 * pepto   - frame that receives the header (may be the same frame as
 *           pepfrom; ipputp rebuilds the last fragment in place)
 * pepfrom - frame holding the datagram being fragmented
 * offindg - offset of the fragment within the original datagram; 0
 *           means "first fragment", which keeps every option
 *
 * Returns the length in octets of the IP header written to pepto.
 * For fragments other than the first, only options whose type has
 * IPO_COPY set are kept; the result is padded with IPO_NOP to a
 * multiple of 4 octets.
 *
 * NOTE(review): the header-length field copied from pepfrom is not
 * rewritten here when options are dropped - confirm that callers do it.
 *------------------------------------------------------------------------
 */
int
ipfhcopy(struct ep *pepto, struct ep *pepfrom, unsigned offindg)
{
    struct ip *pipfrom = (struct ip *)pepfrom->ep_data;
    unsigned  i, k, maxhlen, olen, otype;
    unsigned  hlen = (IP_MINHLEN<<2);   /* minimum header, in octets   */

    if (offindg == 0) {
        /* first fragment: frame header plus the complete IP header;   */
        /* an in-place call needs no copy (memcpy onto itself is UB)   */
        if (pepto != pepfrom)
            memcpy(pepto, pepfrom, EP_HLEN+IP_HLEN(pipfrom));
        return IP_HLEN(pipfrom);
    }
    /* later fragment: frame header plus the option-less IP header */
    if (pepto != pepfrom)
        memcpy(pepto, pepfrom, EP_HLEN+hlen);

    /* walk the source options; i indexes the source, hlen the target. */
    /* hlen never exceeds i, so in-place operation only moves data     */
    /* toward lower addresses.                                         */
    maxhlen = IP_HLEN(pipfrom);
    i = hlen;
    while (i < maxhlen) {
        otype = pepfrom->ep_data[i];
        if (!(otype & IPO_COPY) &&
            (otype == IPO_NOP || otype == IPO_EOOP)) {
            /* single-octet option */
            pepto->ep_data[hlen++] = otype;
            olen = 1;
        } else {
            /* type/length/value option: validate the length octet so  */
            /* a malformed header can neither stall the loop (length   */
            /* 0) nor run past the end of the header                   */
            if (i + 1 >= maxhlen)
                break;
            olen = pepfrom->ep_data[i+1];
            if (olen < 2 || olen > maxhlen - i)
                break;
            if (otype & IPO_COPY) {
                /* forward octet copy: safe when source and target     */
                /* overlap with the target at the lower address        */
                for (k = 0; k < olen; ++k)
                    pepto->ep_data[hlen+k] = pepfrom->ep_data[i+k];
                hlen += olen;
            }
        }
        i += olen;

        if (otype == IPO_EOOP)
            break;
    }
    /* pad to a multiple of 4 octets */
    while (hlen % 4)
        pepto->ep_data[hlen++] = IPO_NOP;
    return hlen;
}

4. 数据报的重组

4.1 数据结构

/* ipreass.h */

/* Internet Protocol (IP) reassembly support */

#define IP_FQSIZE   10   /* maximum number of fragment queues (size of ipfqt[]) */
#define IP_MAXNF   10   /* maximum number of fragments per datagram */
#define   IP_FTTL       60   /* time to live of a fragment queue (seconds) */

/* ipf_state flags */

#define   IPFF_VALID   1   /* contents are valid */
#define   IPFF_BOGUS   2   /* datagram is being discarded; fragments and queue already released */
#define   IPFF_FREE   3   /* this queue entry is free to be allocated */

/* One entry of the fragment queue table: the fragments collected so */
/* far for a single datagram, identified by source address and ID.   */
struct   ipfq   {
   char   ipf_state;       /* queue state: one of the IPFF_* values */
   IPaddr   ipf_src;       /* source IP address of the datagram */
   short   ipf_id;           /* datagram identification */
   int   ipf_ttl;       /* remaining time to live for reassembly */
   int   ipf_q;           /* queue holding the fragments (value returned by newq) */
};

extern int ipfmutex; /* mutual exclusion semaphore for ipfqt[] */
extern struct ipfq ipfqt[]; /* table of IP fragment queues */

/* fragmentation helpers: send one fragment / copy a fragment header */
int ipfsend(struct netif *, IPaddr, struct ep *, unsigned, unsigned,
unsigned);
int ipfhcopy(struct ep *, struct ep *, unsigned);

4.2 互斥操作

为了保证各进程在操作数据报片的链表时不会互相干扰,重组程序代码使用了一个互斥信号量ipfmutex,它在ipreass.h中声明.

4.3 在链表中加入一个数据报片

/* ipreass.c – ipreass */

#include <conf.h>
#include <kernel.h>
#include <network.h>

int ipfadd(struct ipfq *, struct ep *);
struct ep *ipfjoin(struct ipfq *);

/*------------------------------------------------------------------------
 * ipreass - reassemble an IP datagram, if necessary
 *
 * pep - frame holding the datagram or fragment that just arrived
 *
 * Returns pep itself when it is not a fragment, the reassembled
 * datagram when this fragment completes one, and 0 otherwise (the
 * fragment was queued or dropped).
 *
 * NOTE(review): queues are matched on source address and ID only.
 *------------------------------------------------------------------------
 */
struct ep *
ipreass(struct ep *pep)
{
    struct ip   *pip = (struct ip *)pep->ep_data;
    struct ipfq *slot;
    struct ep   *whole;
    int         freeidx = -1;           /* first unused table entry    */
    int         n;

    wait(ipfmutex);

    /* offset 0 and MF clear: a complete datagram, hand it back */
    if ((pip->ip_fragoff & (IP_FRAGOFF|IP_MF)) == 0) {
        signal(ipfmutex);
        return pep;
    }
    IpReasmReqds++;

    /* look for the queue that already collects this datagram */
    for (n = 0; n < IP_FQSIZE; n++) {
        slot = &ipfqt[n];

        if (slot->ipf_state == IPFF_FREE) {
            if (freeidx < 0)
                freeidx = n;
            continue;
        }
        if (slot->ipf_id == pip->ip_id && slot->ipf_src == pip->ip_src) {
            /* found it: add the fragment, then try to join */
            whole = ipfadd(slot, pep) ? ipfjoin(slot) : 0;
            signal(ipfmutex);
            return whole;
        }
    }

    /* no queue yet: start one in the first free entry, if any */
    if (freeidx >= 0) {
        slot = &ipfqt[freeidx];
        slot->ipf_q = newq(IP_FQSIZE, QF_WAIT);
        if (slot->ipf_q >= 0) {
            slot->ipf_src = pip->ip_src;
            slot->ipf_id = pip->ip_id;
            slot->ipf_ttl = IP_FTTL;
            slot->ipf_state = IPFF_VALID;
            ipfadd(slot, pep);
            signal(ipfmutex);
            return 0;
        }
    }

    /* no free entry, or no queue could be created: drop the fragment */
    freebuf(pep);
    signal(ipfmutex);
    return 0;
}

/* semaphore guarding ipfqt[]; waited on by ipreass() and ipftimer() */
int ipfmutex;
/* fragment queue table: one entry per datagram under reassembly */
struct ipfq ipfqt[IP_FQSIZE];

4.4 溢出时的丢弃

/* ipfadd.c – ipfadd */

#include <conf.h>
#include <kernel.h>
#include <proc.h>
#include <network.h>

/*------------------------------------------------------------------------
 * ipfadd - add a fragment to an IP fragment queue
 *
 * iq  - fragment queue entry to add to
 * pep - frame holding the fragment
 *
 * Returns TRUE when the fragment was queued. Returns FALSE when the
 * queue is not valid (the fragment is freed) or when the queue
 * overflows; on overflow every fragment is freed, the queue is
 * released and the entry is marked IPFF_BOGUS.
 *------------------------------------------------------------------------
 */
Bool
ipfadd(struct ipfq *iq, struct ep *pep)
{
    struct ip *hdr;
    int       fragoff;

    if (iq->ipf_state != IPFF_VALID) {
        freebuf(pep);
        return FALSE;
    }

    hdr = (struct ip *)pep->ep_data;
    fragoff = hdr->ip_fragoff & IP_FRAGOFF;

    /* the key is -fragoff: larger keys sit nearer the front, so the   */
    /* fragments come back out in order of increasing offset           */
    if (enq(iq->ipf_q, pep, -fragoff) >= 0) {
        iq->ipf_ttl = IP_FTTL;          /* fresh fragment: restart the TTL */
        return TRUE;
    }

    /* overflow: free this fragment and everything already queued */
    do {
        freebuf(pep);
        IpReasmFails++;
    } while ((pep = (struct ep *)deq(iq->ipf_q)) != 0);

    freeq(iq->ipf_q);
    iq->ipf_state = IPFF_BOGUS;
    return FALSE;
}

4.5 测试一个完整的数据报

/* ipfjoin.c – ipfjoin */

#include <conf.h>
#include <kernel.h>
#include <proc.h>
#include <network.h>

struct ep *ipfcons(struct ipfq *);

/*------------------------------------------------------------------------
 * ipfjoin - join fragments, if all collected
 *
 * iq - fragment queue entry to examine
 *
 * Returns the datagram built by ipfcons() when every fragment has
 * arrived, and 0 otherwise. A datagram whose data exceeds MAXLRGBUF
 * is discarded and its queue entry freed.
 *------------------------------------------------------------------------
 */
struct ep *
ipfjoin(struct ipfq *iq)
{
    struct ep *pep;
    struct ip *pip = 0;
    int       off, packoff;

    if (iq->ipf_state == IPFF_BOGUS)
        return 0;

    /* see if we have the whole datagram: walk the queue (ordered by   */
    /* increasing offset) without removing anything; off is the offset */
    /* at which the next fragment has to start                         */
    off = 0;
    while (pep=(struct ep *)seeq(iq->ipf_q)) {
        pip = (struct ip *)pep->ep_data;
        packoff = (pip->ip_fragoff & IP_FRAGOFF)<<3;
        if (off < packoff) {
            /* a gap: at least one fragment is still missing. Run seeq */
            /* to the end first - presumably so that the next scan     */
            /* starts from the head of the queue again.                */
            while(seeq(iq->ipf_q))
                /*empty*/;
            return 0;
        }
        /* end of this fragment's data within the datagram             */
        /* NOTE(review): off can move backwards when a fragment lies   */
        /* entirely inside data already covered - confirm intended.    */
        off = packoff + pip->ip_len - IP_HLEN(pip);
    }
    /* NOTE(review): off counts data octets only; the headers copied   */
    /* by ipfcons() are not included in this size check.               */
    if (off > MAXLRGBUF) {      /* too big for us to handle */
        while (pep = (struct ep *)deq(iq->ipf_q))
            freebuf(pep);
        freeq(iq->ipf_q);
        iq->ipf_state = IPFF_FREE;
        return 0;
    }
    /* no gaps; the datagram is complete once the fragment with the    */
    /* highest offset has MF clear                                     */
    if (pip == 0 || (pip->ip_fragoff & IP_MF) == 0)
        return ipfcons(iq);

    return 0;
}

4.6 将数据报片组装成完整的数据报

/* ipfcons.c – ipfcons */

#include <conf.h>
#include <kernel.h>
#include <network.h>

/*------------------------------------------------------------------------
 * ipfcons - construct a single packet from an IP fragment queue
 *
 * iq - fragment queue entry whose fragments are to be assembled
 *
 * Returns the reassembled datagram in a buffer taken from Net.lrgpool,
 * or 0 when no buffer is available or the queue holds no fragment.
 * In every case all queued fragments are freed, the queue is released
 * and the entry is marked IPFF_FREE.
 *
 * NOTE(review): nothing here bounds the copied data by the size of
 * the large buffer - this relies on checks made by the caller.
 *------------------------------------------------------------------------
 */
struct ep *
ipfcons(struct ipfq *iq)
{
    struct ep *pep, *peptmp;
    struct ip *pip;
    int       off, seq;

    pep = (struct ep *)getbuf(Net.lrgpool);
    if (pep == (struct ep *)SYSERR) {   /* no buffer: drop the datagram */
        while (peptmp = (struct ep *)deq(iq->ipf_q)) {
            IpReasmFails++;
            freebuf(peptmp);
        }
        freeq(iq->ipf_q);
        iq->ipf_state = IPFF_FREE;
        return 0;
    }

    /* copy the frame and IP headers from the first fragment */
    peptmp = (struct ep *)deq(iq->ipf_q);
    if (peptmp == 0) {
        /* empty queue: nothing to build (avoids a null dereference) */
        IpReasmFails++;
        freebuf(pep);
        freeq(iq->ipf_q);
        iq->ipf_state = IPFF_FREE;
        return 0;
    }
    pip = (struct ip *)peptmp->ep_data;
    off = IP_HLEN(pip);                 /* write position in pep->ep_data */
    seq = 0;                            /* data octets assembled so far   */
    memcpy(pep, peptmp, EP_HLEN+off);

    /* copy the data */
    while (peptmp != 0) {
        int dlen, doff;

        pip = (struct ip *)peptmp->ep_data;
        /* doff skips this fragment's header plus any leading data     */
        /* that overlaps what has already been copied                  */
        doff = IP_HLEN(pip) + seq
            - ((pip->ip_fragoff&IP_FRAGOFF)<<3);
        dlen = pip->ip_len - doff;
        /* a duplicate or fully overlapped fragment adds nothing; a    */
        /* negative dlen must never reach memcpy as a length           */
        if (dlen > 0) {
            memcpy(pep->ep_data+off, peptmp->ep_data+doff, dlen);
            off += dlen;
            seq += dlen;
        }
        freebuf(peptmp);
        peptmp = (struct ep *)deq(iq->ipf_q);
    }

    /* fix up the header of the reassembled datagram */
    pip = (struct ip *)pep->ep_data;
    pip->ip_len = off;                  /* header plus all data        */
    pip->ip_fragoff = 0;                /* no longer a fragment        */

    /* release resources */
    freeq(iq->ipf_q);
    iq->ipf_state = IPFF_FREE;
    IpReasmOKs++;
    return pep;
}

5. 数据报片链表的维护管理

/* ipftimer.c – ipftimer */

#include <conf.h>
#include <kernel.h>
#include <network.h>

/*------------------------------------------------------------------------
 * ipftimer - update time-to-live fields and delete expired fragments
 *
 * gran - amount subtracted from the TTL of every queue entry in use
 *
 * When an entry expires, the first queued fragment is handed to icmp()
 * for a "time exceeded" report, the remaining fragments are freed and
 * the entry becomes IPFF_FREE.
 *------------------------------------------------------------------------
 */
void
ipftimer(int gran)
{
    struct ipfq *iq;
    struct ep   *frag;
    struct ip   *hdr;
    int         n;

    wait(ipfmutex);
    for (n = 0; n < IP_FQSIZE; n++) {
        iq = &ipfqt[n];

        if (iq->ipf_state == IPFF_FREE)
            continue;

        iq->ipf_ttl -= gran;
        if (iq->ipf_ttl > 0)
            continue;                   /* not expired yet */

        /* a BOGUS entry has already released its fragments and queue, */
        /* so only a still-populated entry needs tearing down          */
        if (iq->ipf_state != IPFF_BOGUS) {
            frag = (struct ep *)deq(iq->ipf_q);
            if (frag) {
                /* report the timeout using the first fragment */
                IpReasmFails++;
                hdr = (struct ip *)frag->ep_data;
                icmp(ICT_TIMEX, ICC_FTIMEX,
                    hdr->ip_src, frag, 0);
            }
            while ((frag = (struct ep *)deq(iq->ipf_q)) != 0) {
                IpReasmFails++;
                freebuf(frag);
            }
            freeq(iq->ipf_q);
        }
        iq->ipf_state = IPFF_FREE;
    }
    signal(ipfmutex);
}

6. 初始化

/* ipfinit.c – ipfinit */

#include <conf.h>
#include <kernel.h>
#include <network.h>

/*————————————————————————
* ipfinit – initialize IP fragment queue data structures
*————————————————————————
*/
void
ipfinit(void)
{
int i;

   ipfmutex = screate(1);           /* 不多说了,简单,太~~~,这个看不明白就不用看了. 🙂 */
   for (i=0; i<IP_FQSIZE; ++i)
       ipfqt[i].ipf_state = IPFF_FREE;
}

老沙的博客

月度归档：2010年07月

IP 分片与重组

联通新情1+ ADSL一体机破解 PPPOE 自动拨号 HG527c 522

TP WR340G+ 541G+ 硬改刷DD WRT 成功固件下载