1,mbuf就是存储要发送数据的memory buffer,类似于Linux的sk_buff.不过结构比较简单。/* header at beginning of each mbuf: */这个结构用来描述mbuf跟具体的内容无关struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ int mh_len; /* amount of data in this mbuf */ caddr_t mh_data; /* location of data */ short mh_type; /* type of data in this mbuf */ short mh_flags; /* flags; see below */}; /* record/packet header in first mbuf of chain; valid if M_PKTHDR set */对mbuf中数据的描述,len和接收接口struct pkthdr { int len; /* total packet length */ struct ifnet *rcvif; /* rcv interface */}; /* description of external storage mapped into mbuf, valid if M_EXT set */struct m_ext { caddr_t ext_buf; /* start of buffer */ void (*ext_free)(); /* free routine if not the usual */ u_int ext_size; /* size of buffer, for ext_free */};这个就是mbuf的描述,设计得比较巧妙struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat;}; 2,mbuf中m_hdr.mh_flags: /* mbuf flags */一个mbuf的大小是128字节,猜测是一个cacheline的大小。如果数据比较多,就需要多个mbuf连起来或者用一个叫cluster的东西来存储数据。M_EXT就是这个标志#define M_EXT 0x0001 /* has associated external storage */表明分组的第一个mbuf,在数据区中有pkthdr#define M_PKTHDR 0x0002 /* start of record */表明记录的尾部,TCP是一个字节流,不设置这个标志#define M_EOR 0x0004 /* end of record */ /* mbuf pkthdr flags, also in m_flags */#define M_BCAST 0x0100 /* send/received as link-level broadcast */#define M_MCAST 0x0200 /* send/received as link-level multicast */ /* flags copied when copying m_pkthdr */这个具体干嘛用的不懂。。。#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_BCAST|M_MCAST) 3,mbuf的种类 /* mbuf types */#define MT_FREE 0 /* should be on free list */#define MT_DATA 1 /* dynamic (data) allocation */ 数据就是这个类型#define MT_HEADER 2 /* packet header */#define MT_SOCKET 3 /* socket structure */#define MT_PCB 4 /* protocol control block */#define MT_RTABLE 5 /* routing tables */#define MT_HTABLE 6 /* 
IMP host tables */#define MT_ATABLE 7 /* address resolution tables */#define MT_SONAME 8 /* socket name */#define MT_SOOPTS 10 /* socket options */#define MT_FTABLE 11 /* fragment reassembly header */#define MT_RIGHTS 12 /* access rights */#define MT_IFADDR 13 /* interface address */#define MT_CONTROL 14 /* extra-data protocol message */#define MT_OOBDATA 15 /* expedited data */ 4,mbuf相关函数 4.1mbuf的分配 /* * mbuf allocation/deallocation macros: * * MGET(struct mbuf *m, int how, int type) * allocates an mbuf and initializes it to contain internal data. * * MGETHDR(struct mbuf *m, int how, int type) * allocates an mbuf and initializes it to contain a packet header * and internal data. */#define MGET(m, how, type) { mbtypes[type]把mbuf的type转换成MALLOC需要的type,如M_MBUF,M_SOCKET等 MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); if (m) { (m)->m_type = (type); MBUFLOCK改变处理器优先级,防止被网络处理中断,共享资源的保护 MBUFLOCK(mbstat.m_mtypes[type]++;) (m)->m_next = (struct mbuf *)NULL; (m)->m_nextpkt = (struct mbuf *)NULL; #define m_dat M_dat.M_databuf 为pkthdr和m_ext预留了空间 (m)->m_data = (m)->m_dat; (m)->m_flags = 0; } else 尝试重新分配,一个主要的问题,分配的内存从哪里来?详见后面 (m) = m_retry((how), (type)); } /* * When MGET fails, ask protocols to free space when short of memory, * then re-attempt to allocate an mbuf. 
*/struct mbuf *m_retry(i, t) int i, t;{ register struct mbuf *m; 调用协议的注册函数释放内存 m_reclaim(); 把m_retry #define成NULL,这样就直接返回NULL了,但这里怎么保证这个MGET中m_retry返回的是NULL,而上一个返回的是这个函数???????#define在预编译期间就做替换了。 这个的关键就是MGET是一个宏,而不是函数。#define m_retry(i, t) (struct mbuf *)0 MGET(m, i, t);#undef m_retry return (m);} 这个函数循环调用协议的drain函数释放内存 m_reclaim(){ register struct domain *dp; register struct protosw *pr; 提升处理器的优先级不被网络处理中断 int s = splimp(); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); 恢复处理器的优先级 splx(s); mbstat.m_drain++;} 分配一个分组头部的mbuf,对m_data和m_flags进行初始化#define MGETHDR(m, how, type) { MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); if (m) { (m)->m_type = (type); MBUFLOCK(mbstat.m_mtypes[type]++;) (m)->m_next = (struct mbuf *)NULL; (m)->m_nextpkt = (struct mbuf *)NULL; (m)->m_data = (m)->m_pktdat; (m)->m_flags = M_PKTHDR; } else (m) = m_retryhdr((how), (type)); } 587 /*588 * Routine to copy from device local memory into mbufs.589 */590 struct mbuf *591 m_devget(buf, totlen, off0, ifp, copy)592 char *buf;593 int totlen, off0;594 struct ifnet *ifp;595 void (*copy)();这个函数是对MGET和MGETHDR的封装,一般由设备驱动程序调用,分配mbuf空间。1,如果数据长度≤84,则在数据(IP数据包)的前面保留16个字节。为输出时添加14字节的MAC包头准备。(一个包含pkthdr的mbuf最多放100字节的数据)2,如果数据≥85 && 数据≤100则不额外保留这16字节的数据3,如果数据>100,则分配一个cluster进行数据的存放。可见m_devget根据数据的长度,分配合适的mbuf 4.2mbuf到mbuf中data的转换定义了两个宏 56 * mtod(m,t) - convert mbuf pointer to data pointer of correct type 57 * dtom(x) - convert data pointer within mbuf to mbuf pointer (XXX) 61 #define mtod(m,t) ((t)((m)->m_data)) MSIZE == 128 这个基于mbuf是128字节对齐 62 #define dtom(x) ((struct mbuf *)((int)(x) & ~(MSIZE-1)))dtom对cluster的数据有问题,不能正常转换到mbuf,所以需要下面的函数 /* * Rearrange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. 
* If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ 这个函数从mbuf链表中取出len字节的数据放在第一个mbuf中,使dtom能正确运行struct mbuf *m_pullup(n, len) register struct mbuf *n; int len;{ register struct mbuf *m; register int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == 0) goto bad; m->m_len = 0; if (n->m_flags & M_PKTHDR) { M_COPY_PKTHDR(m, n); n->m_flags &= ~M_PKTHDR; } } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m);bad: m_freem(n); MPFail++; return (0);}有些细节的东西还不明确,还需要进一步整理。TCP/IP详解2 学习笔记2---ifnet ifaddr http://www.linuxidc.com/Linux/2014-11/109289.htm本文永久更新链接地址:http://www.linuxidc.com/Linux/2014-11/109290.htm