diff -ru ipvs-1.0.3/ipvs/ip_vs.h ipvs-1.0.3.proc.sys/ipvs/ip_vs.h --- ipvs-1.0.3/ipvs/ip_vs.h Thu May 30 22:00:20 2002 +++ ipvs-1.0.3.proc.sys/ipvs/ip_vs.h Mon Jul 1 12:57:53 2002 @@ -230,6 +230,123 @@ }; +/* + * Delta sequence info structure + * Each ip_vs_conn has 2 (output AND input seq. changes). + * Only used in the VS/NAT. + */ +struct ip_vs_seq { + __u32 init_seq; /* Add delta from this seq */ + __u32 delta; /* Delta in sequence numbers */ + __u32 previous_delta; /* Delta in sequence numbers + before last resized pkt */ +}; + + +/* + * IPVS sync connection entry + */ +struct ip_vs_sync_conn { + __u8 reserved; + + /* Protocol, addresses and port numbers */ + __u8 protocol; /* Which protocol (TCP/UDP) */ + __u16 cport; + __u16 vport; + __u16 dport; + __u32 caddr; /* client address */ + __u32 vaddr; /* virtual address */ + __u32 daddr; /* destination address */ + + /* Flags and state transition */ + __u16 flags; /* status flags */ + __u16 state; /* state info */ + + /* The sequence options start here */ +}; + +struct ip_vs_sync_conn_options { + struct ip_vs_seq in_seq; /* incoming seq. struct */ + struct ip_vs_seq out_seq; /* outgoing seq. struct */ +}; + +#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) +#define IP_VS_SYNC_SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) +#define IP_VS_SYNC_FULL_CONN_SIZE \ +(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) + + +/* + The master mulitcasts messages to the backup load balancers in the + following format. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Count Conns | Reserved | Size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPVS Sync Connection (1) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | . | + | . | + | . 
| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPVS Sync Connection (n) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ + +struct ip_vs_sync_mesg { + __u8 nr_conns; + __u8 reserved; + __u16 size; + + /* ip_vs_sync_conn entries start here */ +}; + + +/* Trailing sizeof(struct ip_vs_sync_conn_options) is to allow + * the full connection count to be used by allowing a bit of extra + * space, just in case the last connection is IP_VS_SYNC_FULL_CONN_SIZE + * instead of IP_VS_SYNC_SIMPLE_CONN_SIZE */ + +/* At the very least the message needs to hold one connection */ +#define IP_VS_SYNC_MESG_MAX_SIZE_MIN \ + (IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) + +/* At most, 255 connections can be carried. This is because + * the nr_conns element in the ip_vs_sync_mesg structure + * is an unsigned 8-bit integer, and thus has a valid range + * of 0 - 255. Beyond that a wraparound will occur. */ +#define IP_VS_SYNC_MESG_MAX_SIZE_MAX \ + (255 * IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) + +/* By default, send 50 connections per message. This fits nicely + * into a 1500 MTU packet */ +#define IP_VS_SYNC_MESG_MAX_SIZE_DEFAULT \ + (50 * IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) +/* Clamp the sysctl to [MIN, MAX]; the (int) casts keep the compare signed so a negative setting clamps to MIN, not MAX */ +#define IP_VS_SYNC_MESG_MAX_SIZE \ + ((sysctl_ip_vs_sync_msg_max_size > (int)IP_VS_SYNC_MESG_MAX_SIZE_MAX) ? \ + IP_VS_SYNC_MESG_MAX_SIZE_MAX: \ + ((sysctl_ip_vs_sync_msg_max_size < (int)IP_VS_SYNC_MESG_MAX_SIZE_MIN) ? \ + IP_VS_SYNC_MESG_MAX_SIZE_MIN : sysctl_ip_vs_sync_msg_max_size)) + + +#define IP_VS_SYNC_FREQUENCY_DEFAULT 50 +#define IP_VS_SYNC_FREQUENCY \ + ((sysctl_ip_vs_sync_frequency < 1) ? 1 : sysctl_ip_vs_sync_frequency) + +#define IP_VS_SYNC_THRESHOLD_DEFAULT 3 +#define IP_VS_SYNC_THRESHOLD \ + ((sysctl_ip_vs_sync_threshold < 0) ?
0 : \ + (sysctl_ip_vs_sync_threshold >= IP_VS_SYNC_FREQUENCY) ? \ + IP_VS_SYNC_FREQUENCY - 1 : sysctl_ip_vs_sync_threshold) + #ifdef __KERNEL__ #include @@ -328,7 +445,9 @@ NET_IPV4_VS_CACHE_BYPASS=22, NET_IPV4_VS_EXPIRE_NODEST_CONN=23, NET_IPV4_VS_SYNC_THRESHOLD=24, NET_IPV4_VS_NAT_ICMP_SEND=25, + NET_IPV4_VS_SYNC_FREQUENCY=26, /* new ids are appended so NAT_ICMP_SEND keeps its published number 25 */ + NET_IPV4_VS_SYNC_MSG_MAX_SIZE=27, NET_IPV4_VS_LAST }; @@ -374,19 +493,6 @@ /* - * Delta sequence info structure - * Each ip_vs_conn has 2 (output AND input seq. changes). - * Only used in the VS/NAT. - */ -struct ip_vs_seq { - __u32 init_seq; /* Add delta from this seq */ - __u32 delta; /* Delta in sequence numbers */ - __u32 previous_delta; /* Delta in sequence numbers - before last resized pkt */ -}; - - -/* * IPVS statistics object */ struct ip_vs_stats @@ -518,7 +624,7 @@ struct list_head n_list; /* d-linked list head */ char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ - struct module *module; /* THIS_MODULE/NULL */ + struct module *module; /* THIS_MODULE/NULL */ /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); @@ -702,6 +808,8 @@ extern int sysctl_ip_vs_cache_bypass; extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_sync_threshold; +extern int sysctl_ip_vs_sync_frequency; +extern int sysctl_ip_vs_sync_msg_max_size; extern int sysctl_ip_vs_nat_icmp_send; extern atomic_t ip_vs_dropentry; extern struct ip_vs_stats ip_vs_stats; diff -ru ipvs-1.0.3/ipvs/ip_vs_core.c ipvs-1.0.3.proc.sys/ipvs/ip_vs_core.c --- ipvs-1.0.3/ipvs/ip_vs_core.c Thu May 30 22:00:20 2002 +++ ipvs-1.0.3.proc.sys/ipvs/ip_vs_core.c Mon Jul 1 12:55:34 2002 @@ -1132,7 +1132,8 @@ if (ip_vs_sync_state == IP_VS_STATE_MASTER && (cp->protocol != IPPROTO_TCP || cp->state == IP_VS_S_ESTABLISHED) && - (atomic_read(&cp->in_pkts) % 50 == sysctl_ip_vs_sync_threshold)) + (atomic_read(&cp->in_pkts) % IP_VS_SYNC_FREQUENCY + == IP_VS_SYNC_THRESHOLD)) ip_vs_sync_conn(cp);
ip_vs_conn_put(cp); diff -ru ipvs-1.0.3/ipvs/ip_vs_ctl.c ipvs-1.0.3.proc.sys/ipvs/ip_vs_ctl.c --- ipvs-1.0.3/ipvs/ip_vs_ctl.c Thu May 30 22:00:20 2002 +++ ipvs-1.0.3.proc.sys/ipvs/ip_vs_ctl.c Mon Jul 1 12:32:29 2002 @@ -78,7 +78,9 @@ static int sysctl_ip_vs_am_droprate = 10; int sysctl_ip_vs_cache_bypass = 0; int sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_sync_threshold = 3; +int sysctl_ip_vs_sync_threshold = IP_VS_SYNC_THRESHOLD_DEFAULT; +int sysctl_ip_vs_sync_frequency = IP_VS_SYNC_FREQUENCY_DEFAULT; +int sysctl_ip_vs_sync_msg_max_size = IP_VS_SYNC_MESG_MAX_SIZE_DEFAULT; int sysctl_ip_vs_nat_icmp_send = 0; #ifdef CONFIG_IP_VS_DEBUG @@ -1410,6 +1412,12 @@ &proc_dointvec}, {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold", &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_VS_SYNC_FREQUENCY, "sync_frequency", + &sysctl_ip_vs_sync_frequency, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_VS_SYNC_MSG_MAX_SIZE, "sync_msg_max_size", + &sysctl_ip_vs_sync_msg_max_size, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send", &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL, diff -ru ipvs-1.0.3/ipvs/ip_vs_sync.c ipvs-1.0.3.proc.sys/ipvs/ip_vs_sync.c --- ipvs-1.0.3/ipvs/ip_vs_sync.c Tue Dec 11 21:41:29 2001 +++ ipvs-1.0.3.proc.sys/ipvs/ip_vs_sync.c Mon Jul 1 12:28:26 2002 @@ -39,69 +39,6 @@ #define IP_VS_SYNC_PORT 8848 /* multicast port */ -/* - * IPVS sync connection entry - */ -struct ip_vs_sync_conn { - __u8 reserved; - - /* Protocol, addresses and port numbers */ - __u8 protocol; /* Which protocol (TCP/UDP) */ - __u16 cport; - __u16 vport; - __u16 dport; - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ - - /* Flags and state transition */ - __u16 flags; /* status flags */ - __u16 state; /* state info */ - - /* The sequence options start here */ -}; - -struct ip_vs_sync_conn_options { - struct ip_vs_seq in_seq; /* 
incoming seq. struct */ - struct ip_vs_seq out_seq; /* outgoing seq. struct */ -}; - -#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) -#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) -#define FULL_CONN_SIZE \ -(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) - - -/* - The master mulitcasts messages to the backup load balancers in the - following format. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Count Conns | Reserved | Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | IPVS Sync Connection (1) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | . | - | . | - | . | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | IPVS Sync Connection (n) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -*/ -#define SYNC_MESG_MAX_SIZE (24*50+4) -struct ip_vs_sync_mesg { - __u8 nr_conns; - __u8 reserved; - __u16 size; - - /* ip_vs_sync_conn entries start here */ -}; - - struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -153,14 +90,14 @@ if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { + if (!(sb->mesg=kmalloc(IP_VS_SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { kfree(sb); return NULL; } sb->mesg->nr_conns = 0; sb->mesg->size = 4; sb->head = (unsigned char *)sb->mesg + 4; - sb->end = (unsigned char *)sb->mesg + SYNC_MESG_MAX_SIZE; + sb->end = (unsigned char *)sb->mesg + IP_VS_SYNC_MESG_MAX_SIZE; sb->firstuse = jiffies; return sb; } @@ -191,8 +128,8 @@ } } - len = cp->flags&IP_VS_CONN_F_SEQ_MASK ? FULL_CONN_SIZE : - SIMPLE_CONN_SIZE; + len = cp->flags&IP_VS_CONN_F_SEQ_MASK ? 
IP_VS_SYNC_FULL_CONN_SIZE : + IP_VS_SYNC_SIMPLE_CONN_SIZE; m = curr_sb->mesg; s = (struct ip_vs_sync_conn *)curr_sb->head; @@ -217,7 +154,7 @@ curr_sb->head += len; /* check if there is a space for next one */ - if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { + if (curr_sb->head+IP_VS_SYNC_FULL_CONN_SIZE > curr_sb->end) { sb_queue_tail(curr_sb); curr_sb = NULL; } @@ -270,9 +207,9 @@ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); - p += FULL_CONN_SIZE; + p += IP_VS_SYNC_FULL_CONN_SIZE; } else - p += SIMPLE_CONN_SIZE; + p += IP_VS_SYNC_SIMPLE_CONN_SIZE; atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold); cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; @@ -604,7 +541,7 @@ char *buf; int len; - if (!(buf=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { + if (!(buf=kmalloc(IP_VS_SYNC_MESG_MAX_SIZE_MAX, GFP_ATOMIC))) { /* fixed protocol maximum: the sysctl may change while this loop runs */ IP_VS_ERR("sync_backup_loop: kmalloc error\n"); return; } @@ -618,7 +555,7 @@ /* do you have data now? */ while (!skb_queue_empty(&(sock->sk->receive_queue))) { if ((len=ip_vs_receive(sock, buf, - SYNC_MESG_MAX_SIZE))<=0) { + IP_VS_SYNC_MESG_MAX_SIZE_MAX))<=0) { /* must equal the size buf was allocated with */ IP_VS_ERR("receiving message error\n"); break; } diff -ru ipvs-1.0.3/ipvs/ipvsadm/ipvsadm.8 ipvs-1.0.3.proc.sys/ipvs/ipvsadm/ipvsadm.8 --- ipvs-1.0.3/ipvs/ipvsadm/ipvsadm.8 Thu May 30 22:00:24 2002 +++ ipvs-1.0.3.proc.sys/ipvs/ipvsadm/ipvsadm.8 Mon Jul 1 14:29:42 2002 @@ -14,6 +14,8 @@ .\" Horms : Tidy up some of the description and the .\" grammar in the -f and sysctl sections .\" Wensong Zhang : -s option description taken from ipchains(8) +.\" Horms : Document synchronisation daemon's proc +.\" entries. .\" .\" This program is free software; you can redistribute it and/or modify .\" it under the terms of the GNU General Public License as published by @@ -154,9 +156,9 @@ the current timeout value of the corresponding entry is preserved. .TP .B --start-daemon \fIstate\fP -Start the connection synchronization daemon.
The \fIstate\fP is to +Start the connection synchronisation daemon. The \fIstate\fP is to indicate that the daemon is started as \fImaster\fP or \fIbackup\fP. The -connection synchronization daemon is implemented inside the Linux +connection synchronisation daemon is implemented inside the Linux kernel. The master daemon running on the primary load balancer multicasts changes of connections periodically, and the backup daemon running the backup load balancers receives multicast message and @@ -164,9 +166,33 @@ balancer fails, a backup load balancer will takeover, and it has state of almost all connections, so that almost all established connections can continue to access the service. +.sp +There are 3 proc entries that affect the behaviour of the +synchronisation daemon. In the case of each of +these proc entries, values outside of the valid ranges given +will be rounded up or down as necessary. +.sp +/proc/sys/net/ipv4/vs/sync_msg_max_size sets the maximum size of messages +sent by the synchronisation daemon in bytes. The default is 1228 and the +useful range is 52 through to 6148. +.sp +/proc/sys/net/ipv4/vs/sync_frequency sets the synchronisation frequency \- +how often a connection is +synchronised in terms of the number of packets received. The default is 50 +and the useful range is 1 through to 2147483647. +.sp +/proc/sys/net/ipv4/vs/sync_threshold sets the synchronisation threshold \- +the minimum number of packets a connection needs to receive before it will +be synchronised. The default is 3 and the useful range is from 0 up to one less than the +synchronisation frequency. Once this threshold is reached the connection +will be synchronised each time the number of packets, modulo the +synchronisation frequency, equals the threshold. For example, using the +default frequency of 50 and the default threshold of 3, +synchronisation will occur once the 3rd packet is received, and every +50th packet from then onwards.
.TP .B --stop-daemon -Stop the connection synchronization daemon. +Stop the connection synchronisation daemon. .TP \fB-h, --help\fR Display a description of the command syntax. @@ -457,6 +483,12 @@ .br .I /proc/sys/net/ipv4/vs/secure_tcp .br +.I /proc/sys/net/ipv4/vs/sync_frequency +.br +.I /proc/sys/net/ipv4/vs/sync_msg_max_size +.br +.I /proc/sys/net/ipv4/vs/sync_threshold +.br .I /proc/sys/net/ipv4/vs/timeout_close .br .I /proc/sys/net/ipv4/vs/timeout_closewait @@ -488,5 +520,5 @@ Peter Kese man page - Mike Wangsmo Wensong Zhang - Horms + Horms .fi