Subversion Repositories ALCASAR

Rev

Rev 2057 | Blame | Last modification | View Log

ipt-netflow-2.2/0000755000000000000000000000000013213006644012322 5ustar  rootrootipt-netflow-2.2/Makefile.in0000644000000000000000000000671013213006644014373 0ustar  rootroot# Edit Makefile.in and run ./configure

# Values written as @NAME@ are placeholders; this file is the template
# that ./configure processes (see the "Makefile" rule further down).

# Kernel version string; only echoed in the "Compiling for kernel" messages.
KVERSION = @KVERSION@
# Kernel build tree handed to the kernel build system through "make -C".
KDIR = @KDIR@
# Parent directory of KDIR; uninstall removes extra/ipt_NETFLOW.ko below it.
KINSTDIR = $(shell dirname @KDIR@)
# Same placeholder that feeds ccflags-y below.
KOPTS = @KOPTS@
# Extra compiler flags used when building the lib*_NETFLOW shared objects.
IPTABLES_CFLAGS = @IPTABLES_CFLAGS@
# Directory (below DESTDIR) that receives libipt_NETFLOW.so/libip6t_NETFLOW.so.
IPTABLES_MODULES = @IPTABLES_MODULES@
# Command run by minstall after modules_install.
DEPMOD = /sbin/depmod -a
# Arguments replayed to "./configure --make" when the Makefile is regenerated.
CARGS = @CARGS@
# Install location of the SNMP agent module.
SNMPTGSO = /usr/lib/snmp/dlmod/snmp_NETFLOW.so
# snmpd configuration file that sinstall/uninstall search for SNMPLINE.
SNMPCONF = /etc/snmp/snmpd.conf
# Configuration line the user is told to add to (or remove from) SNMPCONF.
SNMPLINE = dlmod netflow $(SNMPTGSO)
# Compiler for the userspace objects (shared libraries, SNMP module).
CC = gcc

# https://www.kernel.org/doc/Documentation/kbuild/modules.txt
# https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt
# Variables read by the kernel build system when it is invoked with
# M=$(CURDIR): the module object to build and its extra compiler flags.
obj-m = ipt_NETFLOW.o
ccflags-y = @KOPTS@

# Command-style targets of this group; none of them names a real file.
.PHONY: all sparse coverity

# Default goal: kernel module, both iptables extension libraries and the
# optional SNMP target chosen by configure.
all: ipt_NETFLOW.ko libipt_NETFLOW.so libip6t_NETFLOW.so @SNMPTARGET@

# Build the kernel module through the kernel build system.
# $(MAKE) (not a literal "make") is used so that -j/jobserver and -n are
# passed on to the sub-make. The final touch marks the module as up to date.
ipt_NETFLOW.ko: version.h ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile
        @echo Compiling for kernel $(KVERSION)
        $(MAKE) -C $(KDIR) M=$(CURDIR) modules CONFIG_DEBUG_INFO=y
        @touch $@
# Rebuild the module from scratch with C=1 passed to the kernel build.
# The sources are order-only prerequisites: they must exist, but their
# timestamps do not matter because the objects are removed first.
sparse: | version.h ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile
        @rm -f ipt_NETFLOW.ko ipt_NETFLOW.o
        @echo Compiling for kernel $(KVERSION)
        $(MAKE) -C $(KDIR) M=$(CURDIR) modules C=1
        @touch ipt_NETFLOW.ko
# Run the coverity-submit helper.
coverity:
        coverity-submit -v

# Command-style targets of this group; none of them names a real file.
.PHONY: minstall mclean lclean clean

# Install the kernel module via the kernel build system, then run DEPMOD.
# $(MAKE) is used instead of a literal "make" so that -j/jobserver and -n
# reach the sub-make. The module is an order-only prerequisite: it must
# exist, but is not rebuilt just because of its timestamp.
minstall: | ipt_NETFLOW.ko
        @echo " *"
        $(MAKE) -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR)
        $(DEPMOD)
# Let the kernel build system remove the module build products.
mclean:
        $(MAKE) -C $(KDIR) M=$(CURDIR) clean
# Remove the userspace shared objects and their intermediate objects.
lclean:
        -rm -f *.so *_sh.o
# Full clean: both partial cleans plus remaining objects and version.h.
clean: mclean lclean
        -rm -f *.so *.o modules.order version.h

# Build the SNMP agent module as a shared object linked against libnetsnmp.
snmp_NETFLOW.so: snmp_NETFLOW.c
        $(CC) -fPIC -shared -o $@ $< -lnetsnmp

# sinstall is a command, not a file.
.PHONY: sinstall

# Install the MIB file and the SNMP agent module below DESTDIR, then print
# hints: whether SNMPLINE still has to be added to SNMPCONF, and whether a
# running snmpd needs a restart.
# "grep -E" replaces the obsolescent "egrep" spelling; the pattern and the
# -q/-s options are unchanged.
sinstall: | snmp_NETFLOW.so IPT-NETFLOW-MIB.my
        @echo " *"
        install -D IPT-NETFLOW-MIB.my $(DESTDIR)/usr/share/snmp/mibs/IPT-NETFLOW-MIB.my
        install -D snmp_NETFLOW.so $(DESTDIR)$(SNMPTGSO)
        @if ! grep -Eqs "^ *$(SNMPLINE)" $(SNMPCONF); then \
        echo " *"; \
        echo " *  Add this line to $(SNMPCONF) to enable IPT-NETFLOW-MIB:"; \
        echo " *"; \
        echo " *     $(SNMPLINE)"; \
        echo " *"; \
        fi
        @if killall -0 snmpd >/dev/null 2>&1; then \
          echo " *  (snmpd needs restart for changes to take effect.)"; \
        else \
          echo " *  (snmpd is not started.)"; \
        fi

# Position-independent object for an iptables extension library.
# Every %_sh.o is compiled from the same source file, libipt_NETFLOW.c,
# which is the rule's only prerequisite.
%_sh.o: libipt_NETFLOW.c
        $(CC) $(CFLAGS) -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c $<

# Link a shared library from its matching *_sh.o object.
%.so: %_sh.o
        $(CC) -shared -o $@ $^

# Generated header: the output of "version.sh --define", refreshed whenever
# the module sources or the Makefile change.
version.h: ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile
        @./version.sh --define > $@

# Command-style targets of this group; none of them names a real file.
.PHONY: linstall dinstall install

# Install both iptables extension libraries into IPTABLES_MODULES below
# DESTDIR. The libraries are order-only prerequisites: they must exist.
linstall: | libipt_NETFLOW.so libip6t_NETFLOW.so
        @echo " *"
        install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
        install -D libip6t_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so

# Run the DKMS helper script in install mode.
dinstall:
        @echo " *"
        @./install-dkms.sh --install

# Top-level install: kernel module, libraries, plus the optional DKMS and
# SNMP install targets substituted by configure.
install: minstall linstall @DKMSINSTALL@ @SNMPINSTALL@

# uninstall is a command, not a file.
.PHONY: uninstall

# Remove everything the install targets put below DESTDIR, tell the user
# which line is left to delete from SNMPCONF, run the DKMS helper in
# uninstall mode when configure selected "dinstall", and remove the module.
# "grep -E" replaces the obsolescent "egrep" spelling; patterns and options
# are unchanged.
uninstall:
        -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
        -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so
        -rm -f $(DESTDIR)/usr/share/snmp/mibs/IPT-NETFLOW-MIB.my
        -rm -f $(DESTDIR)$(SNMPTGSO)
        @if grep -Eqs "^ *$(SNMPLINE)" $(SNMPCONF); then \
        echo " *"; \
        echo " *  Remove this line from $(SNMPCONF):"; \
        echo " *"; \
        echo " *     "`grep -E "^ *$(SNMPLINE)" $(SNMPCONF)`; \
        echo " *"; \
        fi
        @if [ "@DKMSINSTALL@" = dinstall ]; then ./install-dkms.sh --uninstall; fi
        -rm -f $(DESTDIR)$(KINSTDIR)/extra/ipt_NETFLOW.ko

# Regenerate the Makefile when its template or the configure script is
# newer, replaying the arguments stored in CARGS.
Makefile: Makefile.in configure
        ./configure --make $(CARGS)

# Command-style targets of this group; none of them names a real file.
.PHONY: load unload reload

# Insert the freshly built module and add NETFLOW rules to the INPUT and
# OUTPUT chains of iptables and ip6tables. Every step is best-effort
# (leading "-"), so a failing step does not stop the remaining ones.
load: all
        -insmod ipt_NETFLOW.ko active_timeout=5 protocol=9
        -iptables -I OUTPUT -j NETFLOW
        -iptables -I INPUT -j NETFLOW
        -ip6tables -I OUTPUT -j NETFLOW
        -ip6tables -I INPUT -j NETFLOW

# Reverse of load: delete the rules, then remove the module (best-effort).
unload:
        -iptables -D OUTPUT -j NETFLOW
        -iptables -D INPUT -j NETFLOW
        -ip6tables -D OUTPUT -j NETFLOW
        -ip6tables -D INPUT -j NETFLOW
        -rmmod ipt_NETFLOW.ko

# unload followed by load (prerequisite order).
reload: unload load

# Always regenerate ChangeLog from gitlog-to-changelog output; the target is
# declared phony so an existing file never counts as up to date.
.PHONY: ChangeLog
ChangeLog:
        gitlog-to-changelog > $@

ipt-netflow-2.2/raw_promisc_debian_squeeze6.patch0000644000000000000000000000225513213006644021025 0ustar  rootroot
 Short manual and patch for Debian Squeeze
 suggested by Pavel Odintsov:

On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote:
>
> краткий мануал для патчинга Debian Squeeze ядра патчем promisc.
> 
> cd /usr/src
> apt-get install -y dpkg-dev
> apt-get build-dep  linux-image-2.6.32-5-amd64
> cd linux-2.6-2.6.32/
> apt-get source  linux-image-2.6.32-5-amd64
> 
> wget .... /root/raw_promisc_debian_squeeze6.patch
> patch -p1 < raw_promisc_debian_squeeze6.patch
> Накладываем патчи дебияна:
> debian/rules source
> 
> Запускаем сборку:
> debian/rules binary
> 

diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c
--- linux-2.6-2.6.32/net/ipv4/ip_input.c        2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c   2012-06-25 19:13:49.000000000 +0200
@@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n
        /* When the interface is in promisc. mode, drop all the crap
         * that it receives, do not try to analyse it.
         */
-       if (skb->pkt_type == PACKET_OTHERHOST)
-               goto drop;
+       //if (skb->pkt_type == PACKET_OTHERHOST)
+       //      goto drop;
 
 
        IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
ipt-netflow-2.2/compat.h0000644000000000000000000003557413213006644013774 0ustar  rootroot/* This code is derived from the Linux Kernel sources intended
 * to maintain compatibility with different Kernel versions.
 * Copyright of original source is of respective Linux Kernel authors.
 * License is GPLv2.
 */

#ifndef COMPAT_NETFLOW_H
#define COMPAT_NETFLOW_H


/*
 * NIPQUAD(addr) expands to four comma-separated expressions: the bytes of
 * addr in memory order (index 0 first). Only defined here when the included
 * headers do not already provide it.
 */
#ifndef NIPQUAD
# define NIPQUAD(addr) \
        ((unsigned char *)&addr)[0], \
        ((unsigned char *)&addr)[1], \
        ((unsigned char *)&addr)[2], \
        ((unsigned char *)&addr)[3]
#endif
/*
 * HIPQUAD(addr) is the counterpart of NIPQUAD: on little-endian builds it
 * lists the bytes in reverse memory order (index 3 first), on big-endian
 * builds it is NIPQUAD itself. The build fails when neither endianness
 * macro is defined.
 */
#ifndef HIPQUAD
# if defined(__LITTLE_ENDIAN)
#  define HIPQUAD(addr) \
        ((unsigned char *)&addr)[3], \
        ((unsigned char *)&addr)[2], \
        ((unsigned char *)&addr)[1], \
        ((unsigned char *)&addr)[0]
# elif defined(__BIG_ENDIAN)
#  define HIPQUAD NIPQUAD
# else
#  error "Please fix asm/byteorder.h"
# endif /* __LITTLE_ENDIAN */
#endif

/* Map the ipt_* names onto their xt_* equivalents when IPT_CONTINUE is absent. */
#ifndef IPT_CONTINUE
# define IPT_CONTINUE XT_CONTINUE
# define ipt_target xt_target
#endif

/* Local definition of union nf_inet_addr for kernels before 2.6.25. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
union nf_inet_addr {
        __be32          ip;
        __be32          ip6[4];
        struct in_addr  in;
        struct in6_addr in6;
};
#endif

/* Fallback: first entry of a list is the entry embedding (ptr)->next. */
#ifndef list_first_entry
#define list_first_entry(ptr, type, member) \
        list_entry((ptr)->next, type, member)
#endif

/*
 * INIT_NET(x): plain x before 2.6.24, the member init_net.x from 2.6.24 on.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
# define INIT_NET(x) x
#else
# define INIT_NET(x) init_net.x
#endif

/* Ethertype constants, supplied only when the headers lack them. */
#ifndef ETH_P_8021AD
# define ETH_P_8021AD   0x88A8  /* 802.1ad Service VLAN */
#endif

#ifndef ETH_P_QINQ1
# define ETH_P_QINQ1    0x9100  /* deprecated QinQ VLAN */
# define ETH_P_QINQ2    0x9200  /* deprecated QinQ VLAN */
# define ETH_P_QINQ3    0x9300  /* deprecated QinQ VLAN */
#endif

/* IP protocol number fallback for IPPROTO_MH. */
#ifndef IPPROTO_MH
# define IPPROTO_MH     135
#endif

/* sysctl table compatibility, only relevant when CONFIG_SYSCTL is set. */
#ifdef CONFIG_SYSCTL
/* BEFORE2632(x,y) expands to "x,y" before 2.6.32 and to nothing afterwards. */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
#  define BEFORE2632(x,y) x,y
# else /* since 2.6.32 */
#  define BEFORE2632(x,y)
# endif

/* From 3.17 on, the bare name ctl_table is mapped to struct ctl_table. */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
#  define ctl_table struct ctl_table
# endif

/* Without HAVE_GRSECURITY_H, ctl_table_no_const is just ctl_table. */
# ifndef HAVE_GRSECURITY_H
#  define ctl_table_no_const ctl_table
# endif
#endif

/*
 * hlist iteration wrappers with a uniform argument list: before 3.9 they
 * are the kernel macros unchanged; since 3.9 the "pos" argument is dropped
 * before forwarding to the kernel macros.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
# define compat_hlist_for_each_entry                    hlist_for_each_entry
# define compat_hlist_for_each_entry_safe               hlist_for_each_entry_safe
#else /* since 3.9.0 */
# define compat_hlist_for_each_entry(a,pos,c,d)         hlist_for_each_entry(a,c,d)
# define compat_hlist_for_each_entry_safe(a,pos,c,d,e)  hlist_for_each_entry_safe(a,c,d,e)
#endif

/*
 * Fallback WARN_ONCE: note that this version prints on every call where x
 * is true, not only the first time.
 */
#ifndef WARN_ONCE
#define WARN_ONCE(x,fmt...) ({ if (x) printk(KERN_WARNING fmt); })
#endif

/* IP protocol number for UDP-Lite, defined here for kernels before 2.6.20. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
# define IPPROTO_UDPLITE 136
#endif

/* Fallbacks for the jiffies comparison helpers, built on time_after/time_before. */
#ifndef time_is_before_jiffies
# define time_is_before_jiffies(a) time_after(jiffies, a)
#endif
#ifndef time_is_after_jiffies
# define time_is_after_jiffies(a) time_before(jiffies, a)
#endif

/*
 * Before 3.14: provide prandom_u32_max(). prandom_u32 itself is mapped to
 * get_random_int (before 2.6.19) or random32 (before 3.8); from 3.8 on it
 * is used as is. Because of the #define below, the function defined here
 * is actually named compat_prandom_u32_max.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
#  define prandom_u32 get_random_int
# elif LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
#  define prandom_u32 random32
#endif
#define prandom_u32_max compat_prandom_u32_max
/* Scale a 32-bit random value into [0, ep_ro) via a 64-bit multiply and shift. */
static inline u32 prandom_u32_max(u32 ep_ro)
{
        return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
}
#endif

/*
 * Fallback min_not_zero(x, y): y if x is zero, x if y is zero, otherwise
 * min(x, y). Each argument is evaluated exactly once.
 */
#ifndef min_not_zero
# define min_not_zero(x, y) ({                  \
        typeof(x) __x = (x);                    \
        typeof(y) __y = (y);                    \
        __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0)
/*
 * Local __ethtool_get_settings() for kernels before 3.2.
 * Asserts that RTNL is held, then asks the device's ethtool_ops for its
 * settings. *cmd is zeroed and tagged ETHTOOL_GSET before the driver call.
 * Returns the driver's result, or -EOPNOTSUPP when the driver has no
 * get_settings operation (in which case *cmd is left untouched).
 */
static int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
        ASSERT_RTNL();

        if (dev->ethtool_ops->get_settings) {
                memset(cmd, 0, sizeof(*cmd));
                cmd->cmd = ETHTOOL_GSET;
                return dev->ethtool_ops->get_settings(dev, cmd);
        }

        return -EOPNOTSUPP;
}
#endif

/* Before 2.6.27 ethtool_cmd_speed(x) is simply the speed member of *x. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
# define ethtool_cmd_speed(x) (x)->speed
#endif

/* ARP hardware type constants, supplied only when the headers lack them. */
#ifndef ARPHRD_PHONET
# define ARPHRD_PHONET          820
# define ARPHRD_PHONET_PIPE     821
#endif
#ifndef ARPHRD_IEEE802154
# define ARPHRD_IEEE802154      804
#endif

/*
 * for_each_netdev_ns(net, dev): iterate over network devices with one
 * argument list on all kernels. Before 2.6.22 it walks the dev_base linked
 * list; before 2.6.24 it calls for_each_netdev() without the net argument;
 * otherwise net is passed through.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
# define for_each_netdev_ns(net, dev) for (dev = dev_base; dev; dev = dev->next)
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
# define for_each_netdev_ns(net, d) for_each_netdev(d)
#else
# define for_each_netdev_ns(net, d) for_each_netdev(net, d)
#endif

/*
 * CHECK_FAIL / CHECK_OK: 0 / 1 before 2.6.35, -EINVAL / 0 from 2.6.35 on.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
# define CHECK_FAIL     0
# define CHECK_OK       1
#else
# define CHECK_FAIL     -EINVAL
# define CHECK_OK       0
#endif

/* From 2.6.35 on, use_module is mapped to ref_module. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
# define use_module     ref_module
#endif

/* Map the NF_IP_* hook names onto NF_INET_* when the former are absent. */
#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
# define NF_IP_PRE_ROUTING      NF_INET_PRE_ROUTING
# define NF_IP_LOCAL_IN         NF_INET_LOCAL_IN
# define NF_IP_FORWARD          NF_INET_FORWARD
# define NF_IP_LOCAL_OUT        NF_INET_LOCAL_OUT
# define NF_IP_POST_ROUTING     NF_INET_POST_ROUTING
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
/* Array variants of target (un)registration, after net/netfilter/x_tables.c */

/* Unregister the first n targets of the array, in ascending order. */
static void xt_unregister_targets(struct xt_target *target, unsigned int n)
{
        struct xt_target *cur;

        for (cur = target; cur < target + n; cur++)
                xt_unregister_target(cur);
}

/*
 * Register n targets one by one. On the first failure, every target that
 * was already registered is unregistered again and the error is returned.
 * Returns 0 when all registrations succeed (also for n == 0).
 */
static int xt_register_targets(struct xt_target *target, unsigned int n)
{
        unsigned int done;

        for (done = 0; done < n; done++) {
                int err = xt_register_target(&target[done]);

                if (!err)
                        continue;
                if (done > 0)
                        xt_unregister_targets(target, done);
                return err;
        }
        return 0;
}
#endif

/* From 3.11 on, num_physpages is mapped to totalram_pages. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
#define num_physpages   totalram_pages
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
# ifdef ktime_to_timeval
/*
 * ktime_to_timeval is a macro on 64-bit and an inline function on 32-bit
 * CPUs. As a macro it calls ns_to_timeval, which is not exported, so a
 * local replacement is substituted for it here.
 */
struct timeval portable_ns_to_timeval(const s64 nsec)
{
        struct timeval tv;
        struct timespec ts;

        ts = ns_to_timespec(nsec);
        tv.tv_sec = ts.tv_sec;
        tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
        return tv;
}
# define ns_to_timeval portable_ns_to_timeval
# endif

/* Convert a ktime_t to milliseconds by way of struct timeval. */
static inline s64 portable_ktime_to_ms(const ktime_t kt)
{
        struct timeval tv = ktime_to_timeval(kt);
        s64 ms = (s64) tv.tv_sec * MSEC_PER_SEC;

        ms += tv.tv_usec / USEC_PER_MSEC;
        return ms;
}
# define ktime_to_ms portable_ktime_to_ms
#endif /* before 2.6.35 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
/* Convert a ktime_t to microseconds by way of struct timeval. */
static inline s64 portable_ktime_to_us(const ktime_t kt)
{
        struct timeval tv = ktime_to_timeval(kt);
        s64 us = (s64) tv.tv_sec * USEC_PER_SEC;

        us += tv.tv_usec;
        return us;
}
#define ktime_to_us portable_ktime_to_us
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
/*
 * Big-endian unaligned stores for kernels before 2.6.26: convert val to
 * big-endian, then write it to p with put_unaligned().
 */
static inline void put_unaligned_be16(u16 val, void *p)
{
        __be16 *dst = (__be16 *)p;

        put_unaligned(cpu_to_be16(val), dst);
}
static inline void put_unaligned_be32(u32 val, void *p)
{
        __be32 *dst = (__be32 *)p;

        put_unaligned(cpu_to_be32(val), dst);
}
static inline void put_unaligned_be64(u64 val, void *p)
{
        __be64 *dst = (__be64 *)p;

        put_unaligned(cpu_to_be64(val), dst);
}
#endif

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) && !defined(RHEL_MAJOR)
/*
 * Local __seq_open_private() for kernels up to 2.6.24 (non-RHEL).
 * Allocates psize zeroed bytes, opens the seq_file on f with ops and
 * stores the allocation in the seq_file's private pointer.
 * Returns the allocation, or NULL when either the allocation or
 * seq_open() fails (the allocation is freed in the latter case).
 */
static void *__seq_open_private(struct file *f, struct seq_operations *ops,
    int psize)
{
        struct seq_file *sf;
        void *priv = kzalloc(psize, GFP_KERNEL);

        if (!priv)
                return NULL;

        if (seq_open(f, ops) < 0) {
                kfree(priv);
                return NULL;
        }

        sf = f->private_data;
        sf->private = priv;
        return priv;
}
#endif

/* disappeared in v3.19 */
/* Fallback: dereference the this_cpu_ptr() of the variable's address. */
#ifndef __get_cpu_var
#define __get_cpu_var(var)      (*this_cpu_ptr(&(var)))
#endif

/* MPLS fallbacks, supplied only when MPLS_HLEN is not defined. */
#ifndef MPLS_HLEN
#define MPLS_HLEN 4
/*
 * Non-zero when eth_type (network byte order) is one of the two MPLS
 * ethertypes, ETH_P_MPLS_UC or ETH_P_MPLS_MC.
 */
static inline int eth_p_mpls(__be16 eth_type)
{
        return eth_type == htons(ETH_P_MPLS_UC) ||
                eth_type == htons(ETH_P_MPLS_MC);
}
#endif
/* MPLS label stack entry and its S-bit mask, when the headers lack them. */
#ifndef MPLS_LS_S_MASK
struct mpls_label {
        __be32 entry;
};
#define MPLS_LS_S_MASK          0x00000100

#endif

/* The sockaddr comparison functions are taken from fs/nfs/client.c */

/*
 * Return 1 when both sockaddrs (read as sockaddr_in6) carry the same IPv6
 * address, 0 otherwise. The scope-id comparison for link-local addresses
 * is compiled out.
 */
static int sockaddr_match_ipaddr6(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
        const struct sockaddr_in6 *a = (const struct sockaddr_in6 *)sa1;
        const struct sockaddr_in6 *b = (const struct sockaddr_in6 *)sa2;

        if (ipv6_addr_equal(&a->sin6_addr, &b->sin6_addr)) {
#if 0
                if (ipv6_addr_type(&a->sin6_addr) & IPV6_ADDR_LINKLOCAL)
                        return a->sin6_scope_id == b->sin6_scope_id;
#endif
                return 1;
        }
        return 0;
}

/*
 * Return non-zero when both sockaddrs (read as sockaddr_in) carry the same
 * IPv4 address.
 */
static int sockaddr_match_ipaddr4(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
        const struct sockaddr_in *a = (const struct sockaddr_in *)sa1;
        const struct sockaddr_in *b = (const struct sockaddr_in *)sa2;

        return a->sin_addr.s_addr == b->sin_addr.s_addr;
}

/* Non-zero when both IPv6 socket addresses match in address and port. */
static int sockaddr_cmp_ip6(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
        const struct sockaddr_in6 *a = (const struct sockaddr_in6 *)sa1;
        const struct sockaddr_in6 *b = (const struct sockaddr_in6 *)sa2;

        if (!sockaddr_match_ipaddr6(sa1, sa2))
                return 0;
        return a->sin6_port == b->sin6_port;
}

/* Non-zero when both IPv4 socket addresses match in address and port. */
static int sockaddr_cmp_ip4(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
        const struct sockaddr_in *a = (const struct sockaddr_in *)sa1;
        const struct sockaddr_in *b = (const struct sockaddr_in *)sa2;

        if (!sockaddr_match_ipaddr4(sa1, sa2))
                return 0;
        return a->sin_port == b->sin_port;
}

/*
 * Compare two socket addresses. Returns non-zero when both have the same
 * family and, for AF_INET / AF_INET6, the same address and port.
 * Different families and any other family compare as not equal (0).
 */
static int sockaddr_cmp(const struct sockaddr_storage *sa1, const struct sockaddr_storage *sa2)
{
        const struct sockaddr *a = (const struct sockaddr *)sa1;
        const struct sockaddr *b = (const struct sockaddr *)sa2;

        if (sa1->ss_family != sa2->ss_family)
                return 0;

        if (sa1->ss_family == AF_INET)
                return sockaddr_cmp_ip4(a, b);
        if (sa1->ss_family == AF_INET6)
                return sockaddr_cmp_ip6(a, b);

        return 0;
}

#ifndef IN6PTON_XDIGIT
#define hex_to_bin compat_hex_to_bin
/*
 * After lib/hexdump.c: value of one hexadecimal digit.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
 */
int hex_to_bin(char ch)
{
        char lower;

        if (ch >= '0' && ch <= '9')
                return ch - '0';

        lower = tolower(ch);
        return (lower >= 'a' && lower <= 'f') ? lower - 'a' + 10 : -1;
}

/* After net/core/utils.c: character classes used by in4_pton()/in6_pton(). */
#define IN6PTON_XDIGIT          0x00010000
#define IN6PTON_DIGIT           0x00020000
#define IN6PTON_COLON_MASK      0x00700000
#define IN6PTON_COLON_1         0x00100000      /* single : requested */
#define IN6PTON_COLON_2         0x00200000      /* second : requested */
#define IN6PTON_COLON_1_2       0x00400000      /* :: requested */
#define IN6PTON_DOT             0x00800000      /* . */
#define IN6PTON_DELIM           0x10000000
#define IN6PTON_NULL            0x20000000      /* first/tail */
#define IN6PTON_UNKNOWN         0x40000000

/*
 * Classify one input character.
 * Returns IN6PTON_DELIM for delim or NUL, IN6PTON_COLON_MASK for ':',
 * IN6PTON_DOT for '.', and for a hex digit its value OR-ed with
 * IN6PTON_XDIGIT (plus IN6PTON_DIGIT when the value is below 10).
 * Any other character is IN6PTON_DELIM when delim is -1, otherwise
 * IN6PTON_UNKNOWN.
 */
static inline int xdigit2bin(char c, int delim)
{
        int digit;

        if (c == '\0' || c == delim)
                return IN6PTON_DELIM;

        switch (c) {
        case ':':
                return IN6PTON_COLON_MASK;
        case '.':
                return IN6PTON_DOT;
        }

        digit = hex_to_bin(c);
        if (digit < 0)
                return delim == -1 ? IN6PTON_DELIM : IN6PTON_UNKNOWN;

        if (digit < 10)
                return digit | IN6PTON_XDIGIT | IN6PTON_DIGIT;
        return digit | IN6PTON_XDIGIT;
}

/*
 * Parse a dotted-decimal IPv4 address.
 *
 * src/srclen: input text; a negative srclen means "use strlen(src)".
 * dst:        receives the 4 address bytes, written only on success.
 * delim:      character that terminates the address (see xdigit2bin()).
 * end:        if non-NULL, receives the position where parsing stopped.
 *
 * Returns 1 on success and 0 on failure.
 */
int in4_pton(const char *src, int srclen,
             u8 *dst,
             int delim, const char **end)
{
        const char *s;
        u8 *d;
        u8 dbuf[4];
        int ret = 0;
        int i;
        int w = 0;

        if (srclen < 0)
                srclen = strlen(src);
        s = src;
        d = dbuf;
        i = 0;
        while(1) {
                int c;
                /* Input exhausted is classified like a NUL character. */
                c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
                /* Only decimal digits, '.', ':' and the delimiter are accepted. */
                if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
                        goto out;
                }
                if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
                        /*
                         * End of one field. Digit codes carry flag bits, so
                         * w is non-zero once any digit was consumed; w == 0
                         * therefore means the field was empty.
                         */
                        if (w == 0)
                                goto out;
                        *d++ = w & 0xff;
                        w = 0;
                        i++;
                        if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
                                /* Terminator: exactly four fields are required. */
                                if (i != 4)
                                        goto out;
                                break;
                        }
                        goto cont;
                }
                /* Accumulate the decimal value in the low 16 bits of w. */
                w = (w * 10) + c;
                if ((w & 0xffff) > 255) {
                        goto out;
                }
cont:
                /* Input continues after a fourth field: reject. */
                if (i >= 4)
                        goto out;
                s++;
                srclen--;
        }
        ret = 1;
        memcpy(dst, dbuf, sizeof(dbuf));
out:
        if (end)
                *end = s;
        return ret;
}

/*
 * Parse a textual IPv6 address, including the "::" shorthand and a
 * trailing dotted-decimal IPv4 part (handed over to in4_pton()).
 *
 * src/srclen: input text; a negative srclen means "use strlen(src)".
 * dst:        receives the 16 address bytes, written only on success.
 * delim:      character that terminates the address (see xdigit2bin()).
 * end:        if non-NULL, receives the position where parsing stopped.
 *
 * Returns 1 on success and 0 on failure.
 */
int in6_pton(const char *src, int srclen,
             u8 *dst,
             int delim, const char **end)
{
        const char *s, *tok = NULL;     /* tok: start of the current field */
        u8 *d, *dc = NULL;              /* dc: position of "::" in dbuf, if seen */
        u8 dbuf[16];
        int ret = 0;
        int i;
        /* state: bit mask of the character classes acceptable next */
        int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
        int w = 0;

        memset(dbuf, 0, sizeof(dbuf));

        s = src;
        d = dbuf;
        if (srclen < 0)
                srclen = strlen(src);

        while (1) {
                int c;

                /* Input exhausted is classified like a NUL character. */
                c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
                if (!(c & state))
                        goto out;
                if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
                        /* process one 16-bit word */
                        if (!(state & IN6PTON_NULL)) {
                                *d++ = (w >> 8) & 0xff;
                                *d++ = w & 0xff;
                        }
                        w = 0;
                        if (c & IN6PTON_DELIM) {
                                /* We've processed last word */
                                break;
                        }
                        /*
                         * COLON_1 => XDIGIT
                         * COLON_2 => XDIGIT|DELIM
                         * COLON_1_2 => COLON_2
                         */
                        switch (state & IN6PTON_COLON_MASK) {
                        case IN6PTON_COLON_2:
                                /* Second ':' of "::": remember where the gap is. */
                                dc = d;
                                state = IN6PTON_XDIGIT | IN6PTON_DELIM;
                                if (dc - dbuf >= sizeof(dbuf))
                                        state |= IN6PTON_NULL;
                                break;
                        case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
                                state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
                                break;
                        case IN6PTON_COLON_1:
                                state = IN6PTON_XDIGIT;
                                break;
                        case IN6PTON_COLON_1_2:
                                state = IN6PTON_COLON_2;
                                break;
                        default:
                                state = 0;
                        }
                        tok = s + 1;
                        goto cont;
                }

                if (c & IN6PTON_DOT) {
                        /*
                         * The current field is the start of an embedded IPv4
                         * address: re-parse it from the field start with
                         * in4_pton(), which also updates s.
                         */
                        ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
                        if (ret > 0) {
                                d += 4;
                                break;
                        }
                        goto out;
                }

                /* Hex digit: shift it into the current 16-bit word. */
                w = (w << 4) | (0xff & c);
                state = IN6PTON_COLON_1 | IN6PTON_DELIM;
                if (!(w & 0xf000)) {
                        /* Fewer than four digits so far: another one may follow. */
                        state |= IN6PTON_XDIGIT;
                }
                if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
                        state |= IN6PTON_COLON_1_2;
                        state &= ~IN6PTON_DELIM;
                }
                if (d + 2 >= dbuf + sizeof(dbuf)) {
                        state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
                }
cont:
                if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
                    d + 4 == dbuf + sizeof(dbuf)) {
                        state |= IN6PTON_DOT;
                }
                if (d >= dbuf + sizeof(dbuf)) {
                        /* Buffer full: no further digits or colons allowed. */
                        state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
                }
                s++;
                srclen--;
        }

        /* d now points at the last byte written to dbuf. */
        i = 15; d--;

        if (dc) {
                /*
                 * Expand "::": copy the bytes parsed after the gap to the
                 * end of dst, zero-fill down to the gap position, then copy
                 * the bytes parsed before the gap.
                 */
                while(d >= dc)
                        dst[i--] = *d--;
                while(i >= dc - dbuf)
                        dst[i--] = 0;
                while(i >= 0)
                        dst[i--] = *d--;
        } else
                memcpy(dst, dbuf, sizeof(dbuf));

        ret = 1;
out:
        if (end)
                *end = s;
        return ret;
}
#endif /* IN6PTON_XDIGIT */

/*
 * From 4.2 on, the four-argument sock_create_kern(f, t, p, s) used by this
 * code is forwarded to the kernel function with &init_net prepended.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0)
# define sock_create_kern(f, t, p, s) sock_create_kern(&init_net, f, t, p, s)
#endif

/* Map the vlan_tx_tag_* names onto skb_vlan_tag_* when only the latter exist. */
#if !defined(vlan_tx_tag_get) && defined(skb_vlan_tag_get)
# define vlan_tx_tag_get skb_vlan_tag_get
# define vlan_tx_tag_present skb_vlan_tag_present
#endif

/* Fallback value for SPEED_UNKNOWN. */
#ifndef SPEED_UNKNOWN
# define SPEED_UNKNOWN          -1
#endif

/*
 * __GNUC_PREREQ(maj, min): non-zero when the compiler reports a GNU C
 * version of at least maj.min; constant 0 when the version macros are
 * unavailable.
 *
 * A definition is supplied only when none exists yet, so that a
 * __GNUC_PREREQ already provided by an included header is never
 * redefined to the constant-0 fallback.
 */
#ifndef __GNUC_PREREQ
# if defined __GNUC__ && defined __GNUC_MINOR__
#  define __GNUC_PREREQ(maj, min) \
        ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
# else
#  define __GNUC_PREREQ(maj, min) 0
# endif
#endif

/* ktime is not union anymore, since 2456e855354415bfaeb7badaa14e11b3e02c8466 */
/*
 * first_tv64 / last_tv64 name the 64-bit value of the "first" and "last"
 * ktime members: the .tv64 field before 4.10, the member itself afterwards.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
# define first_tv64     first.tv64
# define last_tv64      last.tv64
#else
# define first_tv64     first
# define last_tv64      last
#endif

/* Offset changes made in 613dbd95723aee7abd16860745691b6c7bda20dc */
/*
 * Accessor functions for the xt parameter block on kernels from 2.6.28 up
 * to (not including) 4.10; each one returns the member of the same name.
 * Before 2.6.35 the structure is called xt_target_param.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) && LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
#  define xt_action_param xt_target_param
# endif
static inline u_int8_t xt_family(const struct xt_action_param *par)
{
        return par->family;
}
static inline const struct net_device *xt_in(const struct xt_action_param *par)
{
        return par->in;
}
static inline const struct net_device *xt_out(const struct xt_action_param *par)
{
        return par->out;
}
static inline unsigned int xt_hooknum(const struct xt_action_param *par)
{
        return par->hooknum;
}
#endif

/* Fallback value for SK_CAN_REUSE. */
#ifndef SK_CAN_REUSE
# define SK_CAN_REUSE   1
#endif

/* Reference-count read: atomic_read before 4.13, refcount_read afterwards. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,13,0)
# define compat_refcount_read atomic_read
#else
# define compat_refcount_read refcount_read
#endif

#endif /* COMPAT_NETFLOW_H */
ipt-netflow-2.2/IPT-NETFLOW-MIB.my0000644000000000000000000004327713213006644015063 0ustar  rootroot-- IPT-NETFLOW-MIB.my

IPT-NETFLOW-MIB DEFINITIONS ::= BEGIN

IMPORTS
    MODULE-IDENTITY, OBJECT-TYPE, Counter64,
    Gauge32, Integer32, Counter32, enterprises
        FROM SNMPv2-SMI
    OBJECT-GROUP, MODULE-COMPLIANCE
        FROM SNMPv2-CONF
    CounterBasedGauge64
        FROM HCNUM-TC
    TEXTUAL-CONVENTION, DisplayString, DateAndTime
        FROM SNMPv2-TC;

iptNetflowMIB MODULE-IDENTITY
    LAST-UPDATED "201409120000Z"
    ORGANIZATION "ABC"
    CONTACT-INFO
        "Author's email: abc at telekom.ru

        Latest version should be obtained from
        https://raw.githubusercontent.com/aabc/ipt-netflow/master/IPT-NETFLOW-MIB.my"

    DESCRIPTION
        "The IPT-NETFLOW-MIB defines managed objects
        for ipt_NETFLOW kernel module, which is high
        performance NetFlow/IPFIX probe for Linux.

        Copyright (c) 2014 <abc at telekom.ru>.

        License: GPL"

    REVISION    "201409110000Z"
    DESCRIPTION "Initial revision."

    ::= { enterprises 37476 9000 10 1 }

-- Top Level --
iptNetflowObjects       OBJECT IDENTIFIER ::= { iptNetflowMIB 1 }
iptNetflowStatistics    OBJECT IDENTIFIER ::= { iptNetflowMIB 2 }
iptNetflowConformance   OBJECT IDENTIFIER ::= { iptNetflowMIB 3 }

-- Objects --

-- modinfo
iptNetflowModule        OBJECT IDENTIFIER ::= { iptNetflowObjects 1 }
-- sysctl net.netflow
iptNetflowSysctl        OBJECT IDENTIFIER ::= { iptNetflowObjects 2 }

-- Modinfo Objects --

name OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Module name."
    ::= { iptNetflowModule 1 }

version OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Software version of the module."
    ::= { iptNetflowModule 2 }

srcversion OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
         "Binary version of the module."
    ::= { iptNetflowModule 3 }

loadTime OBJECT-TYPE
    SYNTAX      DateAndTime
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
         "Module load date-time."
    ::= { iptNetflowModule 4 }

refcnt OBJECT-TYPE
    SYNTAX      Integer32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
         "Module usage by other kernel objects."
    ::= { iptNetflowModule 5 }

-- RW Sysctl objects --

protocol OBJECT-TYPE
    SYNTAX      INTEGER {
        netflow5(5),
        netflow9(9),
        ipfix(10)
    }
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Protocol version (5, 9, 10=IPFIX)."
    ::= { iptNetflowSysctl 1 }

hashsize OBJECT-TYPE
    SYNTAX      Integer32
    UNITS       "buckets"
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Hash table size of flows cache."
    ::= { iptNetflowSysctl 2 }

maxflows OBJECT-TYPE
    SYNTAX      Integer32
    UNITS       "flows"
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Max flows limit. This limit is used for DoS protection."
    ::= { iptNetflowSysctl 3 }

active-timeout OBJECT-TYPE
    SYNTAX      Integer32
    UNITS       "minutes"
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Active flows timeout value."
    ::= { iptNetflowSysctl 4 }

inactive-timeout OBJECT-TYPE
    SYNTAX      Integer32
    UNITS       "minutes"
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Inactive flows timeout value."
    ::= { iptNetflowSysctl 5 }

sndbuf OBJECT-TYPE
    SYNTAX      Integer32
    UNITS       "bytes"
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Sockets SNDBUF size."
    ::= { iptNetflowSysctl 6 }

destination OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Export destination parameter."
    ::= { iptNetflowSysctl 7 }

aggregation OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Aggregation parameters."
    ::= { iptNetflowSysctl 8 }

sampler OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Sampler parameters: sampling mode:sampling interval.
        Sampling modes are: deterministic, random, hash."
    ::= { iptNetflowSysctl 9 }

natevents OBJECT-TYPE
    SYNTAX      INTEGER {
        disabled(0),
        enabled(1)
    }
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Natevents (NEL) controlling parameter."
    ::= { iptNetflowSysctl 10 }

promisc OBJECT-TYPE
    SYNTAX      INTEGER {
        disabled(0),
        enabled(1)
    }
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "Promisc hack controlling parameter."
    ::= { iptNetflowSysctl 11 }

snmp-rules OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "SNMP-index translation rules."
    ::= { iptNetflowSysctl 12 }

scan-min OBJECT-TYPE
    SYNTAX      Integer32
    MAX-ACCESS  read-write
    STATUS      current
    DESCRIPTION
        "scan-min parameter."
    ::= { iptNetflowSysctl 13 }

-- Statistics Objects --

iptNetflowTotals        OBJECT IDENTIFIER ::= { iptNetflowStatistics 1 }

inBitRate OBJECT-TYPE
    SYNTAX      CounterBasedGauge64
    UNITS       "bits/second"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total incoming bits per second."
::= { iptNetflowTotals 1 }

inPacketRate OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "packets/second"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total incoming packets per second."
::= { iptNetflowTotals 2 }

inFlows OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "flows"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total observed (metered) flows."
::= { iptNetflowTotals 3 }

inPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total metered packets, not counting dropped packets."
::= { iptNetflowTotals 4 }

inBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total metered bytes in inPackets."
::= { iptNetflowTotals 5 }

FixedDiv100 ::= TEXTUAL-CONVENTION
    DISPLAY-HINT "d-2"
    STATUS      current
    DESCRIPTION "Fixed point, two decimals."
    SYNTAX      Gauge32

hashMetric OBJECT-TYPE
    SYNTAX      FixedDiv100
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Measure of hash table performance. Should be close to 1.0
        when optimal; will be well above 1.0 when non-optimal."
::= { iptNetflowTotals 6 }

hashMemory OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "How much system memory is used by the hash table."
::= { iptNetflowTotals 7 }

hashFlows OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "flows"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Flows currently residing in the hash table and not
        exported yet."
::= { iptNetflowTotals 8 }

hashPackets OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Packets in flows currently residing in the hash table."
::= { iptNetflowTotals 9 }

hashBytes OBJECT-TYPE
    SYNTAX      CounterBasedGauge64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Bytes in flows currently residing in the hash table."
::= { iptNetflowTotals 10 }

dropPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total packets dropped by metering process."
::= { iptNetflowTotals 11 }

dropBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total bytes in packets dropped by metering process."
::= { iptNetflowTotals 12 }

outByteRate OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "bytes/second"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total exporter output bytes per second."
::= { iptNetflowTotals 13 }

outFlows OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "flows"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total exported flow data records."
::= { iptNetflowTotals 14 }

outPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total exported packets of netflow stream itself."
::= { iptNetflowTotals 15 }

outBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total exported bytes of netflow stream itself."
::= { iptNetflowTotals 16 }

lostFlows OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "flows"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total of accounted flows that are lost by exporting process
         due to socket errors. This value will not include asynchronous
         errors (cberr), these will be counted in errTotal."
::= { iptNetflowTotals 17 }

lostPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total metered packets lost by exporting process.
         See lostFlows for details."
::= { iptNetflowTotals 18 }

lostBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total bytes in packets lost by exporting process.
         See lostFlows for details."
::= { iptNetflowTotals 19 }

errTotal OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Total exporting socket errors (including cberr)."
::= { iptNetflowTotals 20 }

sndbufPeak OBJECT-TYPE
    SYNTAX      Counter32
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Global maximum value of socket sndbuf. Sort of output
        queue length."
::= { iptNetflowTotals 21 }

-- Per CPU statistics --

iptNetflowCpuTable OBJECT-TYPE
    SYNTAX      SEQUENCE OF IptNetflowCpuEntry
    MAX-ACCESS  not-accessible
    STATUS      current
    DESCRIPTION
        "Per-CPU statistics."
    ::= { iptNetflowStatistics 2 }

iptNetflowCpuEntry OBJECT-TYPE
    SYNTAX      IptNetflowCpuEntry
    MAX-ACCESS  not-accessible
    STATUS      current
    DESCRIPTION
        "Defines an entry in the iptNetflowCpuTable."
    INDEX       { cpuIndex }
    ::= { iptNetflowCpuTable 1 }

IptNetflowCpuEntry ::=
    SEQUENCE {
      cpuIndex          INTEGER,
      cpuInPacketRate   Gauge32,
      cpuInFlows        Counter64,
      cpuInPackets      Counter64,
      cpuInBytes        Counter64,
      cpuHashMetric     FixedDiv100,
      cpuDropPackets    Counter64,
      cpuDropBytes      Counter64,
      cpuErrTrunc       Counter32,
      cpuErrFrag        Counter32,
      cpuErrAlloc       Counter32,
      cpuErrMaxflows    Counter32
    }

cpuIndex OBJECT-TYPE
    SYNTAX      Integer32 (0..4096)
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Index of this cpu."
    ::= { iptNetflowCpuEntry 1 }

cpuInPacketRate OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "packets/second"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Incoming packets per second for this cpu."
    ::= { iptNetflowCpuEntry 2 }

cpuInFlows OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "flows"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Flows metered on this cpu."
    ::= { iptNetflowCpuEntry 3 }

cpuInPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Packets metered for cpuIndex."
    ::= { iptNetflowCpuEntry 4 }

cpuInBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Bytes metered on this cpu."
    ::= { iptNetflowCpuEntry 5 }

cpuHashMetric OBJECT-TYPE
    SYNTAX      FixedDiv100
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Measure of performance of hash table on this cpu."
    ::= { iptNetflowCpuEntry 6 }

cpuDropPackets OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "packets"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Packets dropped by metering process on this cpu."
    ::= { iptNetflowCpuEntry 7 }

cpuDropBytes OBJECT-TYPE
    SYNTAX      Counter64
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Bytes in cpuDropPackets for this cpu."
    ::= { iptNetflowCpuEntry 8 }

cpuErrTrunc OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Truncated packets dropped for this cpu."
    ::= { iptNetflowCpuEntry 9 }

cpuErrFrag OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Fragmented packets dropped for this cpu."
    ::= { iptNetflowCpuEntry 10 }

cpuErrAlloc OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Packets dropped due to memory allocation errors."
    ::= { iptNetflowCpuEntry 11 }

cpuErrMaxflows OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Packets dropped due to maxflows limit being reached."
    ::= { iptNetflowCpuEntry 12 }

-- Per Socket statistics --

iptNetflowSockTable OBJECT-TYPE
    SYNTAX      SEQUENCE OF IptNetflowSockEntry
    MAX-ACCESS  not-accessible
    STATUS      current
    DESCRIPTION
        "Per socket statistics."
    ::= { iptNetflowStatistics 3 }

iptNetflowSockEntry OBJECT-TYPE
    SYNTAX      IptNetflowSockEntry
    MAX-ACCESS  not-accessible
    STATUS      current
    DESCRIPTION
        "Defines an entry in the iptNetflowSockTable."
    INDEX       { sockIndex }
    ::= { iptNetflowSockTable 1 }

IptNetflowSockEntry ::=
    SEQUENCE {
      sockIndex         INTEGER,
      sockDestination   DisplayString,
      sockActive        INTEGER,
      sockErrConnect    Counter32,
      sockErrFull       Counter32,
      sockErrCberr      Counter32,
      sockErrOther      Counter32,
      sockSndbuf        Gauge32,
      sockSndbufFill    Gauge32,
      sockSndbufPeak    Gauge32
    }

sockIndex OBJECT-TYPE
    SYNTAX      Integer32 (0..4096)
    MAX-ACCESS  not-accessible
    STATUS      current
    DESCRIPTION
        "Exporting socket index."
    ::= { iptNetflowSockEntry 1 }

sockDestination OBJECT-TYPE
    SYNTAX      DisplayString
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Exporting connection destination of this socket."
    ::= { iptNetflowSockEntry 2 }

sockActive OBJECT-TYPE
    SYNTAX      INTEGER {
          inactive(0),
          active(1)
    }
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Connection state of this socket."
    ::= { iptNetflowSockEntry 3 }

sockErrConnect OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Connection attempt count. A high value usually means
        that the network is not set up properly, or that the module
        was loaded before the network was up; in the latter case it
        is not dangerous and should be ignored."
    ::= { iptNetflowSockEntry 4 }

sockErrFull OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Socket full errors on this socket. Usually means the sndbuf
        value is too small."
    ::= { iptNetflowSockEntry 5 }

sockErrCberr OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Asynchronous callback errors on this socket. Usually means
        that there are 'connection refused' errors on the UDP socket
        reported via ICMP messages."
    ::= { iptNetflowSockEntry 6 }

sockErrOther OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "All other possible errors on this socket."
    ::= { iptNetflowSockEntry 7 }

sockSndbuf OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Sndbuf value for this socket. A higher value allows
        accommodating (exporting) traffic bursts."
    ::= { iptNetflowSockEntry 8 }

sockSndbufFill OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Amount of data currently in socket buffers. When this value
        reaches the sndbuf size, packet loss will occur."
    ::= { iptNetflowSockEntry 9 }

sockSndbufPeak OBJECT-TYPE
    SYNTAX      Gauge32
    UNITS       "bytes"
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
        "Historical peak amount of data in socket buffers. Useful to
        evaluate sndbuf size, because sockSndbufFill is transient."
    ::= { iptNetflowSockEntry 10 }

-- Conformance Information --

iptNetflowCompliances   OBJECT IDENTIFIER ::= { iptNetflowConformance 1 }
iptNetflowGroups        OBJECT IDENTIFIER ::= { iptNetflowConformance 2 }

iptNetflowCompliance MODULE-COMPLIANCE
    STATUS current
    DESCRIPTION "iptNetflowCompliance"
    MODULE
    MANDATORY-GROUPS {
        iptNetflowModuleGroup,
        iptNetflowSysctlGroup,
        iptNetflowTotalsGroup,
        iptNetflowCpuGroup,
        iptNetflowSockGroup
    }
    ::= { iptNetflowCompliances 1 }

iptNetflowModuleGroup OBJECT-GROUP
    OBJECTS {
        name,
        version,
        srcversion,
        loadTime,
        refcnt
    }
    STATUS      current
    DESCRIPTION "Modinfo."
    ::= { iptNetflowGroups 1 }

iptNetflowSysctlGroup OBJECT-GROUP
    OBJECTS {
        hashsize,
        maxflows,
        protocol,
        active-timeout,
        inactive-timeout,
        sndbuf,
        destination,
        aggregation,
        sampler,
        natevents,
        promisc,
        snmp-rules,
        scan-min
    }
    STATUS      current
    DESCRIPTION "Read-write objects accessed via sysctl"
    ::= { iptNetflowGroups 2 }

iptNetflowTotalsGroup OBJECT-GROUP
    OBJECTS {
        inBitRate,
        inPacketRate,
        inFlows,
        inPackets,
        inBytes,
        hashMetric,
        hashMemory,
        hashFlows,
        hashPackets,
        hashBytes,
        dropPackets,
        dropBytes,
        outByteRate,
        outFlows,
        outPackets,
        outBytes,
        lostFlows,
        lostPackets,
        lostBytes,
        errTotal,
        sndbufPeak
    }
    STATUS      current
    DESCRIPTION "Statistics totals."
    ::= { iptNetflowGroups 3 }

iptNetflowCpuGroup OBJECT-GROUP
    OBJECTS {
        cpuIndex,
        cpuInPacketRate,
        cpuInFlows,
        cpuInPackets,
        cpuInBytes,
        cpuHashMetric,
        cpuDropPackets,
        cpuDropBytes,
        cpuErrTrunc,
        cpuErrFrag,
        cpuErrAlloc,
        cpuErrMaxflows
    }
    STATUS      current
    DESCRIPTION "Per CPU statistics."
    ::= { iptNetflowGroups 4 }

iptNetflowSockGroup OBJECT-GROUP
    OBJECTS {
        sockDestination,
        sockActive,
        sockErrConnect,
        sockErrFull,
        sockErrCberr,
        sockErrOther,
        sockSndbuf,
        sockSndbufFill,
        sockSndbufPeak
    }
    STATUS      current
    DESCRIPTION "Per socket statistics."
    ::= { iptNetflowGroups 5 }

END
ipt-netflow-2.2/install-dkms.sh0000755000000000000000000000522413213006644015266 0ustar  rootroot#!/bin/bash
# This script cleanly re-installs the module into the DKMS tree.
#
# Usage: install-dkms.sh --install | --uninstall
#
# Must be run from the module source directory: it checks for ./dkms.conf
# and runs ./version.sh to obtain the module version.

# Append the usual system directories so dkms can be found even when the
# caller's PATH is minimal.
PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin

# Exactly one mode must be selected by the first argument.
if [ "$1" = --uninstall ]; then
  echo "Uninstalling from DKMS..."
elif [ "$1" = --install ]; then
  echo "Installing into DKMS..."
else
  # Missing or unknown mode: say why instead of exiting silently.
  echo "Usage: $0 --install | --uninstall" >&2
  exit 1
fi

# Nothing can be done without the dkms tool itself.
if ! which dkms >/dev/null 2>&1; then
  echo "! You don't have DKMS accessible in system."
  exit 1
fi

# dkms.conf must be present in the current directory.
if [ ! -e dkms.conf ]; then
  echo "! You don't have DKMS configured for this module."
  exit 1
fi

# Version of the module in the current source tree.
MVERSION=$(./version.sh)

# contains NEEDLE [ITEM...]
# Succeeds (returns 0) when NEEDLE is exactly equal to one of the ITEMs,
# fails (returns 1) otherwise, including when no ITEMs are given.
contains() {
  for e in "${@:2}"; do
    if [[ "$e" == "$1" ]]; then
      return 0
    fi
  done
  return 1
}

# Build D: the de-duplicated list of ipt-netflow versions reported by
# 'dkms status'.
D=() # to be list of installed versions
OLDIFS="$IFS"
# IFS is set to newline so that each matching line of 'dkms status'
# becomes one element of A; the previous IFS is restored right after.
IFS=$'\n' A=(`dkms status | grep ^ipt-netflow`)
IFS="$OLDIFS"

for i in "${A[@]}"; do
  # Split the line on whitespace and take the second field, then strip a
  # trailing ',' or ':' from it.
  # NOTE(review): this assumes the second field of a 'dkms status' line is
  # the module version - depends on the dkms output format, confirm.
  z=($i)
  v=${z[1]}
  v=${v%,}
  v=${v%:}
  # Record each version only once.
  if ! contains "$v" "${D[@]}"; then
    D+=($v)
  fi
done

# Deal with versions already known to DKMS:
#  - exactly one: report what it is, remove it from DKMS and delete its
#    source tree under /usr/src;
#  - more than one: print removal hints and abort with exit status 1;
#  - none: fall through.
if [ ${#D[@]} -eq 1 ]; then
  # single version is already installed.
  # ($D without an index expands to the first, here only, element.)
  if [ $D = "$MVERSION" ]; then
    echo "! You have same version of module already installed into DKMS."
  else
    echo "! You have different version of module installed into DKMS."
  fi
  # The .automatic marker file is created by the install part of this
  # script, so its presence tells script installs from manual ones.
  if [ ! -d /usr/src/ipt-netflow-$D ]; then
    echo "! Can not find DKMS dir for it, that's plain weird."
  elif [ -e /usr/src/ipt-netflow-$D/.automatic ]; then
    echo "! That version was automatically installed by this script,"
    echo "! thus, is safe to remove. No worries."
  else
    echo "! That version was manually installed by you."
  fi

  # Pass --no-depmod to dkms only if the dkms executable file contains the
  # string "no-depmod" (presumably meaning the option is supported).
  nodepmod=
  if grep -qs no-depmod `which dkms`; then
    nodepmod=--no-depmod
  fi
  echo "! Removing from dkms..."
  dkms $nodepmod remove ipt-netflow/$D --all

  # The source tree is removed regardless of how it was installed.
  if [ -d "/usr/src/ipt-netflow-$D" ]; then
    echo "! Removing source tree from /usr/src/ipt-netflow-$D"
    rm -rf "/usr/src/ipt-netflow-$D"
  fi

elif [ ${#D[@]} -gt 1 ]; then
  # multiple versions are installed.
  # Nothing is removed automatically in this case; the user is shown the
  # commands to run.
  echo "! You have multiple versions of module already installed in DKMS."
  echo "! Please remove them manually to avoid conflict."
  echo "! 'dkms status' output:"
  dkms status
  echo "! Suggested commands to remove them:"
  for i in ${D[@]}; do
    echo "!   root# dkms remove ipt-netflow/$i --all"
  done
  exit 1
fi

# For --uninstall the work is finished once old versions are removed.
if [ "$1" = --uninstall ]; then
  exit 0
fi

# Running from inside the DKMS source dir itself: do not delete and re-copy
# the tree we are standing in, just register it with dkms.
if [ "$PWD" = "/usr/src/ipt-netflow-$MVERSION" ]; then
  echo "! You are already in DKMS dir."
  dkms add -m ipt-netflow -v $MVERSION
  exit $?
fi

# Recreate /usr/src/ipt-netflow-$MVERSION from scratch with the files of the
# current directory.
echo "! Installing $MVERSION into DKMS..."
rm -rf /usr/src/ipt-netflow-$MVERSION

mkdir -p /usr/src/ipt-netflow-$MVERSION
cp -p *.[ch] Make* READ* conf* irq* *.sh *.conf /usr/src/ipt-netflow-$MVERSION/
# Copy the git metadata too when present.
if [ -d .git ]; then
  cp -pr .git /usr/src/ipt-netflow-$MVERSION/
fi
# Marker checked by the removal code: this tree was installed by the script.
touch /usr/src/ipt-netflow-$MVERSION/.automatic

# The script's exit status is that of 'dkms add'.
dkms add -m ipt-netflow -v $MVERSION
exit $?

ipt-netflow-2.2/ipt_NETFLOW.c0000644000000000000000000046327313213006644014477 0ustar  rootroot/*
 * This is NetFlow exporting module (NETFLOW target) for linux
 * (c) 2008-2016 <abc@telekom.ru>
 *
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/seq_file.h>
#include <linux/random.h>
#include <linux/in6.h>
#include <linux/inet.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/hash.h>
#include <linux/delay.h>
#include <linux/spinlock_types.h>
#include <linux/ktime.h>
#include <linux/if_arp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
#include <net/dst.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#ifndef ENABLE_NAT
# undef CONFIG_NF_NAT_NEEDED
#endif
#if defined(ENABLE_VLAN) || defined(ENABLE_PROMISC)
# include <linux/if_vlan.h>
#endif
#ifdef ENABLE_MAC
# include <linux/if_ether.h>
# include <linux/etherdevice.h>
#endif
#if defined(CONFIG_NF_NAT_NEEDED)
# include <linux/notifier.h>
# include <net/netfilter/nf_conntrack.h>
# include <net/netfilter/nf_conntrack_core.h>
#endif
#include <linux/version.h>
#include <asm/unaligned.h>
#ifdef HAVE_LLIST
        /* llist.h is officially defined since linux 3.1,
         * but centos6 have it backported on its 2.6.32.el6 */
# include <linux/llist.h>
#endif
#include "compat.h"
#include "ipt_NETFLOW.h"
#include "murmur3.h"
#ifdef CONFIG_BRIDGE_NETFILTER
# include <linux/netfilter_bridge.h>
#endif
#ifdef CONFIG_SYSCTL
# include <linux/sysctl.h>
#endif
#ifndef CONFIG_NF_CONNTRACK_EVENTS
/* No conntrack events in the kernel imply no natevents. */
# undef CONFIG_NF_NAT_NEEDED
#endif

/*
 * Module version string. The literal below is the default; if version.h
 * defines GITVERSION, that value replaces it.
 */
#define IPT_NETFLOW_VERSION "2.2"   /* Note that if you are using git, you
                                       will see version in other format. */
#include "version.h"
#ifdef GITVERSION
#undef IPT_NETFLOW_VERSION
#define IPT_NETFLOW_VERSION GITVERSION
#endif

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("<abc@telekom.ru>");
MODULE_DESCRIPTION("iptables NETFLOW target module");
MODULE_VERSION(IPT_NETFLOW_VERSION);
MODULE_ALIAS("ip6t_NETFLOW");

/* Text buffer for a version string and its length; filled elsewhere. */
static char version_string[128];
static int  version_string_size;
static struct duration start_ts; /* ts of module start (ktime) */

/*
 * Export destination. The parameter pointer initially refers to the static
 * buffer holding the default "127.0.0.1:2055"; permissions 0444 make the
 * module parameter read-only in sysfs.
 */
#define DST_SIZE 256
static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
static char *destination = destination_buf;
module_param(destination, charp, 0444);
MODULE_PARM_DESC(destination, "export destination ipaddress:port");

/* Flow sampler state; compiled only when ENABLE_SAMPLER is defined. */
#ifdef ENABLE_SAMPLER
static char sampler_buf[128] = "";
static char *sampler = sampler_buf;
module_param(sampler, charp, 0444);
MODULE_PARM_DESC(sampler, "flow sampler parameters");
static atomic_t flow_count = ATOMIC_INIT(0); /* flow counter for deterministic sampler */
/* NOTE(review): these are atomic64_t but initialized with ATOMIC_INIT, not
 * ATOMIC64_INIT - confirm this is intended for all supported kernels. */
static atomic64_t flows_observed = ATOMIC_INIT(0);
static atomic64_t flows_selected = ATOMIC_INIT(0);
#define SAMPLER_INFO_INTERVAL (5*60)
static unsigned long ts_sampler_last = 0; /* template send time (jiffies) */
static struct duration sampling_ts; /* ts of sampling start (ktime) */
/* sampler_nf_v5() packs the mode shifted left by SAMPLER_SHIFT together
 * with the interval; SAMPLER_INTERVAL_M is the mask of the bits below the
 * shift (the low 14 bits). */
#define SAMPLER_SHIFT       14
#define SAMPLER_INTERVAL_M  ((1 << SAMPLER_SHIFT) - 1)
/* Sampling modes stored in samp.mode. */
enum {
        SAMPLER_DETERMINISTIC = 1,
        SAMPLER_RANDOM        = 2,
        SAMPLER_HASH          = 3
};
/* Sampler configuration: mode and interval overlay a single u32 (v32) via
 * the union, presumably so both can be read or written at once - confirm
 * against the users of v32. */
struct sampling {
        union {
                u32             v32;
                struct {
                        u8      mode;
                        u16     interval;
                };
        };
} samp;
#endif

/*
 * Tunables exposed as module parameters. Mode 0644 parameters are writable
 * through sysfs, mode 0444 ones are read-only there.
 */

/* Inactive flow timeout in seconds (default 15). */
static int inactive_timeout = 15;
module_param(inactive_timeout, int, 0644);
MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");

/* Active flow timeout in seconds (default 30 minutes). */
static int active_timeout = 30 * 60;
module_param(active_timeout, int, 0644);
MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");

/* CPU the export worker is scheduled on; a negative value means no cpu
 * lock (see _schedule_scan_worker). */
static int exportcpu = -1;
module_param(exportcpu, int, 0644);
MODULE_PARM_DESC(exportcpu, "lock exporter to this cpu");

#ifdef ENABLE_PROMISC
static int promisc = 0;
module_param(promisc, int, 0444);
MODULE_PARM_DESC(promisc, "enable promisc hack (0=default, 1)");
static DEFINE_MUTEX(promisc_lock);
#endif

static int debug = 0;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "debug verbosity level");

/* No initializer here, so the static starts at zero. */
static int sndbuf;
module_param(sndbuf, int, 0444);
MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");

static int protocol = 5;
module_param(protocol, int, 0444);
MODULE_PARM_DESC(protocol, "netflow protocol version (5, 9, 10=IPFIX)");

static unsigned int refresh_rate = 20;
module_param(refresh_rate, uint, 0644);
MODULE_PARM_DESC(refresh_rate, "NetFlow v9/IPFIX refresh rate (packets)");

static unsigned int timeout_rate = 30;
module_param(timeout_rate, uint, 0644);
MODULE_PARM_DESC(timeout_rate, "NetFlow v9/IPFIX timeout rate (minutes)");

/* NOTE(review): 'one' is not referenced in this part of the file;
 * presumably a bound used by the sysctl table - confirm. */
static int one = 1;
/* Lower and upper clamp (in jiffies) applied to the export worker delay in
 * _schedule_scan_worker(). Only scan_min is a module parameter. */
static unsigned int scan_min = 1;
static unsigned int scan_max = HZ / 10;
module_param(scan_min, uint, 0644);
MODULE_PARM_DESC(scan_min, "Minimal interval between export scans (jiffies)");

/* SNMP-index conversion rules; compiled only when SNMP_RULES is defined. */
#ifdef SNMP_RULES
static char snmp_rules_buf[DST_SIZE] = "";
static char *snmp_rules = snmp_rules_buf;
module_param(snmp_rules, charp, 0444);
MODULE_PARM_DESC(snmp_rules, "SNMP-index conversion rules");
static unsigned char *snmp_ruleset;
static DEFINE_SPINLOCK(snmp_lock);
#endif

/* NAT events switch; only available when CONFIG_NF_NAT_NEEDED survived the
 * #undef checks near the includes. */
#ifdef CONFIG_NF_NAT_NEEDED
static int natevents = 0;
module_param(natevents, int, 0444);
MODULE_PARM_DESC(natevents, "enable NAT Events");
#endif

/* Requested hash table size; no initializer, so it starts at zero. */
static int hashsize;
module_param(hashsize, int, 0444);
MODULE_PARM_DESC(hashsize, "hash table size");

/* Upper limit on the number of flows. */
static int maxflows = 2000000;
module_param(maxflows, int, 0644);
MODULE_PARM_DESC(maxflows, "maximum number of flows");
static int peakflows = 0;
static unsigned long peakflows_at; /* jiffies */

static int engine_id = 0;
module_param(engine_id, int, 0644);
MODULE_PARM_DESC(engine_id, "Observation Domain ID");

/* Aggregation ruleset support; compiled only when ENABLE_AGGR is defined. */
#ifdef ENABLE_AGGR
#define AGGR_SIZE 1024
static char aggregation_buf[AGGR_SIZE] = "";
static char *aggregation = aggregation_buf;
/* Mode 0400: readable by the owner only. */
module_param(aggregation, charp, 0400);
MODULE_PARM_DESC(aggregation, "aggregation ruleset");
/* Two rule lists plus a rwlock declared alongside them.
 * NOTE(review): presumably aggr_n_list holds network rules and aggr_p_list
 * port rules, both protected by aggr_lock - confirm against their users. */
static LIST_HEAD(aggr_n_list);
static LIST_HEAD(aggr_p_list);
static DEFINE_RWLOCK(aggr_lock);
/* Forward declarations; the definitions are further down in the file. */
static void aggregation_remove(struct list_head *list);
static int add_aggregation(char *ptr);
#endif

/* Per-cpu statistics counters. */
static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat);
/* List of export sockets and a mutex declared alongside it
 * (presumably protecting the list - confirm against its users). */
static LIST_HEAD(usock_list);
static DEFINE_MUTEX(sock_lock);

/* Number of hash table stripes (256); being a power of two, the stripe
 * index can be taken with LOCK_COUNT_MASK. */
#define LOCK_COUNT (1<<8)
#define LOCK_COUNT_MASK (LOCK_COUNT-1)
struct stripe_entry {
        struct list_head list; /* struct ipt_netflow, list for export */
        spinlock_t lock; /* this locks both: hash table stripe & list above */
};
static struct stripe_entry htable_stripes[LOCK_COUNT];
static DEFINE_RWLOCK(htable_rwlock); /* global rwlock to protect htable[] resize */
static struct hlist_head *htable __read_mostly; /* hash table memory */
static unsigned int htable_size __read_mostly = 0; /* buckets */
/* How it's organized:
 *  htable_rwlock locks access to htable[hash], where
 *  htable[htable_size] is big/resizable hash table, which is striped into
 *  htable_stripes[LOCK_COUNT] smaller/static hash table, which contains
 *  .list - list of flows ordered by exportability (usually it's access time)
 *  .lock - lock to both: that .list and to htable[hash], where
 *  hash to the htable[] is hash_netflow(&tuple) % htable_size
 *  hash to the htable_stripes[] is hash & LOCK_COUNT_MASK
 */
#ifdef HAVE_LLIST
static LLIST_HEAD(export_llist); /* flows to purge */
#endif
#ifdef CONFIG_NF_NAT_NEEDED
static LIST_HEAD(nat_list); /* nat events */
static DEFINE_SPINLOCK(nat_lock);
static unsigned long nat_events_start = 0;
static unsigned long nat_events_stop = 0;
#endif
static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */
/* Flow counter; snmp_seq_show() reads it as the current number of flows. */
static atomic_t ipt_netflow_count = ATOMIC_INIT(0);

/*
 * State of the export PDU currently being assembled. A single PDU buffer
 * is shared by all protocol versions through the union below.
 */
static long long pdu_packets = 0, pdu_traf = 0; /* how much accounted traffic in pdu */
static unsigned int pdu_count = 0;
static unsigned int pdu_seq = 0;
static unsigned int pdu_data_records = 0; /* Data records */
static unsigned int pdu_flow_records = 0; /* Data records with flows (for stat only) */
static unsigned int pdu_tpl_records = 0;
static unsigned long pdu_ts_mod; /* ts(jiffies) of last flow */
static unsigned int pdu_needs_export = 0;
/* One buffer viewed as a NetFlow v5, NetFlow v9 or IPFIX packet; 'version'
 * overlays the start of each layout. */
static union {
        __be16 version;
        struct netflow5_pdu v5;
        struct netflow9_pdu v9;
        struct ipfix_pdu ipfix;
} pdu;
/* NOTE(review): presumably points just past the data already written into
 * pdu - confirm against the export code. */
static __u8 *pdu_data_used;
static __u8 *pdu_high_wm; /* high watermark */
static struct flowset_data *pdu_flowset = NULL; /* current data flowset */

/* Export worker bookkeeping. */
static unsigned long wk_start; /* last start of worker (jiffies) */
static unsigned long wk_busy;  /* last work busy time (jiffies) */
static unsigned int wk_count;  /* how much is scanned */
static unsigned int wk_cpu;
static unsigned int wk_trylock;
static unsigned int wk_llist;
/* Function pointers declared here and assigned elsewhere in the file;
 * presumably selected per protocol by netflow_switch_version() - confirm. */
static void (*netflow_export_flow)(struct ipt_netflow *nf);
static void (*netflow_export_pdu)(void); /* called on timeout */
static void netflow_switch_version(int ver);

/* The work item is declared differently before kernel 2.6.20, where the
 * work function takes a void pointer and DECLARE_WORK takes a data
 * argument. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void netflow_work_fn(void *work);
static DECLARE_WORK(netflow_work, netflow_work_fn, NULL);
#else
static void netflow_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn);
#endif
static struct timer_list rate_timer;

/* TCP flag masks: 0x12 is SYN|ACK, 0x05 is FIN|RST. */
#define TCP_SYN_ACK 0x12
#define TCP_FIN_RST 0x05

/* Rate accumulators; judging by the names these are packet and byte rates
 * over the last second, minute and five minutes. They are updated
 * elsewhere in the file. */
static long long sec_prate = 0, sec_brate = 0;
static long long min_prate = 0, min_brate = 0;
static long long min5_prate = 0, min5_brate = 0;
/* NOTE(review): 100 presumably stands for 1.00 in fixed point with two
 * decimals (the MIB's hashMetric uses such a type) - confirm. */
#define METRIC_DFL 100
static int metric = METRIC_DFL,
           min15_metric = METRIC_DFL,
           min5_metric = METRIC_DFL,
           min_metric = METRIC_DFL; /* hash metrics */

/* Forward declarations; the definitions are further down in the file. */
static int set_hashsize(int new_size);
static void destination_removeall(void);
static int add_destinations(const char *ptr);
static int netflow_scan_and_export(int flush);
/* Values for the 'flush' argument of netflow_scan_and_export(). */
enum {
        DONT_FLUSH, AND_FLUSH
};
static int template_ids = FLOWSET_DATA_FIRST;
static int tpl_count = 0; /* how much active templates */
/* Interval constants; presumably in seconds, compared against the jiffies
 * timestamps below after scaling - confirm. */
#define STAT_INTERVAL    (1*60)
#define SYSINFO_INTERVAL (5*60)
static unsigned long ts_stat_last = 0; /* (jiffies) */
static unsigned long ts_sysinf_last = 0; /* (jiffies) */
static unsigned long ts_ifnames_last = 0; /* (jiffies) */

/* Build a 32-bit mask with the top 'bits' bits set (a prefix length turned
 * into a netmask value); zero bits gives an all-zero mask. */
static inline __be32 bits2mask(int bits) {
        if (!bits)
                return 0;
        return 0xffffffff << (32 - bits);
}

/* Count how many left shifts it takes to clear a 32-bit mask; for a mask
 * of contiguous leading ones this is its prefix length. */
static inline int mask2bits(__be32 mask) {
        int shifts = 0;

        while (mask) {
                mask = (mask << 1) & 0xffffffff;
                shifts++;
        }
        return shifts;
}

/* While this mutex is held the worker is stopped and is not rescheduled,
 * so worker sub-functions may be called by hand. */
static DEFINE_MUTEX(worker_lock);

/* Current delay (jiffies) before the next run of the export worker. */
static int worker_delay = HZ / 10;

/*
 * Queue the export worker after an adaptive delay.
 *
 * @pdus: value used to adapt the delay: positive shortens it (delay is
 *        divided by pdus), zero or negative doubles it.
 *
 * The delay is then clamped to scan_min..scan_max. If exportcpu names a
 * usable cpu the work is queued on that cpu; if it names an unusable one
 * a warning is logged and the cpu lock is switched off.
 */
static inline void _schedule_scan_worker(const int pdus)
{
        const int target_cpu = exportcpu;

        /* rudimentary congestion avoidance */
        if (pdus <= 0)
                worker_delay *= 2;
        else
                worker_delay /= pdus;

        if (worker_delay < scan_min)
                worker_delay = scan_min;
        else if (worker_delay > scan_max)
                worker_delay = scan_max;

        /* no cpu lock requested */
        if (target_cpu < 0) {
                schedule_delayed_work(&netflow_work, worker_delay);
                return;
        }

        if (target_cpu < NR_CPUS && cpu_online(target_cpu)) {
                schedule_delayed_work_on(target_cpu, &netflow_work, worker_delay);
                return;
        }

        /* requested cpu is unusable: drop the lock and run anywhere */
        printk(KERN_WARNING "ipt_NETFLOW: can't schedule exporter on cpu %d. Disabling cpu lock.\n",
            target_cpu);
        exportcpu = -1;
        schedule_delayed_work(&netflow_work, worker_delay);
}

/* This is only called soon after pause_scan_worker. */
/* Resume the export worker: re-queue it (passing 0 pdus, which doubles the
 * delay before clamping) and then release worker_lock, which
 * pause_scan_worker() took. */
static inline void cont_scan_worker(void)
{
        _schedule_scan_worker(0);
        mutex_unlock(&worker_lock);
}

/* Cancel the pending export work, using whichever cancel call the running
 * kernel version provides. */
static inline void _unschedule_scan_worker(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
        cancel_delayed_work_sync(&netflow_work);
#else
        cancel_rearming_delayed_work(&netflow_work);
#endif
}

/* This is only used for quick pause (in procctl). */
/* Take worker_lock and cancel the export work. The lock stays held on
 * return; cont_scan_worker() releases it. */
static inline void pause_scan_worker(void)
{
        mutex_lock(&worker_lock);
        _unschedule_scan_worker();
}

#ifdef ENABLE_SAMPLER
/* Sampling mode as stored in the global samp. */
static inline unsigned char get_sampler_mode(void)
{
        return samp.mode;
}
/* Sampling interval as stored in the global samp. */
static inline unsigned short get_sampler_interval(void)
{
        return samp.interval;
}
/* Human readable name of the current sampling mode; anything that is
 * neither deterministic nor random is reported as "hash". */
static inline const char *sampler_mode_string(void)
{
        switch (get_sampler_mode()) {
        case SAMPLER_DETERMINISTIC:
                return "deterministic";
        case SAMPLER_RANDOM:
                return "random";
        default:
                return "hash";
        }
}
/* map SAMPLER_HASH into SAMPLER_RANDOM */
static unsigned char get_sampler_mode_nf(void)
{
        const unsigned char mode = get_sampler_mode();

        if (mode == SAMPLER_HASH)
                return SAMPLER_RANDOM;
        return mode;
}
/* 16-bit sampling value: mode above SAMPLER_SHIFT, interval in the bits
 * below it. */
static inline unsigned short sampler_nf_v5(void)
{
        const unsigned short mode_bits = get_sampler_mode_nf() << SAMPLER_SHIFT;

        return mode_bits | get_sampler_interval();
}
#endif

/*
 * Format a socket address as text into buf (at most len bytes):
 * "a.b.c.d:port" for IPv4 and "[v6addr]:port" for IPv6.
 * Unlike snprintf, the return value is buf itself, not a length.
 * Kernels from 3.11 on use the %pISpc format; older ones format by hand
 * and print "(invalid address)" for any other address family.
 */
static char *snprintf_sockaddr(char *buf, size_t len, const struct sockaddr_storage *ss)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
        snprintf(buf, len, "%pISpc", ss);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
        switch (ss->ss_family) {
        case AF_INET: {
                const struct sockaddr_in *sin4 = (const struct sockaddr_in *)ss;

                snprintf(buf, len, "%pI4:%u",
                    &sin4->sin_addr, ntohs(sin4->sin_port));
                break;
        }
        case AF_INET6: {
                const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ss;

                snprintf(buf, len, "[%pI6c]:%u",
                    &sin6->sin6_addr, ntohs(sin6->sin6_port));
                break;
        }
        default:
                snprintf(buf, len, "(invalid address)");
                break;
        }
#else
        /* kernels before 2.6.32: no %pI4/%pI6c, print the pieces by hand */
        switch (ss->ss_family) {
        case AF_INET: {
                const struct sockaddr_in *sin4 = (const struct sockaddr_in *)ss;

                snprintf(buf, len, "%u.%u.%u.%u:%u",
                    NIPQUAD(sin4->sin_addr.s_addr),
                    ntohs(sin4->sin_port));
                break;
        }
        case AF_INET6: {
                const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ss;
                const __be16 *w = sin6->sin6_addr.s6_addr16;

                snprintf(buf, len, "[%x:%x:%x:%x:%x:%x:%x:%x]:%u",
                    ntohs(w[0]), ntohs(w[1]), ntohs(w[2]), ntohs(w[3]),
                    ntohs(w[4]), ntohs(w[5]), ntohs(w[6]), ntohs(w[7]),
                    ntohs(sin6->sin6_port));
                break;
        }
        default:
                snprintf(buf, len, "(invalid address)");
                break;
        }
#endif
        return buf;
}

/* Format a socket address into a function-local static buffer and return
 * it. The buffer is overwritten by every call, so the result must be used
 * or copied before the next call. */
static char *print_sockaddr(const struct sockaddr_storage *ss)
{
        static char addr_text[64];

        return snprintf_sockaddr(addr_text, sizeof(addr_text), ss);
}

/* Return 1 if the address part of ss is all zero (IPv4 or IPv6), 0 if it
 * is not. Any other address family counts as zero. */
static int is_zero_addr(const struct sockaddr_storage *ss)
{
        switch (ss->ss_family) {
        case AF_INET: {
                const struct sockaddr_in *sin4 = (const struct sockaddr_in *)ss;

                return sin4->sin_addr.s_addr == 0;
        }
        case AF_INET6: {
                const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ss;
                int i;

                /* all four 32-bit words must be zero */
                for (i = 0; i < 4; i++)
                        if (sin6->sin6_addr.s6_addr32[i] != 0)
                                return 0;
                return 1;
        }
        default: /* AF_UNSPEC */
                return 1;
        }
}

/* Render an export socket as "dest[@source][%device]" into a function-local
 * static buffer and return it.
 * The source part is added only when usock->saddr is non-zero, and a
 * trailing ":0" (zero port) is cut off right after it is appended; the
 * device part is added only when usock->sdev is a non-empty string.
 * The buffer is static, so each call overwrites the previous result. */
static char *print_usock_addr(struct ipt_netflow_sock *usock)
{
        static char buf[128];
        size_t used;

        snprintf(buf, sizeof(buf), "%s", print_sockaddr(&usock->addr));

        if (!is_zero_addr(&usock->saddr)) {
                used = strlen(buf);
                snprintf(&buf[used], sizeof(buf) - used, "@%s",
                    print_sockaddr(&usock->saddr));
                used = strlen(buf);
                /* strip zero port */
                if (used > 2 && buf[used - 2] == ':' && buf[used - 1] == '0')
                        buf[used - 2] = '\0';
        }
        if (usock->sdev[0] != '\0') {
                used = strlen(buf);
                snprintf(&buf[used], sizeof(buf) - used, "%%%s", usock->sdev);
        }
        return buf;
}

#ifdef CONFIG_PROC_FS
/* Absolute value of an int. */
static inline int ABS(int x) { return x >= 0 ? x : -x; }
/* x / y done as a 64-bit division through do_div() and truncated to int;
 * evaluates to 0 when y is zero instead of dividing by zero. */
#define SAFEDIV(x,y) ((y)? ({ u64 __tmp = x; do_div(__tmp, y); (int)__tmp; }) : 0)
/* Expands to TWO comma-separated arguments: the quotient (int)x / prec and
 * the absolute value of the remainder (int)x % prec. Meant to feed a
 * "%d.%02d"-style pair of conversions in a format string. */
#define FFLOAT(x, prec) (int)(x) / prec, ABS((int)(x) % prec)
/* seq_file show callback that prints the statistics in a flat, easily
 * parsed form: a list of "name value" lines with totals, then one "cpuN ..."
 * line per present CPU, then one "sockN ..." line per export socket and
 * finally "sndbufPeak".
 * (Presumably consumed by the SNMP agent, judging by the name - confirm.)
 * Always returns 0. */
static int snmp_seq_show(struct seq_file *seq, void *v)
{
        int cpu;
        unsigned int nr_flows = atomic_read(&ipt_netflow_count);
        struct ipt_netflow_stat t = { 0 };
        struct ipt_netflow_sock *usock;
        unsigned int sndbuf_peak = 0;
        int snum = 0;

        /* sum the per-cpu counters into t */
        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

                /* searched/found are needed for hashMetric below; without
                 * them the metric degenerates to notfound / notfound */
                t.searched      += st->searched;
                t.found         += st->found;
                t.notfound      += st->notfound;
                t.pkt_total     += st->pkt_total;
                t.traf_total    += st->traf_total;

                t.send_failed   += st->send_failed;
                t.sock_cberr    += st->sock_cberr;

                t.exported_rate += st->exported_rate;
                t.exported_pkt  += st->exported_pkt;
                t.exported_flow += st->exported_flow;
                t.exported_traf += st->exported_traf;

                t.pkt_drop      += st->pkt_drop;
                t.traf_drop     += st->traf_drop;
                t.pkt_lost      += st->pkt_lost;
                t.traf_lost     += st->traf_lost;
                t.flow_lost     += st->flow_lost;
                /* pkt_out/traf_out are subtracted from the totals to get
                 * hashPackets/hashBytes below */
                t.pkt_out       += st->pkt_out;
                t.traf_out      += st->traf_out;
        }


        seq_printf(seq,
            "inBitRate    %llu\n"
            "inPacketRate %llu\n"
            "inFlows      %llu\n"
            "inPackets    %llu\n"
            "inBytes      %llu\n"
            "hashMetric   %d.%02d\n"
            "hashMemory   %lu\n"
            "hashFlows    %u\n"
            "hashPackets  %llu\n"
            "hashBytes    %llu\n"
            "dropPackets  %llu\n"
            "dropBytes    %llu\n"
            "outByteRate  %u\n"
            "outFlows     %llu\n"
            "outPackets   %llu\n"
            "outBytes     %llu\n"
            "lostFlows    %llu\n"
            "lostPackets  %llu\n"
            "lostBytes    %llu\n"
            "errTotal     %u\n",
            sec_brate,
            sec_prate,
            t.notfound,
            t.pkt_total,
            t.traf_total,
            FFLOAT(SAFEDIV(100LL * (t.searched + t.found + t.notfound), (t.found + t.notfound)), 100),
            (unsigned long)nr_flows * sizeof(struct ipt_netflow) +
                   (unsigned long)htable_size * sizeof(struct hlist_head),
            nr_flows,
            t.pkt_total - t.pkt_out,
            t.traf_total - t.traf_out,
            t.pkt_drop,
            t.traf_drop,
            t.exported_rate,
            t.exported_flow,
            t.exported_pkt,
            t.exported_traf,
            t.flow_lost,
            t.pkt_lost,
            t.traf_lost,
            t.send_failed + t.sock_cberr);

        /* per-cpu breakdown */
        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

                seq_printf(seq,
                    "cpu%u %u %llu %llu %llu %d.%02d %llu %llu %u %u %u %u\n",
                    cpu,
                    st->pkt_total_rate,
                    st->notfound,
                    st->pkt_total,
                    st->traf_total,
                    FFLOAT(st->metric, 100),
                    st->pkt_drop,
                    st->traf_drop,
                    st->truncated,
                    st->frags,
                    st->alloc_err,
                    st->maxflows_err);
        }

        /* per-socket lines; sock_lock protects the walk over usock_list */
        mutex_lock(&sock_lock);
        list_for_each_entry(usock, &usock_list, list) {
                int wmem_peak = atomic_read(&usock->wmem_peak);

                /* remember the largest peak over all sockets */
                if (sndbuf_peak < wmem_peak)
                        sndbuf_peak = wmem_peak;
                seq_printf(seq, "sock%d %s %d %u %u %u %u",
                    snum,
                    print_usock_addr(usock),
                    !!usock->sock,
                    usock->err_connect,
                    usock->err_full,
                    usock->err_cberr,
                    usock->err_other);
                if (usock->sock) {
                        struct sock *sk = usock->sock->sk;

                        seq_printf(seq, " %u %u %u\n",
                            sk->sk_sndbuf,
                            compat_refcount_read(&sk->sk_wmem_alloc),
                            wmem_peak);
                } else
                        seq_printf(seq, " 0 0 %u\n", wmem_peak);

                snum++;
        }
        mutex_unlock(&sock_lock);
        seq_printf(seq, "sndbufPeak   %u\n", sndbuf_peak);

        return 0;
}

/* procfs statistics /proc/net/stat/ipt_netflow
 * seq_file show callback that prints the human-readable report: module
 * version and compiled-in features, protocol and timeouts, optional
 * sampler/promisc/natevents state, flow table and hash statistics, rates,
 * total and per-cpu counters, export counters, one line per export socket
 * and, when compiled in, aggregation rules and SNMP rules.
 * Always returns 0. */
static int nf_seq_show(struct seq_file *seq, void *v)
{
        unsigned int nr_flows = atomic_read(&ipt_netflow_count);
        int cpu;
        struct ipt_netflow_stat t = { 0 };
        struct ipt_netflow_sock *usock;
#ifdef ENABLE_AGGR
        struct netflow_aggr_n *aggr_n;
        struct netflow_aggr_p *aggr_p;
#endif
        int snum = 0;
        /* seconds elapsed since peakflows_at */
        int peak = (jiffies - peakflows_at) / HZ;

        /* version line; the feature list is assembled at compile time */
        seq_printf(seq, "ipt_NETFLOW " IPT_NETFLOW_VERSION ", srcversion %s;"
#ifdef ENABLE_AGGR
            " aggr"
#endif
#ifdef ENABLE_DIRECTION
            " dir"
#endif
#ifdef HAVE_LLIST
            " llist"
#endif
#ifdef ENABLE_MAC
            " mac"
#endif
#ifdef CONFIG_NF_NAT_NEEDED
            " nel"
#endif
#ifdef ENABLE_PROMISC
            " promisc"
# ifdef PROMISC_MPLS
            "+mpls"
# endif
#endif
#ifdef ENABLE_SAMPLER
            " samp"
# ifdef SAMPLING_HASH
            "-h"
# endif
#endif
#ifdef SNMP_RULES
            " snmp"
#endif
#ifdef ENABLE_VLAN
            " vlan"
#endif
            "\n",
            THIS_MODULE->srcversion);

        seq_printf(seq, "Protocol version %d", protocol);
        if (protocol == 10)
                seq_printf(seq, " (ipfix)");
        else
                seq_printf(seq, " (netflow)");
        if (protocol >= 9)
                seq_printf(seq, ", refresh-rate %u, timeout-rate %u, (templates %d, active %d).\n",
                    refresh_rate, timeout_rate, template_ids - FLOWSET_DATA_FIRST, tpl_count);
        else
                seq_printf(seq, "\n");

        seq_printf(seq, "Timeouts: active %ds, inactive %ds. Maxflows %u\n",
            active_timeout,
            inactive_timeout,
            maxflows);

        /* sum the per-cpu counters into t */
        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

                t.searched      += st->searched;
                t.found         += st->found;
                t.notfound      += st->notfound;
                t.pkt_total     += st->pkt_total;
                t.traf_total    += st->traf_total;
#ifdef ENABLE_PROMISC
                t.pkt_promisc   += st->pkt_promisc;
                t.pkt_promisc_drop += st->pkt_promisc_drop;
#endif
                t.truncated     += st->truncated;
                t.frags         += st->frags;
                t.maxflows_err  += st->maxflows_err;
                t.alloc_err     += st->alloc_err;
                t.send_failed   += st->send_failed;
                t.sock_cberr    += st->sock_cberr;

                t.exported_rate += st->exported_rate;
                t.exported_pkt  += st->exported_pkt;
                t.exported_flow += st->exported_flow;
                t.exported_traf += st->exported_traf;

                t.pkt_total_rate += st->pkt_total_rate;
                t.pkt_drop      += st->pkt_drop;
                t.traf_drop     += st->traf_drop;
                t.pkt_lost      += st->pkt_lost;
                t.traf_lost     += st->traf_lost;
                t.flow_lost     += st->flow_lost;
                t.pkt_out       += st->pkt_out;
                t.traf_out      += st->traf_out;
#ifdef ENABLE_SAMPLER
                t.pkts_observed += st->pkts_observed;
                t.pkts_selected += st->pkts_selected;
#endif
        }

#ifdef ENABLE_SAMPLER
        if (get_sampler_mode()) {
                seq_printf(seq, "Flow sampling mode %s one-out-of %u.",
                    sampler_mode_string(),
                    get_sampler_interval());
                /* atomic64_read() yields a 64-bit value whose C type differs
                 * between architectures and kernel versions, so cast it
                 * explicitly and print with %llu to keep the varargs
                 * consistent everywhere (including 32-bit kernels) */
                if (get_sampler_mode() != SAMPLER_HASH)
                        seq_printf(seq, " Flows selected %llu, discarded %llu.",
                            (unsigned long long)atomic64_read(&flows_selected),
                            (unsigned long long)(atomic64_read(&flows_observed) -
                                                 atomic64_read(&flows_selected)));
                else
                        seq_printf(seq, " Flows selected %llu.",
                            (unsigned long long)atomic64_read(&flows_selected));
                seq_printf(seq, " Pkts selected %llu, discarded %llu.\n",
                    t.pkts_selected,
                    t.pkts_observed - t.pkts_selected);
        } else
                seq_printf(seq, "Flow sampling is disabled.\n");
#endif

#ifdef ENABLE_PROMISC
        seq_printf(seq, "Promisc hack is %s (observed %llu packets, discarded %llu).\n",
            promisc? "enabled" : "disabled",
            t.pkt_promisc,
            t.pkt_promisc_drop);
#endif

#ifdef CONFIG_NF_NAT_NEEDED
        seq_printf(seq, "Natevents %s, count start %lu, stop %lu.\n", natevents? "enabled" : "disabled",
            nat_events_start, nat_events_stop);
#endif

        seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK, worker delay %d/%d"
            " [%d..%d] (%u ms, %u us, %u:%u"
#ifdef HAVE_LLIST
            " %u"
#endif
            " [cpu%u]).\n",
                   nr_flows,
                   peakflows,
                   /* split the elapsed seconds into days, hours, minutes */
                   peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60,
                   (unsigned int)(((unsigned long)nr_flows * sizeof(struct ipt_netflow) +
                                   (unsigned long)htable_size * sizeof(struct hlist_head)) >> 10),
                   worker_delay, HZ,
                   scan_min, scan_max,
                   jiffies_to_msecs(jiffies - wk_start),
                   jiffies_to_usecs(wk_busy),
                   wk_count,
                   wk_trylock,
#ifdef HAVE_LLIST
                   wk_llist,
#endif
                   wk_cpu);

        seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%02d [%d.%02d, %d.%02d, %d.%02d]."
            " InHash: %llu pkt, %llu K, InPDU %llu, %llu.\n",
            htable_size,
            (unsigned int)((htable_size * sizeof(struct hlist_head)) >> 10),
            FFLOAT(metric, 100),
            FFLOAT(min_metric, 100),
            FFLOAT(min5_metric, 100),
            FFLOAT(min15_metric, 100),
            t.pkt_total - t.pkt_out,
            (t.traf_total - t.traf_out) >> 10,
            pdu_packets,
            pdu_traf);

        seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec;"
            " Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
            sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);

        /* column legend for the "Total" and "cpuN" lines that follow */
        seq_printf(seq, "cpu#     pps; <search found new [metric], trunc frag alloc maxflows>,"
            " traffic: <pkt, bytes>, drop: <pkt, bytes>\n");

        seq_printf(seq, "Total %6u; %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
            " traffic: %llu, %llu MB, drop: %llu, %llu K\n",
            t.pkt_total_rate,
            t.searched,
            t.found,
            t.notfound,
            FFLOAT(SAFEDIV(100LL * (t.searched + t.found + t.notfound), (t.found + t.notfound)), 100),
            t.truncated, t.frags, t.alloc_err, t.maxflows_err,
            t.pkt_total, t.traf_total >> 20,
            t.pkt_drop, t.traf_drop >> 10);

        /* the per-cpu breakdown is printed only with more than one cpu */
        if (num_present_cpus() > 1) {
                for_each_present_cpu(cpu) {
                        struct ipt_netflow_stat *st;

                        st = &per_cpu(ipt_netflow_stat, cpu);
                        seq_printf(seq, "cpu%-2u %6u; %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
                            " traffic: %llu, %llu MB, drop: %llu, %llu K\n",
                            cpu,
                            st->pkt_total_rate,
                            st->searched,
                            st->found,
                            st->notfound,
                            FFLOAT(st->metric, 100),
                            st->truncated, st->frags, st->alloc_err, st->maxflows_err,
                            st->pkt_total, st->traf_total >> 20,
                            st->pkt_drop, st->traf_drop >> 10);
                }
        }

        seq_printf(seq, "Export: Rate %u bytes/s; Total %llu pkts, %llu MB, %llu flows;"
            " Errors %u pkts; Traffic lost %llu pkts, %llu Kbytes, %llu flows.\n",
            t.exported_rate,
            t.exported_pkt,
            t.exported_traf >> 20,
            t.exported_flow,
            t.send_failed,
            t.pkt_lost,
            t.traf_lost >> 10,
            t.flow_lost);

        /* per-socket lines; sock_lock protects the walk over usock_list */
        mutex_lock(&sock_lock);
        list_for_each_entry(usock, &usock_list, list) {
                seq_printf(seq, "sock%d: %s",
                    snum,
                    print_usock_addr(usock));
                if (usock->sock) {
                        struct sock *sk = usock->sock->sk;

                        seq_printf(seq, ", sndbuf %u, filled %u, peak %u;"
                            " err: sndbuf reached %u, connect %u, cberr %u, other %u\n",
                            sk->sk_sndbuf,
                            compat_refcount_read(&sk->sk_wmem_alloc),
                            atomic_read(&usock->wmem_peak),
                            usock->err_full,
                            usock->err_connect,
                            usock->err_cberr,
                            usock->err_other);
                } else
                        seq_printf(seq, " unconnected (%u attempts).\n",
                            usock->err_connect);
                snum++;
        }
        mutex_unlock(&sock_lock);

#ifdef ENABLE_AGGR
        /* both aggregation lists are walked under aggr_lock */
        read_lock_bh(&aggr_lock);
        snum = 0;
        list_for_each_entry(aggr_n, &aggr_n_list, list) {
                seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d (usage %u)\n",
                    snum,
                    HIPQUAD(aggr_n->addr),
                    mask2bits(aggr_n->mask),
                    mask2bits(aggr_n->aggr_mask),
                    atomic_read(&aggr_n->usage));
                snum++;
        }
        snum = 0;
        list_for_each_entry(aggr_p, &aggr_p_list, list) {
                seq_printf(seq, "aggr#%d port: ports %u-%u replace %u (usage %u)\n",
                    snum,
                    aggr_p->port1,
                    aggr_p->port2,
                    aggr_p->aggr_port,
                    atomic_read(&aggr_p->usage));
                snum++;
        }
        read_unlock_bh(&aggr_lock);
#endif
#ifdef SNMP_RULES
        {
                const unsigned char *rules;

                snum = 0;
                rcu_read_lock();
                rules = rcu_dereference(snmp_ruleset);
                /* the ruleset is walked as a sequence of records: one length
                 * byte, that many prefix bytes, then a two-byte value (high
                 * byte first); a zero length byte ends the walk */
                if (rules)
                while (*rules) {
                        const unsigned int len = *rules++;

                        seq_printf(seq, "SNMP-rule#%d: prefix '%.*s' map to %d\n",
                                snum, len, rules, (rules[len] << 8) + rules[len + 1]);
                        rules += len + 2;
                        ++snum;
                }
                rcu_read_unlock();
        }
#endif
        return 0;
}

/* open() handler: attach nf_seq_show() as a single-record seq_file with no
 * private data. Returns whatever single_open() returns. */
static int nf_seq_open(struct inode *inode, struct file *file)
{
        return single_open(file, nf_seq_show, NULL);
}

/* open() handler: attach snmp_seq_show() as a single-record seq_file with no
 * private data. Returns whatever single_open() returns. */
static int snmp_seq_open(struct inode *inode, struct file *file)
{
        return single_open(file, snmp_seq_show, NULL);
}

/* File operations of the file opened through nf_seq_open(); reading, seeking
 * and release are the stock seq_file/single_open helpers. */
static struct file_operations nf_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = nf_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

/* File operations of the file opened through snmp_seq_open(); reading,
 * seeking and release are the stock seq_file/single_open helpers. */
static struct file_operations snmp_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = snmp_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

/* Forward declarations: these helpers are used by flows_dump_seq_show()
 * below, ahead of their definitions. */
static inline int inactive_needs_export(const struct ipt_netflow *nf, const long i_timeout,
    const unsigned long jiff);
static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout,
    const unsigned long jiff);
static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple);

/* Iteration state of one open flows dump file (kept in seq->private). */
struct flows_dump_private {
        int pcache;     /* pos that vcache corresponds to */
        void *vcache;   /* corresponding pointer for pos: SEQ_START_TOKEN,
                         * a node of the copied stripe, or NULL at the end */
        int stripe;     /* current stripe, -1 when no stripes are left */
        struct list_head list; /* copy of stripe */
        int alloc_errors; /* flows left out of a copy because allocation failed */
};

/* deallocate copied stripe */
static void nf_free_stripe(struct list_head *list)
{
        struct ipt_netflow *cf, *tmp;

        list_for_each_entry_safe(cf, tmp, list, flows_list) {
                kmem_cache_free(ipt_netflow_cachep, cf);
        }
        INIT_LIST_HEAD(list);
}

/* quickly clone stripe into flows_dump_private then it can be walked slowly
 * and lockless */
static void __nf_copy_stripe(struct flows_dump_private *st, const struct list_head *list)
{
        const struct ipt_netflow *nf;
        struct ipt_netflow *cf;

        nf_free_stripe(&st->list);
        list_for_each_entry(nf, list, flows_list) {
                cf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
                if (!cf) {
                        st->alloc_errors++;
                        continue;
                }
                memcpy(cf, nf, sizeof(*cf));
                list_add(&cf->flows_list, &st->list);
        }
}

/* nstripe is the desired stripe; st->stripe records the stripe actually used
 * (empty stripes are skipped, and so are stripes whose private copy came out
 * empty because every allocation failed), or -1 if there are no valid
 * stripes anymore.
 * Returns the first element of the copied stripe list, or NULL. */
static struct list_head *nf_get_stripe(struct flows_dump_private *st, int nstripe)
{
        read_lock_bh(&htable_rwlock);
        for (; nstripe < LOCK_COUNT; nstripe++) {
                struct stripe_entry *stripe = &htable_stripes[nstripe];
                int copied = 0;

                spin_lock(&stripe->lock);
                if (!list_empty(&stripe->list)) {
                        __nf_copy_stripe(st, &stripe->list);
                        /* If no flow could be copied st->list is empty and
                         * st->list.next is the list head itself, which is not
                         * embedded in a struct ipt_netflow; never hand that
                         * out as an element. */
                        copied = !list_empty(&st->list);
                }
                spin_unlock(&stripe->lock);

                if (copied) {
                        st->stripe = nstripe;
                        read_unlock_bh(&htable_rwlock);
                        return st->list.next;
                }
        }
        read_unlock_bh(&htable_rwlock);
        st->stripe = -1;
        return NULL;
}

/* Advance the dump by one element.
 * From SEQ_START_TOKEN the walk begins at stripe 0; once st->stripe is
 * negative the result is always NULL; inside a copied stripe the following
 * node is returned, and after its last node the next stripe is fetched.
 * Returns the next list node or NULL. */
static struct list_head *nf_get_next(struct flows_dump_private *st, struct list_head *head)
{
        int from_stripe;

        if (head == SEQ_START_TOKEN) {
                from_stripe = 0;
        } else if (st->stripe < 0) {
                return NULL;
        } else if (head->next != &st->list) {
                /* not the last node of the copy yet */
                return head->next;
        } else {
                from_stripe = st->stripe + 1;
        }
        return nf_get_stripe(st, from_stripe);
}

/* seq_file ->start: position the iterator at *pos.
 * seq_file may stop and restart the iteration whenever it wants, so the
 * last position and its element are cached in the private state: a restart
 * at or after the cached position continues from the cached element, and
 * only a restart before it walks again from the very beginning.
 * Returns SEQ_START_TOKEN for position 0, otherwise the element at *pos
 * (NULL when the walk runs out of elements). */
static void *flows_dump_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct flows_dump_private *priv = seq->private;
        struct list_head *cur;
        int steps = *pos;

        if (steps == 0) {
                /* first */
                priv->pcache = 0;
                priv->vcache = SEQ_START_TOKEN;
                return priv->vcache;
        }

        if (steps < priv->pcache) {
                /* target is behind the cache: start from 0 */
                cur = SEQ_START_TOKEN;
        } else {
                /* can iterate forward from the cached element */
                cur = priv->vcache;
                steps -= priv->pcache;
        }

        for (; steps != 0; steps--)
                cur = nf_get_next(priv, cur);

        priv->pcache = *pos;
        priv->vcache = cur;
        return priv->vcache;
}

/* seq_file ->next: bump *pos, step to the element after @v and record both
 * the new position and the new element in the cache.
 * Returns the new element, or NULL when there is none. */
static void *flows_dump_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct flows_dump_private *priv = seq->private;
        struct list_head *cur = v;

        *pos += 1;
        priv->pcache = *pos;
        priv->vcache = nf_get_next(priv, cur);
        return priv->vcache;
}

/* seq_file ->stop: intentionally empty. The iterator walks a private copy of
 * a stripe and nf_get_stripe() drops its locks before returning, so there is
 * nothing to release here. */
static void flows_dump_seq_stop(struct seq_file *seq, void *v)
{
}

/* To view this: cat /sys/kernel/debug/netflow_dump
 * seq_file ->show: print the column legend for SEQ_START_TOKEN, otherwise
 * one line describing the flow whose flows_list node is @v. Columns that
 * depend on compile-time features are present only when the feature is
 * built in. Always returns 0. */
static int flows_dump_seq_show(struct seq_file *seq, void *v)
{
        struct flows_dump_private *priv = seq->private;
        const long a_timeout = active_timeout * HZ;
        const long i_timeout = inactive_timeout * HZ;
        const struct ipt_netflow *flow;
        int expire;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq, "# hash a dev:i,o"
#ifdef SNMP_RULES
                    " snmp:i,o"
#endif
#ifdef ENABLE_MAC
                    " mac:src,dst"
#endif
#ifdef ENABLE_VLAN
                    " vlan"
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
                    " type"
#endif
                    " proto src:ip,port dst:ip,port nexthop"
                    " tos,tcpflags,options,tcpoptions"
                    " packets bytes ts:first,last\n");
                return 0;
        }

        flow = list_entry(v, struct ipt_netflow, flows_list);

        /* bit 0: inactive timeout says export, bit 1 and up: active check */
        expire = !!inactive_needs_export(flow, i_timeout, jiffies);
        expire |= active_needs_export(flow, a_timeout, jiffies) << 1;

        seq_printf(seq, "%d %04x %x",
            priv->pcache, hash_netflow(&flow->tuple), expire);
        seq_printf(seq, " %hd,%hd", flow->tuple.i_ifc, flow->o_ifc);
#ifdef SNMP_RULES
        seq_printf(seq, " %hd,%hd", flow->i_ifcr, flow->o_ifcr);
#endif
#ifdef ENABLE_MAC
        seq_printf(seq, " %pM,%pM", &flow->tuple.h_src, &flow->tuple.h_dst);
#endif
#ifdef ENABLE_VLAN
        /* the second tag is printed only when the first one is set */
        if (flow->tuple.tag[0]) {
                seq_printf(seq, " %d", ntohs(flow->tuple.tag[0]));
                if (flow->tuple.tag[1])
                        seq_printf(seq, ",%d", ntohs(flow->tuple.tag[1]));
        }
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
        seq_printf(seq, " %04x", ntohs(flow->ethernetType));
#endif
        seq_printf(seq, " %u ", flow->tuple.protocol);

        switch (flow->tuple.l3proto) {
        case AF_INET:
                seq_printf(seq, "%pI4n,%u %pI4n,%u %pI4n",
                    &flow->tuple.src, ntohs(flow->tuple.s_port),
                    &flow->tuple.dst, ntohs(flow->tuple.d_port),
                    &flow->nh);
                break;
        case AF_INET6:
                seq_printf(seq, "%pI6c,%u %pI6c,%u %pI6c",
                    &flow->tuple.src, ntohs(flow->tuple.s_port),
                    &flow->tuple.dst, ntohs(flow->tuple.d_port),
                    &flow->nh);
                break;
        default:
                seq_puts(seq, "?,? ?,? ?");
                break;
        }

        seq_printf(seq, " %x,%x,%x,%x",
            flow->tuple.tos, flow->tcp_flags, flow->options, flow->tcpoptions);
        /* timestamps are shown as their distance from the current jiffies */
        seq_printf(seq, " %u %u %lu,%lu\n",
            flow->nr_packets, flow->nr_bytes,
            jiffies - flow->nf_ts_first,
            jiffies - flow->nf_ts_last);

        return 0;
}

/* seq_file iterator of the flows dump; installed by flows_seq_open(). */
static struct seq_operations flows_dump_seq_ops = {
        .start  = flows_dump_seq_start,
        .show   = flows_dump_seq_show,
        .next   = flows_dump_seq_next,
        .stop   = flows_dump_seq_stop,
};

static int flows_seq_open(struct inode *inode, struct file *file)
{
        struct flows_dump_private *st;
        char *buf;
        const size_t size = 4 * PAGE_SIZE;

        buf = kmalloc(size, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        st = __seq_open_private(file, &flows_dump_seq_ops, sizeof(struct flows_dump_private));
        if (!st) {
                kfree(buf);
                return -ENOMEM;
        }
        INIT_LIST_HEAD(&st->list);
        /* speed up seq interface with bigger buffer */
        ((struct seq_file *)file->private_data)->buf = buf;
        ((struct seq_file *)file->private_data)->size = size;
        return 0;

}
/* release() handler of the flows dump: free the copied stripe, log the
 * number of failed copy allocations if there were any, then let
 * seq_release_private() finish and return its result. */
static int flows_seq_release(struct inode *inode, struct file *file)
{
        struct flows_dump_private *priv =
            ((struct seq_file *)file->private_data)->private;

        nf_free_stripe(&priv->list);
        if (priv->alloc_errors != 0)
                printk(KERN_INFO "ipt_NETFLOW: alloc_errors %d\n", priv->alloc_errors);
        return seq_release_private(inode, file);
}

/* File operations of the flows dump; open and release are the local
 * handlers that manage the private iteration state. */
static struct file_operations flows_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = flows_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = flows_seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef ENABLE_PROMISC
/* Continuation passed to NF_HOOK() by promisc4_rcv()/promisc6_rcv(): frees
 * the skb and returns NET_RX_DROP.
 * The parameter list depends on the kernel version: a struct net * is taken
 * from 4.4 on, a struct sock * from 4.1 on (or RHEL 7.2 and later). */
static int promisc_finish(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
    struct net *net,
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || \
    (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR >= 2)
    struct sock *sk,
#endif
    struct sk_buff *skb)
{
        /* don't pass to the routing */
        kfree_skb(skb);
        return NET_RX_DROP;
}

/* Quick and dirty version of ip_rcv(): do basic IPv4 sanity checking on
 * @skb, prepare it for L3 (trim to the IP total length, set the transport
 * header, clear the IP control block, orphan it) and pass it to the
 * NF_INET_PRE_ROUTING hook with promisc_finish() as continuation.
 * @pt and @orig_dev are not used.
 * Returns the result of NF_HOOK(); if any check fails the packet is counted
 * in pkt_promisc_drop, freed, and NET_RX_DROP is returned. */
static int promisc4_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        const struct iphdr *iph;
        u32 len;

        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto drop;
        iph = ip_hdr(skb);
        if (iph->ihl < 5 || iph->version != 4)
                goto drop;
        if (!pskb_may_pull(skb, iph->ihl*4))
                goto drop;
        /* pskb_may_pull() may have moved the data, reload the pointer */
        iph = ip_hdr(skb);
        if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
                goto drop;
        len = ntohs(iph->tot_len);
        if (skb->len < len)
                goto drop;
        else if (len < (iph->ihl*4))
                goto drop;
        if (pskb_trim_rcsum(skb, len))
                goto drop;
        /* pskb_trim_rcsum() may change skb pointers as well, so the header
         * pointer has to be reloaded before it is dereferenced again */
        iph = ip_hdr(skb);
        skb->transport_header = skb->network_header + iph->ihl*4;
        memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
        skb_orphan(skb);

        return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
            dev_net(dev),
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR > 1)
            NULL,
#endif
            skb, dev, NULL, promisc_finish);
drop:
        NETFLOW_STAT_INC(pkt_promisc_drop);
        kfree_skb(skb);
        return NET_RX_DROP;
}

/* Quick and dirty version of ipv6_rcv(): do basic IPv6 sanity checking on
 * @skb, prepare it for later processing and pass it to the
 * NF_INET_PRE_ROUTING hook with promisc_finish() as continuation.
 * @pt and @orig_dev are not used.
 * Returns the result of NF_HOOK(); if any check fails the packet is counted
 * in pkt_promisc_drop, freed, and NET_RX_DROP is returned. */
static int promisc6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        const struct ipv6hdr *hdr;
        u32 pkt_len;
        struct inet6_dev *idev;

        /* quick and dirty version of ipv6_rcv(), basic sanity checking
         * and preparation of skb for later processing */
        rcu_read_lock();
        /* drop when the device has no IPv6 state or IPv6 is disabled on it */
        idev = __in6_dev_get(skb->dev);
        if (!idev || unlikely(idev->cnf.disable_ipv6))
                goto drop;
        memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
        /* input interface: the dst's device if the skb has a dst, else dev */
        IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
        if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
                goto drop;
        hdr = ipv6_hdr(skb);
        if (hdr->version != 6)
                goto drop;
        /* loopback destination address arriving on a non-loopback device */
        if (!(dev->flags & IFF_LOOPBACK) &&
            ipv6_addr_loopback(&hdr->daddr))
                goto drop;
        /* multicast destination with scope 1, unless the packet or the
         * device is loopback */
        if (!(skb->pkt_type == PACKET_LOOPBACK ||
                    dev->flags & IFF_LOOPBACK) &&
            ipv6_addr_is_multicast(&hdr->daddr) &&
            IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
                goto drop;
        /* multicast destination with scope 0 */
        if (ipv6_addr_is_multicast(&hdr->daddr) &&
            IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
                goto drop;
        /* multicast source address */
        if (ipv6_addr_is_multicast(&hdr->saddr))
                goto drop;
        skb->transport_header = skb->network_header + sizeof(*hdr);
        IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
        pkt_len = ntohs(hdr->payload_len);
        /* a zero payload length together with a hop-by-hop next header is
         * the only case where the length check and trimming are skipped */
        if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
                if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
                        goto drop;
                if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
                        goto drop;
                /* reload the header pointer after trimming */
                hdr = ipv6_hdr(skb);
        }
        if (hdr->nexthdr == NEXTHDR_HOP) {
                int optlen;
                /* ipv6_parse_hopopts() is not exported by kernel.
                 * I dont really need to parse hop options, since packets
                 * are not routed, nor terminated, but I keep calculations
                 * in case other code depend on it. */
                /* pull the first 8 bytes of the options header, then the
                 * whole header: its second byte gives the length as
                 * (value + 1) * 8 bytes */
                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
                    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
                                    ((skb_transport_header(skb)[1] + 1) << 3))))
                        goto drop;
                optlen = (skb_transport_header(skb)[1] + 1) << 3;
                if (skb_transport_offset(skb) + optlen > skb_headlen(skb))
                        goto drop;
                /* step the transport header over the options header */
                skb->transport_header += optlen;
                IP6CB(skb)->nhoff = sizeof(struct ipv6hdr);
        }
        rcu_read_unlock();
        skb_orphan(skb);

        return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
            dev_net(dev),
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR > 1)
            NULL,
#endif
            skb, dev, NULL, promisc_finish);
drop:
        /* every jump to this label happens with the RCU read lock held */
        rcu_read_unlock();
        NETFLOW_STAT_INC(pkt_promisc_drop);
        kfree_skb(skb);
        return NET_RX_DROP;
}

/* source is skb_network_protocol() and __vlan_get_protocol() */
static __be16 __skb_network_protocol(struct sk_buff *skb, int *depth)
{
        __be16 type = skb->protocol;
        unsigned int vlan_depth;

        if (type == htons(ETH_P_TEB)) {
                struct ethhdr *eth;

                if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
                        return 0;

                eth = (struct ethhdr *)skb_mac_header(skb);
                type = eth->h_proto;
        }

        vlan_depth = skb->mac_len;
        if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
                if (vlan_depth) {
                        if (WARN_ON(vlan_depth < VLAN_HLEN))
                                return 0;
                        vlan_depth -= VLAN_HLEN;
                } else {
                        vlan_depth = ETH_HLEN;
                }
                do {
                        struct vlan_hdr *vh;

                        if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
                                return 0;

                        vh = (struct vlan_hdr *)(skb->data + vlan_depth);
                        type = vh->h_vlan_encapsulated_proto;
                        vlan_depth += VLAN_HLEN;
                } while (type == htons(ETH_P_8021Q) ||
                         type == htons(ETH_P_8021AD));
        }

        *depth = vlan_depth;

        return type;
}

/*
 * Packet handler of the promisc hack (installed through promisc_packet_type).
 *
 * Only PACKET_OTHERHOST frames are processed; anything else is freed here
 * without touching the drop counter. VLAN tags (and, when PROMISC_MPLS is
 * defined, the MPLS label stack) are skipped to reach the inner protocol.
 * IPv4 and IPv6 packets are then handed to promisc4_rcv()/promisc6_rcv(),
 * whose result is returned. Every other packet is counted in
 * pkt_promisc_drop and freed, and 0 is returned.
 */
static int promisc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        /* what is not PACKET_OTHERHOST will be processed normally */
        if (skb->pkt_type != PACKET_OTHERHOST)
                goto out;

        NETFLOW_STAT_INC(pkt_promisc);

        /* on failure skb is NULL, so the drop path ends in kfree_skb(NULL) */
        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto drop;

        /* Note about vlans:
         * - older kernels will pass raw packet;
         * - newer kernels (since 3.0) will have one vlan tag
         * physically stripped out of the packet, and it will
         * be saved into skb->vlan_tci. skb->protocol will be
         * untagged etherType. */

        if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
            skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
                /* stays at mac_len if __skb_network_protocol() fails (returns 0) */
                int vlan_depth = skb->mac_len;

                /* rewind data to the MAC header so offsets are MAC-relative */
                skb_push(skb, skb->data - skb_mac_header(skb));
                skb->protocol = __skb_network_protocol(skb, &vlan_depth);
                skb_pull(skb, vlan_depth);

                skb_reset_network_header(skb);
                skb_reset_mac_len(skb);
        }
# ifdef PROMISC_MPLS
        if (eth_p_mpls(skb->protocol)) {
                size_t stack_len = 0;
                const struct mpls_label *mpls;

                /* walk the label stack until the entry with MPLS_LS_S_MASK set */
                do {
                        stack_len += MPLS_HLEN;
                        if (unlikely(!pskb_may_pull(skb, stack_len)))
                                goto drop;
                        mpls = (struct mpls_label *)(skb->data + stack_len - MPLS_HLEN);
                } while (!(mpls->entry & htonl(MPLS_LS_S_MASK)));

                skb_pull(skb, stack_len);
                skb_reset_network_header(skb);

                /* the payload type is guessed from the IP version field */
                if (!pskb_may_pull(skb, 1))
                        goto drop;
                switch (ip_hdr(skb)->version) {
                case 4:  skb->protocol = htons(ETH_P_IP);   break;
                case 6:  skb->protocol = htons(ETH_P_IPV6); break;
                default: goto drop;
                }
        }
# endif
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                return promisc4_rcv(skb, dev, pt, orig_dev);
        case htons(ETH_P_IPV6):
                return promisc6_rcv(skb, dev, pt, orig_dev);
        }
        /* any other protocol (including 0 from a failed VLAN parse) is dropped */
drop:
        NETFLOW_STAT_INC(pkt_promisc_drop);
out:
        kfree_skb(skb);
        return 0;
}

/* Packet handler descriptor added/removed by switch_promisc(): routes
 * ETH_P_ALL traffic to promisc_rcv(). */
static struct packet_type promisc_packet_type __read_mostly = {
        .type = htons(ETH_P_ALL),
        .func = promisc_rcv,
};

/*
 * Turn the promisc hack on or off.
 * Callers should not pass the global promisc itself as the argument.
 *
 * @newpromisc: any non-zero value means "enable"
 *
 * Under promisc_lock: when the normalized request differs from the current
 * state, the packet handler is added or removed, the change is logged and
 * the global is updated. Always returns 0.
 */
static int switch_promisc(int newpromisc)
{
        const int enable = (newpromisc != 0);

        mutex_lock(&promisc_lock);
        if (enable != promisc) {
                const char *state;

                if (enable) {
                        dev_add_pack(&promisc_packet_type);
                        state = "enabled";
                } else {
                        dev_remove_pack(&promisc_packet_type);
                        state = "disabled";
                }
                printk(KERN_INFO "ipt_NETFLOW: promisc hack is %s\n", state);
                promisc = enable;
        }
        mutex_unlock(&promisc_lock);
        return 0;
}
#endif

#ifdef CONFIG_SYSCTL
/* sysctl /proc/sys/net/netflow */
/*
 * sysctl handler for the "hashsize" entry.
 *
 * Reads are served from ctl->data. Writes are parsed into a local variable,
 * validated and passed to set_hashsize().
 *
 * Returns the proc_dointvec() result for reads and for failed parses,
 * -EPERM when the requested size is below LOCK_COUNT, otherwise the
 * set_hashsize() result if it is non-zero, else the proc_dointvec() result.
 */
static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        /* Zero-initialized so that a write which stores no value is rejected
         * by the LOCK_COUNT check instead of reading an indeterminate int. */
        int ret, hsize = 0;
        ctl_table_no_const lctl = *ctl;

        if (write)
                lctl.data = &hsize;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        /* never act on hsize for a read or after a parse error */
        if (ret < 0 || !write)
                return ret;
        if (hsize < LOCK_COUNT)
                return -EPERM;
        return set_hashsize(hsize)?:ret;
}

/*
 * sysctl handler for the "sndbuf" entry.
 *
 * Fails with -ENOENT when no destination is configured. Otherwise the global
 * sndbuf is first refreshed from the first destination's socket (if it has
 * one) and then exposed through proc_dointvec(). On a write the value is
 * raised to at least SOCK_MIN_SNDBUF and applied to every destination socket
 * while the scan worker is paused. Returns the proc_dointvec() result.
 */
static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        ctl_table_no_const lctl = *ctl;
        struct ipt_netflow_sock *dest;
        int have_dest;
        int ret;

        mutex_lock(&sock_lock);
        have_dest = !list_empty(&usock_list);
        if (have_dest) {
                dest = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
                if (dest->sock)
                        sndbuf = dest->sock->sk->sk_sndbuf;
        }
        mutex_unlock(&sock_lock);
        if (!have_dest)
                return -ENOENT;

        lctl.data = &sndbuf;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (write) {
                if (sndbuf < SOCK_MIN_SNDBUF)
                        sndbuf = SOCK_MIN_SNDBUF;
                /* push the new size to all sockets with the exporter stopped */
                pause_scan_worker();
                mutex_lock(&sock_lock);
                list_for_each_entry(dest, &usock_list, list) {
                        if (dest->sock)
                                dest->sock->sk->sk_sndbuf = sndbuf;
                }
                mutex_unlock(&sock_lock);
                cont_scan_worker();
        }
        return ret;
}

static void free_templates(void);
/*
 * sysctl handler for the "destination" entry.
 *
 * The string is read or written through proc_dostring(). After a successful
 * write, with the scan worker paused, all current destinations are removed,
 * the new list is parsed from destination_buf and templates are freed.
 * Returns the proc_dostring() result.
 */
static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        const int ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);

        if (ret < 0 || !write)
                return ret;

        pause_scan_worker();
        destination_removeall();
        add_destinations(destination_buf);
        free_templates();
        cont_scan_worker();
        return ret;
}

#ifdef ENABLE_AGGR
/*
 * sysctl handler for the "aggregation" entry: the string goes through
 * proc_dostring() and, after a successful write, aggregation_buf is handed
 * to add_aggregation(). Returns the proc_dostring() result.
 */
static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        int ret;

        if (debug > 1)
                printk(KERN_INFO "aggregation_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos);

        ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (ret < 0 || !write)
                return ret;

        add_aggregation(aggregation_buf);
        return ret;
}
#endif

#ifdef ENABLE_PROMISC
/*
 * sysctl handler for the "promisc" entry.
 *
 * Works on a local copy of the current setting. Reads and failed writes
 * return the proc_dointvec() result; a successful write returns whatever
 * switch_promisc() returns for the parsed value.
 */
static int promisc_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        ctl_table_no_const lctl = *ctl;
        int requested = promisc;
        int ret;

        lctl.data = &requested;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (write && ret >= 0)
                ret = switch_promisc(requested);
        return ret;
}
#endif

#ifdef ENABLE_SAMPLER
static int parse_sampler(char *ptr);
/*
 * sysctl handler for the "sampler" entry.
 *
 * Reads and failed writes return the proc_dostring() result. After a
 * successful write the scan worker is paused, flows are flushed, sampler_buf
 * is parsed and the sampling statistics are zeroed; the parse_sampler()
 * result is returned in that case.
 */
static int sampler_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        int cpu;
        int ret;

        if (debug > 1)
                printk(KERN_INFO "sampler_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos);

        ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (ret < 0 || !write)
                return ret;

        pause_scan_worker();
        netflow_scan_and_export(AND_FLUSH);
        /* worker is paused, so sampling_code reads stay consistent */
        ret = parse_sampler(sampler_buf);
        /* make templates get resent */
        ts_sampler_last = 0;
        /* reset sampling counters, global and per-cpu */
        atomic64_set(&flows_observed, 0);
        atomic64_set(&flows_selected, 0);
        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *cpu_stat = &per_cpu(ipt_netflow_stat, cpu);

                cpu_stat->pkts_selected = 0;
                cpu_stat->pkts_observed = 0;
        }
        cont_scan_worker();
        return ret;
}
#endif

#ifdef SNMP_RULES
static int add_snmp_rules(char *ptr);
/*
 * sysctl handler for the "snmp-rules" entry: the string goes through
 * proc_dostring(); a successful write returns the add_snmp_rules() result
 * for snmp_rules_buf, everything else returns the proc_dostring() result.
 */
static int snmp_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        int ret;

        if (debug > 1)
                printk(KERN_INFO "snmp_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos);

        ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (ret < 0 || !write)
                return ret;

        return add_snmp_rules(snmp_rules_buf);
}
#endif

/* Zero the per-cpu statistics of every present CPU and put the metric field
 * back to METRIC_DFL. */
static void clear_ipt_netflow_stat(void)
{
        int cpu;

        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *cpu_stat = &per_cpu(ipt_netflow_stat, cpu);

                memset(cpu_stat, 0, sizeof(struct ipt_netflow_stat));
                cpu_stat->metric = METRIC_DFL;
        }
}

/*
 * sysctl handler for the "flush" entry.
 *
 * Reads report a local zero. Writing a value above 0 flushes all flows with
 * the scan worker paused; a value above 1 also resets the statistics
 * counters. Returns the proc_dointvec() result.
 */
static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        ctl_table_no_const lctl = *ctl;
        int level = 0;
        int ret;

        lctl.data = &level;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
        if (!write || level <= 0)
                return ret;

        pause_scan_worker();
        netflow_scan_and_export(AND_FLUSH);
        if (level > 1)
                clear_ipt_netflow_stat();
        printk(KERN_INFO "ipt_NETFLOW: forced flush%s.\n",
            (level > 1)? " (reset stat counters)" : "");
        cont_scan_worker();

        return ret;
}

/*
 * sysctl handler for the "protocol" entry.
 *
 * Works on a local copy of the current protocol version. Reads and failed
 * writes return the proc_dointvec() result without side effects. A
 * successful write of 5, 9 or 10 flushes all flows and switches the export
 * version with the scan worker paused; any other value yields -EPERM.
 */
static int protocol_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        int ret;
        int ver = protocol;
        ctl_table_no_const lctl = *ctl;

        lctl.data = &ver;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);

        /* A failed write leaves ver at the current protocol, which would pass
         * the check below and trigger a pointless flush - bail out first. */
        if (ret < 0 || !write)
                return ret;

        switch (ver) {
                case 5:
                case 9:
                case 10:
                        printk(KERN_INFO "ipt_NETFLOW: forced flush (protocol version change)\n");
                        pause_scan_worker();
                        netflow_scan_and_export(AND_FLUSH);
                        netflow_switch_version(ver);
                        cont_scan_worker();
                        break;
                default:
                        return -EPERM;
        }

        return ret;
}

#ifdef CONFIG_NF_NAT_NEEDED
static void register_ct_events(void);
static void unregister_ct_events(void);
/*
 * sysctl handler for the "natevents" entry.
 *
 * Works on a local copy of natevents. On a write, conntrack event handling
 * is registered when the setting goes from zero to non-zero and unregistered
 * for the opposite transition. Returns the proc_dointvec() result.
 */
static int natevents_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
                         void __user *buffer, size_t *lenp, loff_t *fpos)
{
        ctl_table_no_const lctl = *ctl;
        int wanted = natevents;
        int ret;

        lctl.data = &wanted;
        ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos);

        /* act only when the on/off state actually flips */
        if (write && ((!natevents) != (!wanted))) {
                if (wanted)
                        register_ct_events();
                else
                        unregister_ct_events();
        }

        return ret;
}
#endif

/* Header pointer for the netflow sysctl tables.
 * NOTE(review): presumably assigned where the tables get registered, which
 * is outside this part of the file - confirm. */
static struct ctl_table_header *netflow_sysctl_header;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
/* Kernels before 2.6.20: _CTL_NAME(x) emits a ".ctl_name = x," initializer. */
#define _CTL_NAME(x) .ctl_name = x,
/* Number the entries of a sysctl table sequentially, starting at 1 and
 * stopping at the first entry without a procname (the terminator). */
static void ctl_table_renumber(ctl_table *table)
{
        int c;

        for (c = 1; table->procname; table++, c++)
                table->ctl_name = c;
}
#else
/* 2.6.20 and later: both helpers expand to nothing. */
#define _CTL_NAME(x)
#define ctl_table_renumber(x)
#endif
/*
 * Entries of the "netflow" sysctl directory.
 *
 * Plain integers use proc_dointvec directly on the module variable. Entries
 * with a custom handler either post-process the written value or, where no
 * .data is given (sndbuf, flush, protocol, natevents), point the handler's
 * private table copy at their own storage. The list ends with an empty
 * terminator entry.
 */
static ctl_table netflow_sysctl_table[] = {
        {
                .procname       = "active_timeout",
                .mode           = 0644,
                .data           = &active_timeout,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "inactive_timeout",
                .mode           = 0644,
                .data           = &inactive_timeout,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "debug",
                .mode           = 0644,
                .data           = &debug,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
                /* writes are validated and applied by hsize_procctl() */
                .procname       = "hashsize",
                .mode           = 0644,
                .data           = &htable_size,
                .maxlen         = sizeof(int),
                .proc_handler   = &hsize_procctl,
        },
        {
                /* no .data: sndbuf_procctl() supplies &sndbuf itself */
                .procname       = "sndbuf",
                .mode           = 0644,
                .maxlen         = sizeof(int),
                .proc_handler   = &sndbuf_procctl,
        },
        {
                .procname       = "destination",
                .mode           = 0644,
                .data           = &destination_buf,
                .maxlen         = sizeof(destination_buf),
                .proc_handler   = &destination_procctl,
        },
#ifdef ENABLE_AGGR
        {
                .procname       = "aggregation",
                .mode           = 0644,
                .data           = &aggregation_buf,
                .maxlen         = sizeof(aggregation_buf),
                .proc_handler   = &aggregation_procctl,
        },
#endif
        {
                .procname       = "maxflows",
                .mode           = 0644,
                .data           = &maxflows,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
                /* no .data: flush_procctl() uses a local value */
                .procname       = "flush",
                .mode           = 0644,
                .maxlen         = sizeof(int),
                .proc_handler   = &flush_procctl,
        },
        {
                /* no .data: protocol_procctl() uses a local copy of protocol */
                .procname       = "protocol",
                .mode           = 0644,
                .maxlen         = sizeof(int),
                .proc_handler   = &protocol_procctl,
        },
        {
                .procname       = "refresh-rate",
                .mode           = 0644,
                .data           = &refresh_rate,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "timeout-rate",
                .mode           = 0644,
                .data           = &timeout_rate,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
#ifdef ENABLE_PROMISC
        {
                /* promisc_procctl() works on a local copy, not on .data */
                .procname       = "promisc",
                .mode           = 0644,
                .data           = &promisc,
                .maxlen         = sizeof(int),
                .proc_handler   = &promisc_procctl,
        },
#endif
#ifdef ENABLE_SAMPLER
        {
                .procname       = "sampler",
                .mode           = 0644,
                .data           = &sampler_buf,
                .maxlen         = sizeof(sampler_buf),
                .proc_handler   = &sampler_procctl,
        },
#endif
        {
                /* range-checked integer: extra1/extra2 give the bounds */
                .procname       = "scan-min",
                .mode           = 0644,
                .data           = &scan_min,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec_minmax,
                .extra1         = &one,
                .extra2         = &scan_max,
        },
#ifdef SNMP_RULES
        {
                .procname       = "snmp-rules",
                .mode           = 0644,
                .data           = &snmp_rules_buf,
                .maxlen         = sizeof(snmp_rules_buf),
                .proc_handler   = &snmp_procctl,
        },
#endif
#ifdef CONFIG_NF_NAT_NEEDED
        {
                /* no .data: natevents_procctl() uses a local copy of natevents */
                .procname       = "natevents",
                .mode           = 0644,
                .maxlen         = sizeof(int),
                .proc_handler   = &natevents_procctl,
        },
#endif
        { }
};

/* Location of the netflow sysctl directory ("net" -> "netflow"), expressed
 * in the form the target kernel version provides. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
/* Before 2.6.25: nested tables linked through .child. */
static ctl_table netflow_sysctl_root[] = {
        {
                _CTL_NAME(33)
                .procname       = "netflow",
                .mode           = 0555,
                .child          = netflow_sysctl_table,
        },
        { }
};

static ctl_table netflow_net_table[] = {
        {
                .ctl_name       = CTL_NET,
                .procname       = "net",
                .mode           = 0555,
                .child          = netflow_sysctl_root,
        },
        { }
};
#else /* >= 2.6.25 */
/* 2.6.25 and later: a ctl_path array; .ctl_name is only set before 2.6.33. */
static struct ctl_path netflow_sysctl_path[] = {
        {
                .procname = "net",
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
                .ctl_name = CTL_NET
#endif
        },
        { .procname = "netflow" },
        { }
};
#endif /* 2.6.25 */
#endif /* CONFIG_SYSCTL */

/* socket code */
static void sk_error_report(struct sock *sk)
{
        struct ipt_netflow_sock *usock;

        /* clear connection refused errors if any */
        if (debug > 1)
                printk(KERN_INFO "ipt_NETFLOW: socket error <%d>\n", sk->sk_err);
        sk->sk_err = 0;
        usock = sk->sk_user_data;
        if (usock)
                usock->err_cberr++;
        NETFLOW_STAT_INC(sock_cberr);
        /* It's theoretically possible to determine to which datagram this reply is,
         * because ICMP message frequently includes header of erroneous packet, but
         * this is not that reliable - packets could be spoofed, and requires keeping
         * book of sent packets. */
        return;
}

/*
 * Create and connect the UDP export socket for one destination.
 *
 * The socket is unhashed, gets sk_error_report() as its error callback and
 * usock as its user data. It is optionally bound to the device named in
 * usock->sdev and to the source address usock->saddr, then connected to
 * usock->addr. The global sndbuf is applied to the socket when non-zero,
 * otherwise it is initialized from the socket's default.
 *
 * Returns the connected socket, or NULL on any failure. Every failure after
 * socket creation releases the socket, so nothing leaks.
 */
static struct socket *usock_open_sock(struct ipt_netflow_sock *usock)
{
        struct socket *sock;
        int error;
        int salen = 0;

        if ((error = sock_create_kern(usock->addr.ss_family, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
                printk(KERN_ERR "ipt_NETFLOW: sock_create_kern error %d\n", -error);
                return NULL;
        }
        sock->sk->sk_allocation = GFP_ATOMIC;
        sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
        sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
        sock->sk->sk_user_data = usock;
        sock->sk->sk_reuse = SK_CAN_REUSE;

        if (usock->sdev[0]) {
                struct net_device *dev = dev_get_by_name(&init_net, usock->sdev);

                if (!dev) {
                        /* report a meaningful errno instead of the stale
                         * (successful) sock_create_kern() result */
                        error = -ENODEV;
                        printk(KERN_ERR "ipt_NETFLOW: error binding to device %s, errno %d\n",
                            usock->sdev, -error);
                        goto err_release;
                }
                sock->sk->sk_bound_dev_if = dev->ifindex;
                dev_put(dev);
        }
        if (!is_zero_addr(&usock->saddr)) {
                if (usock->saddr.ss_family == AF_INET)
                        salen = sizeof(struct sockaddr_in);
                else if (usock->saddr.ss_family == AF_INET6)
                        salen = sizeof(struct sockaddr_in6);
                if ((error = sock->ops->bind(sock, (struct sockaddr *)&usock->saddr, salen)) < 0) {
                        printk(KERN_ERR "ipt_NETFLOW: error binding socket %d\n", -error);
                        goto err_release;
                }
        }

        if (sndbuf)
                sock->sk->sk_sndbuf = sndbuf;
        else
                sndbuf = sock->sk->sk_sndbuf;
        error = sock->ops->connect(sock, (struct sockaddr *)&usock->addr, sizeof(usock->addr), 0);
        if (error < 0) {
                printk(KERN_ERR "ipt_NETFLOW: error connecting UDP socket %d,"
                    " don't worry, will try reconnect later.\n", -error);
                /* ENETUNREACH when no interfaces */
                goto err_release;
        }
        return sock;

err_release:
        /* single exit for all failures once the socket exists */
        sock_release(sock);
        return NULL;
}

/*
 * (Re)open the socket of a destination and reset its transient counters.
 *
 * @usock:   destination to connect
 * @sendmsg: non-zero when called from the send path; this makes a success
 *           get logged even without debug and adds " (pdu lost)" to the
 *           failure message
 *
 * A failed attempt bumps err_connect. In both cases wmem_peak, err_full and
 * err_other are reset to zero.
 */
static void usock_connect(struct ipt_netflow_sock *usock, const int sendmsg)
{
        struct socket *opened = usock_open_sock(usock);

        usock->sock = opened;
        if (!opened) {
                usock->err_connect++;
                if (debug)
                        printk(KERN_INFO "ipt_NETFLOW: connect to %s failed%s.\n",
                            print_sockaddr(&usock->addr),
                            (sendmsg)? " (pdu lost)" : "");
        } else if (sendmsg || debug) {
                printk(KERN_INFO "ipt_NETFLOW: connected %s\n",
                    print_sockaddr(&usock->addr));
        }

        atomic_set(&usock->wmem_peak, 0);
        usock->err_full = 0;
        usock->err_other = 0;
}

/* Release the destination's socket, if it has one, and mark the destination
 * as unconnected. */
static void usock_close(struct ipt_netflow_sock *usock)
{
        struct socket *current_sock = usock->sock;

        if (current_sock != NULL)
                sock_release(current_sock);
        usock->sock = NULL;
}

ktime_t ktime_get_real(void);

/*
 * Send one export PDU to every configured destination.
 * Only called in scan worker path.
 *
 * @buffer: PDU bytes to transmit
 * @len:    PDU length in bytes
 *
 * Returns nothing. Per-destination and global counters are updated under
 * sock_lock; unconnected destinations get one reconnect attempt first. When
 * no destination accepted the PDU, the pdu_packets/pdu_traf/pdu_flow_records
 * totals are accounted as lost, otherwise the flow records are accounted as
 * exported.
 */
static void netflow_sendmsg(void *buffer, const int len)
{
        struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
        struct kvec iov = { buffer, len };
        int retok = 0, ret; /* retok: number of destinations that accepted the PDU */
        int snum = 0;       /* index of attempted sends, used in log messages only */
        struct ipt_netflow_sock *usock;

        mutex_lock(&sock_lock);
        list_for_each_entry(usock, &usock_list, list) {
                usock->pkt_exp++;
                usock->bytes_exp += len;
                if (!usock->sock)
                        usock_connect(usock, 1);
                if (!usock->sock) {
                        /* still unconnected: count the failure and move on
                         * (snum is not advanced on this path) */
                        NETFLOW_STAT_INC(send_failed);
                        usock->pkt_fail++;
                        continue;
                }
                if (debug)
                        printk(KERN_INFO "netflow_sendmsg: sendmsg(%d, %d) [%u %u]\n",
                               snum,
                               len,
                               compat_refcount_read(&usock->sock->sk->sk_wmem_alloc),
                               usock->sock->sk->sk_sndbuf);
                ret = kernel_sendmsg(usock->sock, &msg, &iov, 1, (size_t)len);
                if (ret < 0) {
                        char *suggestion = "";

                        NETFLOW_STAT_INC(send_failed);
                        usock->pkt_fail++;
                        if (ret == -EAGAIN) {
                                usock->err_full++;
                                suggestion = ": increase sndbuf!";
                        } else {
                                usock->err_other++;
                                if (ret == -ENETUNREACH) {
                                        suggestion = ": network is unreachable.";
                                } else if (ret == -EINVAL) {
                                        /* drop the socket; the next send reconnects */
                                        usock_close(usock);
                                        suggestion = ": will reconnect.";
                                }
                        }
                        printk(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: data loss %llu pkt, %llu bytes%s\n",
                               snum, ret, pdu_packets, pdu_traf, suggestion);
                } else {
                        /* remember the highest send-buffer usage seen so far */
                        unsigned int wmem = compat_refcount_read(&usock->sock->sk->sk_wmem_alloc);
                        if (wmem > atomic_read(&usock->wmem_peak))
                                atomic_set(&usock->wmem_peak, wmem);
                        NETFLOW_STAT_INC(exported_pkt);
                        NETFLOW_STAT_ADD(exported_traf, ret);
                        usock->pkt_sent++;
                        retok++;
                }
                snum++;
        }
        mutex_unlock(&sock_lock);
        if (retok == 0) {
                /* not a single send succeeded, account stat for dropped packets */
                NETFLOW_STAT_ADD(pkt_lost, pdu_packets);
                NETFLOW_STAT_ADD(traf_lost, pdu_traf);
                NETFLOW_STAT_ADD(flow_lost, pdu_flow_records);
                NETFLOW_STAT_TS(lost);
        } else {
                NETFLOW_STAT_ADD(exported_flow, pdu_flow_records);
        }
}

static void usock_close_free(struct ipt_netflow_sock *usock)
{
        printk(KERN_INFO "ipt_NETFLOW: removed destination %s\n",
               print_sockaddr(&usock->addr));
        usock_close(usock);
        vfree(usock);
}

/*
 * Remove and free every destination.
 *
 * Entries are unlinked one at a time under sock_lock; the lock is dropped
 * while each entry is closed and freed.
 */
static void destination_removeall(void)
{
        struct ipt_netflow_sock *victim;

        for (;;) {
                mutex_lock(&sock_lock);
                if (list_empty(&usock_list))
                        break;
                victim = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
                list_del(&victim->list);
                mutex_unlock(&sock_lock);
                usock_close_free(victim);
        }
        /* the loop exits with sock_lock held */
        mutex_unlock(&sock_lock);
}

/*
 * Append a destination to usock_list unless one with an equal address (per
 * sockaddr_cmp()) is already present; a duplicate is closed and freed
 * instead, after sock_lock has been released.
 */
static void add_usock(struct ipt_netflow_sock *usock)
{
        struct ipt_netflow_sock *cur;
        int duplicate = 0;

        mutex_lock(&sock_lock);
        /* don't need duplicated sockets */
        list_for_each_entry(cur, &usock_list, list) {
                if (sockaddr_cmp(&cur->addr, &usock->addr)) {
                        duplicate = 1;
                        break;
                }
        }
        if (!duplicate) {
                list_add_tail(&usock->list, &usock_list);
                printk(KERN_INFO "ipt_NETFLOW: added destination %s%s\n",
                       print_usock_addr(usock),
                       (!usock->sock)? " (unconnected)" : "");
        }
        mutex_unlock(&sock_lock);

        if (duplicate)
                usock_close_free(usock);
}

#if defined(ENABLE_SAMPLER) || defined(SNMP_RULES)
/* Return non-zero when ch is an ASCII decimal digit. */
static inline int xisdigit(int ch)
{
        return (ch >= '0') && (ch <= '9');
}

/*
 * Parse the leading run of decimal digits of p.
 *
 * No sign or whitespace handling; parsing stops at the first non-digit.
 * Returns 0 when p does not start with a digit. Values that do not fit in
 * an int saturate at the largest int instead of overflowing, so an oversized
 * number can never wrap around into a small, valid-looking one.
 */
static inline int simple_atoi(const char *p)
{
        const int int_max = (int)(~0U >> 1);
        int i = 0;

        for (; xisdigit(*p); p++) {
                const int digit = *p - '0';

                /* would i * 10 + digit exceed int_max? */
                if (i > (int_max - digit) / 10)
                        return int_max;
                i = i * 10 + digit;
        }
        return i;
}
#endif

#ifdef ENABLE_SAMPLER
static void set_sampler(const unsigned char mode, const unsigned short interval)
{
        struct sampling s;

        s.mode = mode;
        s.interval = interval;
        if (!mode || interval > SAMPLER_INTERVAL_M) {
                *sampler_buf = 0;
                samp.v32 = s.v32;
                printk(KERN_ERR "ipt_NETFLOW: flow sampling is disabled.\n");
        } else {
                sampling_ts.first = ktime_get_real();
                /* no race here, becasue exporting process is stopped */
                samp.v32 = s.v32;
                sprintf(sampler_buf, "%s:%u", sampler_mode_string(), interval);
                printk(KERN_ERR "ipt_NETFLOW: flow sampling is enabled, mode %s one-out-of %u.\n",
                    sampler_mode_string(), interval);
        }
}

/*
 * Parse a sampler specification of the form "<mode>:<interval>" and apply it.
 *
 * The mode is chosen by the first character, case-insensitively: 'd'
 * deterministic, 'r' random, 'h' hash (only with SAMPLING_HASH). An empty
 * string or one starting with 'n', 'o' or '0' turns sampling off and
 * returns 0. The interval must be within 2..SAMPLER_INTERVAL_M.
 *
 * Returns 0 on success, or -EINVAL for an unknown mode, a missing interval
 * or an illegal interval; sampling is disabled in all error cases.
 */
static int parse_sampler(char *ptr)
{
        unsigned char mode;
        unsigned int interval;
        char *colon;

        switch (tolower(*ptr)) {
        case 'd': mode = SAMPLER_DETERMINISTIC; break;
        case 'r': mode = SAMPLER_RANDOM; break;
#ifdef SAMPLING_HASH
        case 'h': mode = SAMPLER_HASH; break;
#endif
        case '\0': /* empty */
        case 'n':  /* none */
        case 'o':  /* off */
        case '0':  /* zero */
                set_sampler(0, 0);
                return 0;
        default:
                printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n",
                    "unknown mode", ptr);
                goto disable;
        }

        colon = strchr(ptr, ':');
        if (!colon) {
                printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n",
                    "no interval specified", ptr);
                goto disable;
        }

        interval = simple_atoi(++colon);
        if (interval < 2 || interval > SAMPLER_INTERVAL_M) {
                printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n",
                    "illegal interval", colon);
                goto disable;
        }

        set_sampler(mode, interval);
        return 0;

disable:
        set_sampler(0, 0);
        return -EINVAL;
}
#endif

#ifdef SNMP_RULES
/*
 * Convert a textual SNMP rule list into its compact binary form.
 *
 * source string: eth:100,ppp:200,vlan:300
 * reformat to: length[1], prefix[len], offset[2], ..., null[1].
 *
 * @ptr: NUL-terminated rule list; after each number any single character
 *       acts as the separator
 * @dst: output buffer, or NULL to only compute the required size
 *
 * Returns the number of bytes the binary form occupies (at least 1, for the
 * terminator), or -EINVAL when a rule has no ':' or an empty prefix, when
 * the number cannot be parsed, or when the prefix length or the number does
 * not fit its 1-byte / 2-byte field.
 */
static int parse_snmp_rules(char *ptr, unsigned char *dst)
{
        int osize = 0;

        while (*ptr) {
                char *prefix = ptr;
                unsigned int number;
                int len, lsize;
                char *p;

                p = strchr(ptr, ':');
                if (!p)
                        return -EINVAL;
                len = p - ptr;
                /* the prefix length is stored in a single byte */
                if (len == 0 || len > 255)
                        return -EINVAL;
                ptr += len;
                /* %u matches the unsigned destination */
                if (sscanf(ptr, ":%u%n", &number, &lsize) < 1)
                        return -EINVAL;
                /* the offset is stored in two bytes; refuse to truncate */
                if (number > 0xffff)
                        return -EINVAL;
                ptr += lsize;
                if (*ptr) /* any separator will work */
                        ptr++;
                osize += 1 + len + 2;
                if (dst) {
                        *dst++ = len;
                        memcpy(dst, prefix, len);
                        dst += len;
                        *dst++ = (number >> 8) & 0xff;
                        *dst++ = number & 0xff;
                }
        }
        osize += 1;
        if (dst)
                *dst = '\0';
        return osize;
}

/*
 * Install a new SNMP rule set parsed from ptr.
 *
 * The list is parsed twice: once to size the buffer and once to fill it.
 * The new buffer is published with rcu_assign_pointer() under snmp_lock and
 * the previous one is freed after synchronize_rcu().
 *
 * Returns 0 on success, -EINVAL on a parse error, -ENOMEM when allocation
 * fails; in both error cases a short message is written to snmp_rules_buf.
 */
static int add_snmp_rules(char *ptr)
{
        const int need = parse_snmp_rules(ptr, NULL);
        char *fresh;
        char *prev;

        if (need <= 0) {
                printk(KERN_ERR "ipt_NETFLOW: add_snmp_rules parse error.\n");
                strcpy(snmp_rules_buf, "parse error");
                return -EINVAL;
        }

        fresh = kmalloc(need, GFP_KERNEL);
        if (fresh == NULL) {
                strcpy(snmp_rules_buf, "no memory");
                printk(KERN_ERR "ipt_NETFLOW: add_snmp_rules no memory.\n");
                return -ENOMEM;
        }
        parse_snmp_rules(ptr, fresh);

        /* swap in the new rule set */
        spin_lock(&snmp_lock);
        prev = snmp_ruleset;
        rcu_assign_pointer(snmp_ruleset, fresh);
        spin_unlock(&snmp_lock);

        /* free the old set only after the RCU grace period */
        synchronize_rcu();
        if (prev != NULL)
                kfree(prev);
        return 0;
}

/*
 * Map a network device to the interface index to report.
 *
 * Returns -1 for a NULL device. Without a rule set, or when no rule prefix
 * matches the start of the device name, the device's ifindex is returned.
 * For the first matching rule the result is the rule's 16-bit offset plus
 * the decimal number following the prefix in the device name.
 */
static inline int resolve_snmp(const struct net_device *ifc)
{
        const unsigned char *rule;
        const char *ifname;

        if (!ifc)
                return -1;

        ifname = ifc->name;
        /* each record: length[1], prefix[length], offset[2]; ends at length 0 */
        for (rule = rcu_dereference(snmp_ruleset); rule && *rule; ) {
                const unsigned int plen = rule[0];
                const unsigned char *prefix = rule + 1;
                const unsigned char *offset = prefix + plen;

                if (strncmp(ifname, prefix, plen) == 0)
                        return (offset[0] << 8) + offset[1] +
                                simple_atoi(ifname + plen);
                rule = offset + 2;
        }
        return ifc->ifindex;
}
#endif /* SNMP_RULES */

/* Count the occurrences of character c within the first count bytes of s,
 * stopping early at the terminating NUL. */
static size_t strncount(const char *s, size_t count, int c)
{
        const char needle = (char)c;
        size_t hits = 0;
        size_t pos;

        for (pos = 0; pos < count && s[pos] != '\0'; pos++)
                hits += (s[pos] == needle);
        return hits;
}

#define SEPARATORS " ,;\t\n"
/*
 * Parse the destination list in @ptr and register one export socket for
 * every entry that parses successfully.
 *
 * Entries are separated by any character of SEPARATORS. Syntax accepted by
 * the code below:
 *   IPv4:  addr[:port][@bind_addr][%ifname]
 *   IPv6:  addr or [addr], optionally followed by one of ':' '.' 'p' '#'
 *          and a port, then [@bind_addr][%ifname]
 * An entry is handed to the IPv6 parser when it contains at least two ':'.
 * The port defaults to 2055. The name after '%' is copied into the socket's
 * sdev field, truncated to fit IFNAMSIZ.
 *
 * Entries that fail to parse are logged and skipped. Returns 0, or -ENOMEM
 * when a socket descriptor cannot be allocated; sockets registered before
 * that failure stay registered.
 */
static int add_destinations(const char *ptr)
{
        int len;

        /* len is assigned inside the body; the loop is left through the
         * break below once no further token is found */
        for (; ptr; ptr += len) {
                struct sockaddr_storage ss;
                struct sockaddr_storage sbind = {};
                struct ipt_netflow_sock *usock;
                const char *end;
                int succ = 0;
                char name[IFNAMSIZ] = { 0 };

                /* skip initial separators */
                ptr += strspn(ptr, SEPARATORS);

                len = strcspn(ptr, SEPARATORS);
                if (!len)
                        break;
                memset(&ss, 0, sizeof(ss));

                /* two or more ':' in the token select the IPv6 parser */
                if (strncount(ptr, len, ':') >= 2) {
                        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
                        struct sockaddr_in6 *sout = (struct sockaddr_in6 *)&sbind;
                        const char *c = ptr;
                        int clen = len;

                        sin6->sin6_family = AF_INET6;
                        sin6->sin6_port = htons(2055);
                        /* allow the address to be written in brackets */
                        if (*c == '[') {
                                ++c;
                                --clen;
                        }
                        succ = in6_pton(c, clen, (u8 *)&sin6->sin6_addr, -1, &end);
                        /* step over the closing bracket of a bracketed address */
                        if (succ && *ptr == '[' && *end == ']')
                                ++end;
                        /* optional port, introduced by ':', '.', 'p' or '#' */
                        if (succ &&
                            (*end == ':' || *end == '.' || *end == 'p' || *end == '#'))
                                sin6->sin6_port = htons(simple_strtoul(++end, (char **)&end, 0));
                        /* optional source address to bind to */
                        if (succ && *end == '@') {
                                ++end;
                                sout->sin6_family = AF_INET6;
                                sout->sin6_port   = 0;
                                succ = in6_pton(end, strcspn(end, SEPARATORS), (u8 *)&sout->sin6_addr, -1, &end);
                        }
                } else {
                        struct sockaddr_in *sin  = (struct sockaddr_in *)&ss;
                        struct sockaddr_in *sout = (struct sockaddr_in *)&sbind;

                        sin->sin_family = AF_INET;
                        sin->sin_port = htons(2055);
                        succ = in4_pton(ptr, len, (u8 *)&sin->sin_addr, -1, &end);
                        /* optional port after ':' */
                        if (succ && *end == ':')
                                sin->sin_port = htons(simple_strtoul(++end, (char **)&end, 0));
                        /* optional source address to bind to */
                        if (succ && *end == '@') {
                                ++end;
                                sout->sin_family = AF_INET;
                                sout->sin_port   = 0;
                                succ = in4_pton(end, strcspn(end, SEPARATORS), (u8 *)&sout->sin_addr, -1, &end);
                        }
                }
                /* optional interface name after '%', common to both families */
                if (succ && *end == '%') {
                        ++end;
                        snprintf(name, sizeof(name), "%.*s", (int)strcspn(end, SEPARATORS), end);
                }
                if (!succ) {
                        printk(KERN_ERR "ipt_NETFLOW: can't parse destination: %.*s\n",
                            len, ptr);
                        continue;
                }

                if (!(usock = vmalloc(sizeof(*usock)))) {
                        printk(KERN_ERR "ipt_NETFLOW: can't vmalloc socket\n");
                        return -ENOMEM;
                }
                memset(usock, 0, sizeof(*usock));
                usock->addr  = ss;
                /* sbind is all zeroes unless an '@' part was parsed */
                usock->saddr = sbind;
                memcpy(usock->sdev, name, sizeof(usock->sdev));
                /* the result of usock_connect() is not checked here */
                usock_connect(usock, 0);
                add_usock(usock);
        }
        return 0;
}

#ifdef ENABLE_AGGR
/*
 * Unlink and free every entry on @list.
 *
 * aggr_lock is write-held only while an entry is being unlinked; it is
 * dropped around each vfree() call.
 */
static void aggregation_remove(struct list_head *list)
{
        for (;;) {
                /* netflow_aggr_p entries are handled through this type too */
                struct netflow_aggr_n *victim;

                write_lock_bh(&aggr_lock);
                if (list_empty(list)) {
                        write_unlock_bh(&aggr_lock);
                        return;
                }
                victim = list_entry(list->next, struct netflow_aggr_n, list);
                list_del(&victim->list);
                write_unlock_bh(&aggr_lock);

                vfree(victim);
        }
}

/*
 * Parse the aggregation rules in @ptr and install them in place of the
 * currently active ones.
 *
 * Rules are separated by any character of SEPARATORS and come in these
 * forms:
 *   a.b.c.d/prefix=aggr_prefix   network rule, queued for aggr_n_list
 *   port1-port2=aggr_port        port range rule, queued for aggr_p_list
 *   port=aggr_port               single port, stored as the range port-port
 *
 * Parsing stops at the first malformed rule; rules accepted before it are
 * still installed. Input that ends in separators is not treated as a
 * malformed rule. The active lists are swapped for the new ones under
 * aggr_lock and the previous entries are freed afterwards.
 *
 * Returns 0 on success. If an allocation fails, every rule built so far is
 * freed, the active lists are left untouched and -ENOMEM is returned.
 */
static int add_aggregation(char *ptr)
{
        struct netflow_aggr_n *aggr_n, *aggr, *tmp;
        struct netflow_aggr_p *aggr_p;
        LIST_HEAD(new_aggr_n_list);
        LIST_HEAD(new_aggr_p_list);
        LIST_HEAD(old_aggr_list);

        while (ptr && *ptr) {
                unsigned char ip[4];
                unsigned int mask;
                unsigned int port1, port2;
                unsigned int aggr_to;

                ptr += strspn(ptr, SEPARATORS);
                /* only trailing separators were left: nothing more to parse */
                if (!*ptr)
                        break;

                if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu/%u=%u",
                           ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) {

                        if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) {
                                printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
                                goto err_nomem;
                        }
                        memset(aggr_n, 0, sizeof(*aggr_n));

                        aggr_n->mask = bits2mask(mask);
                        aggr_n->addr = ntohl(*(__be32 *)ip) & aggr_n->mask;
                        aggr_n->aggr_mask = bits2mask(aggr_to);
                        aggr_n->prefix = mask;
                        printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u.%u.%u.%u/%u=%u]\n",
                               HIPQUAD(aggr_n->addr), mask, aggr_to);
                        list_add_tail(&aggr_n->list, &new_aggr_n_list);

                } else if (sscanf(ptr, "%u-%u=%u", &port1, &port2, &aggr_to) == 3 ||
                           sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) {
                        /* For the single-port form, port1 already holds the
                         * port: the range sscanf stored its first number
                         * before failing to match '-'. */

                        if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) {
                                printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
                                goto err_nomem;
                        }
                        memset(aggr_p, 0, sizeof(*aggr_p));

                        aggr_p->port1 = port1;
                        aggr_p->port2 = port2;
                        aggr_p->aggr_port = aggr_to;
                        printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u-%u=%u]\n",
                               port1, port2, aggr_to);
                        list_add_tail(&aggr_p->list, &new_aggr_p_list);
                } else {
                        printk(KERN_ERR "ipt_NETFLOW: bad aggregation rule: %s (ignoring)\n", ptr);
                        break;
                }

                /* advance to the separator after this rule; NULL ends the loop */
                ptr = strpbrk(ptr, SEPARATORS);
        }

        /* swap lists */
        write_lock_bh(&aggr_lock);
        list_for_each_entry_safe(aggr, tmp, &aggr_n_list, list)
                list_move(&aggr->list, &old_aggr_list);
        list_for_each_entry_safe(aggr, tmp, &aggr_p_list, list)
                list_move(&aggr->list, &old_aggr_list);

        list_for_each_entry_safe(aggr, tmp, &new_aggr_n_list, list)
                list_move_tail(&aggr->list, &aggr_n_list);
        list_for_each_entry_safe(aggr, tmp, &new_aggr_p_list, list)
                list_move_tail(&aggr->list, &aggr_p_list);
        write_unlock_bh(&aggr_lock);
        aggregation_remove(&old_aggr_list);
        return 0;

err_nomem:
        /* the rules queued so far were never published; free them instead
         * of leaking them */
        aggregation_remove(&new_aggr_n_list);
        aggregation_remove(&new_aggr_p_list);
        return -ENOMEM;
}
#endif

/* Seed handed to murmur3() by __hash_netflow(): the hash_seed variable when
 * SAMPLING_HASH is defined, the constant 0 otherwise. Where hash_seed gets
 * its value is not visible in this part of the file. */
#ifdef SAMPLING_HASH
static uint32_t hash_seed;
#define HASH_SEED hash_seed
#else
#define HASH_SEED 0
#endif
static inline u_int32_t __hash_netflow(const struct ipt_netflow_tuple *tuple)
{
        return murmur3(tuple, sizeof(struct ipt_netflow_tuple), HASH_SEED);
}

/* Bucket index for @tuple: its hash reduced modulo the current htable_size. */
static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple)
{
        const u_int32_t raw = __hash_netflow(tuple);

        return raw % htable_size;
}

/*
 * Look up a flow for @tuple in bucket @hash of htable.
 *
 * A flow is returned only if its tuple equals @tuple and its byte counter
 * is still below FLOW_FULL_WATERMARK. The found / searched / notfound
 * statistics are updated along the way. Returns NULL when no entry in the
 * bucket qualifies.
 */
static struct ipt_netflow *
ipt_netflow_find(const struct ipt_netflow_tuple *tuple, const unsigned int hash)
{
        struct ipt_netflow *flow;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
        struct hlist_node *pos;
#endif

        compat_hlist_for_each_entry(flow, pos, &htable[hash], hlist) {
                /* skip entries for other tuples and entries that are full */
                if (!ipt_netflow_tuple_equal(tuple, &flow->tuple) ||
                    flow->nr_bytes >= FLOW_FULL_WATERMARK) {
                        NETFLOW_STAT_INC(searched);
                        continue;
                }
                NETFLOW_STAT_INC(found);
                return flow;
        }
        NETFLOW_STAT_INC(notfound);
        return NULL;
}

static struct hlist_head *alloc_hashtable(const int size)
{
        struct hlist_head *hash;

        hash = vmalloc(sizeof(struct hlist_head) * size);
        if (hash) {
                int i;

                for (i = 0; i < size; i++)
                        INIT_HLIST_HEAD(&hash[i]);
        } else
                printk(KERN_ERR "ipt_NETFLOW: unable to vmalloc hash table.\n");

        return hash;
}

/*
 * Replace the flow hash table with a new one of @new_size buckets and
 * rehash every existing flow into it.
 *
 * @new_size is raised to LOCK_COUNT when it is smaller. Returns 0 on
 * success, or -ENOMEM when the new table cannot be allocated, in which
 * case the current table is left in place.
 */
static int set_hashsize(int new_size)
{
        struct hlist_head *new_hash, *old_hash;
        struct ipt_netflow *nf, *tmp;
        LIST_HEAD(all_list);
        int i;

        if (new_size < LOCK_COUNT)
                new_size = LOCK_COUNT;
        printk(KERN_INFO "ipt_NETFLOW: allocating new hash table %u -> %u buckets\n",
               htable_size, new_size);
        /* allocate before taking any lock so that failure changes nothing */
        new_hash = alloc_hashtable(new_size);
        if (!new_hash)
                return -ENOMEM;

        /* rehash */
        write_lock_bh(&htable_rwlock);
        old_hash = htable;
        htable = new_hash;
        htable_size = new_size;
        /* collect the flows of every stripe onto one private list */
        for (i = 0; i < LOCK_COUNT; i++) {
                struct stripe_entry *stripe = &htable_stripes[i];
                spin_lock(&stripe->lock);
                list_splice_init(&stripe->list, &all_list);
                spin_unlock(&stripe->lock);
        }
        /* re-insert each flow into the stripe and bucket chosen by its hash,
         * which hash_netflow() now computes against the new htable_size */
        list_for_each_entry_safe(nf, tmp, &all_list, flows_list) {
                unsigned int hash;
                struct stripe_entry *stripe;

                hash = hash_netflow(&nf->tuple);
                stripe = &htable_stripes[hash & LOCK_COUNT_MASK];
                spin_lock(&stripe->lock);
                list_move_tail(&nf->flows_list, &stripe->list);
                hlist_add_head(&nf->hlist, &htable[hash]);
                spin_unlock(&stripe->lock);
        }
        write_unlock_bh(&htable_rwlock);
        /* the old bucket array is freed only after the lock is dropped */
        vfree(old_hash);

        return 0;
}

/*
 * Allocate a zeroed flow entry from ipt_netflow_cachep (GFP_ATOMIC) and
 * copy @tuple into it.
 *
 * The global flow counter is incremented, and peakflows / peakflows_at are
 * updated when the new count exceeds the recorded peak. Returns the new
 * flow, or NULL (after logging an error) when the allocation fails.
 */
static struct ipt_netflow *
ipt_netflow_alloc(const struct ipt_netflow_tuple *tuple)
{
        struct ipt_netflow *flow;
        long live;

        flow = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
        if (flow == NULL) {
                printk(KERN_ERR "ipt_NETFLOW: Can't allocate flow.\n");
                return NULL;
        }

        memset(flow, 0, sizeof(*flow));
        flow->tuple = *tuple;

        live = atomic_inc_return(&ipt_netflow_count);
        if (live <= peakflows)
                return flow;

        /* new high-water mark */
        peakflows = live;
        peakflows_at = jiffies;
        return flow;
}

/*
 * Release flow @nf: decrement the global flow counter and return the entry
 * to ipt_netflow_cachep. Flows for which IS_DUMMY_FLOW() is true are left
 * untouched.
 */
static void ipt_netflow_free(struct ipt_netflow *nf)
{
        if (!(IS_DUMMY_FLOW(nf))) {
                atomic_dec(&ipt_netflow_count);
                kmem_cache_free(ipt_netflow_cachep, nf);
        }
}

/*
 * Finish the pending NetFlow v5 PDU, send it, and reset the PDU counters.
 *
 * Does nothing when no records are pending. The header is filled in, the
 * header plus pdu_data_records records are passed to netflow_sendmsg(),
 * and afterwards the sequence number advances by the number of records
 * sent and the per-PDU counters are cleared.
 *
 * Only called in scan worker path.
 */
static void netflow_export_pdu_v5(void)
{
        struct timeval tv;
        int pdusize;

        if (!pdu_data_records)
                return;

        if (debug > 1)
                printk(KERN_INFO "netflow_export_pdu_v5 with %d records\n", pdu_data_records);

        pdu.v5.version          = htons(5);
        pdu.v5.nr_records       = htons(pdu_data_records);
        pdu.v5.ts_uptime        = htonl(jiffies_to_msecs(jiffies));
        do_gettimeofday(&tv);
        pdu.v5.ts_usecs         = htonl(tv.tv_sec);
        /* NOTE(review): tv_usec is in microseconds; if ts_unsecs is the v5
         * header's residual-nanoseconds field, this value is not scaled to
         * nanoseconds - confirm against the struct definition. */
        pdu.v5.ts_unsecs        = htonl(tv.tv_usec);
        pdu.v5.seq              = htonl(pdu_seq);
        //pdu.v5.eng_type       = 0;
        pdu.v5.eng_id           = (__u8)engine_id;
#ifdef ENABLE_SAMPLER
        pdu.v5.sampling         = htons(sampler_nf_v5());
#endif
        pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu_data_records;

        netflow_sendmsg(&pdu.v5, pdusize);

        pdu_packets = 0;
        pdu_traf    = 0;

        /* the v5 sequence number counts records, not packets */
        pdu_seq += pdu_data_records;
        pdu_count++;
        pdu_flow_records = pdu_data_records = 0;
}

/*
 * Append flow @nf to the pending NetFlow v5 PDU as one record and free the
 * flow.
 *
 * The per-PDU packet and byte totals and pdu_ts_mod are updated. When the
 * PDU reaches NETFLOW5_RECORDS_MAX records it is exported immediately.
 * @nf must not be used by the caller afterwards: it is passed to
 * ipt_netflow_free() here.
 *
 * Only called in scan worker path.
 */
static void netflow_export_flow_v5(struct ipt_netflow *nf)
{
        struct netflow5_record *rec;

        if (unlikely(debug > 2))
                printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records);

        pdu_packets += nf->nr_packets;
        pdu_traf += nf->nr_bytes;
        pdu_ts_mod = jiffies;
        /* claim the next free record slot in the PDU */
        rec = &pdu.v5.flow[pdu_data_records++];
        pdu_flow_records++;

        /* make V5 flow record */
        rec->s_addr     = nf->tuple.src.ip;
        rec->d_addr     = nf->tuple.dst.ip;
        rec->nexthop    = nf->nh.ip;
#ifdef SNMP_RULES
        /* with SNMP_RULES the i_ifcr/o_ifcr fields of the flow are exported
         * instead of the raw interface indexes */
        rec->i_ifc      = htons(nf->i_ifcr);
        rec->o_ifc      = htons(nf->o_ifcr);
#else
        rec->i_ifc      = htons(nf->tuple.i_ifc);
        rec->o_ifc      = htons(nf->o_ifc);
#endif
        rec->nr_packets = htonl(nf->nr_packets);
        rec->nr_octets  = htonl(nf->nr_bytes);
        rec->first_ms   = htonl(jiffies_to_msecs(nf->nf_ts_first));
        rec->last_ms    = htonl(jiffies_to_msecs(nf->nf_ts_last));
        rec->s_port     = nf->tuple.s_port;
        rec->d_port     = nf->tuple.d_port;
        //rec->reserved = 0; /* pdu is always zeroized for v5 in netflow_switch_version */
        rec->tcp_flags  = nf->tcp_flags;
        rec->protocol   = nf->tuple.protocol;
        rec->tos        = nf->tuple.tos;
#ifdef CONFIG_NF_NAT_NEEDED
        rec->s_as       = nf->s_as;
        rec->d_as       = nf->d_as;
#endif
        rec->s_mask     = nf->s_mask;
        rec->d_mask     = nf->d_mask;
        //rec->padding  = 0;
        /* every field has been copied; the flow itself is no longer needed */
        ipt_netflow_free(nf);

        if (pdu_data_records == NETFLOW5_RECORDS_MAX)
                netflow_export_pdu_v5();
}

/*
 * Finish the pending NetFlow v9 PDU, send it, and prepare an empty one.
 *
 * pdu is initially blank, export current pdu, and prepare next for filling.
 * Does nothing when no data has been placed after the header
 * (pdu_data_used has not advanced past pdu.v9.data). The record count in
 * the header covers data and template records together. After sending,
 * the sequence number is advanced by one, the per-PDU counters are cleared
 * and the write position is moved back to the start of the data area.
 */
static void netflow_export_pdu_v9(void)
{
        struct timeval tv;
        int pdusize;

        if (pdu_data_used <= pdu.v9.data)
                return;

        if (debug > 1)
                printk(KERN_INFO "netflow_export_pdu_v9 with %d records\n",
                    pdu_data_records + pdu_tpl_records);

        pdu.v9.version          = htons(9);
        pdu.v9.nr_records       = htons(pdu_data_records + pdu_tpl_records);
        pdu.v9.sys_uptime_ms    = htonl(jiffies_to_msecs(jiffies));
        do_gettimeofday(&tv);
        pdu.v9.export_time_s    = htonl(tv.tv_sec);
        pdu.v9.seq              = htonl(pdu_seq);
        pdu.v9.source_id        = htonl(engine_id);

        /* bytes from the start of the header up to the write position */
        pdusize = pdu_data_used - (unsigned char *)&pdu.v9;

        netflow_sendmsg(&pdu.v9, pdusize);

        pdu_packets = 0;
        pdu_traf    = 0;

        /* the v9 sequence number is advanced once per exported PDU */
        pdu_seq++;
        pdu_count++;
        pdu_flow_records = pdu_data_records = pdu_tpl_records = 0;
        pdu_data_used = pdu.v9.data;
        pdu_flowset = NULL;
}

/*
 * Finish the pending IPFIX message, send it, and prepare an empty one.
 *
 * Does nothing when no data has been placed after the header
 * (pdu_data_used has not advanced past pdu.ipfix.data). The header carries
 * the total message length in bytes. After sending, the sequence number is
 * advanced by the number of data records sent, the per-PDU counters are
 * cleared and the write position is moved back to the start of the data
 * area.
 */
static void netflow_export_pdu_ipfix(void)
{
        struct timeval tv;
        int pdusize;

        if (pdu_data_used <= pdu.ipfix.data)
                return;

        if (debug > 1)
                printk(KERN_INFO "netflow_export_pduX with %d records\n",
                    pdu_data_records);

        pdu.ipfix.version       = htons(10);
        do_gettimeofday(&tv);
        pdu.ipfix.export_time_s = htonl(tv.tv_sec);
        pdu.ipfix.seq           = htonl(pdu_seq);
        pdu.ipfix.odomain_id    = htonl(engine_id);
        /* bytes from the start of the pdu up to the write position */
        pdusize = pdu_data_used - (unsigned char *)&pdu;
        pdu.ipfix.length        = htons(pdusize);

        netflow_sendmsg(&pdu.ipfix, pdusize);

        pdu_packets = 0;
        pdu_traf    = 0;

        /* here the sequence number advances by data records, not by one */
        pdu_seq += pdu_data_records;
        pdu_count++;
        pdu_flow_records = pdu_data_records = pdu_tpl_records = 0;
        pdu_data_used = pdu.ipfix.data;
        pdu_flowset = NULL;
}

/* Return 1 when @size more bytes fit between the current write position
 * and pdu_high_wm, 0 otherwise. */
static inline int pdu_have_space(const size_t size)
{
        if ((pdu_data_used + size) <= pdu_high_wm)
                return 1;
        return 0;
}

/* Reserve @size bytes at the current write position of the pdu and return
 * a pointer to their start. No bounds check is done here. */
static inline unsigned char *pdu_grab_space(const size_t size)
{
        unsigned char *const start = pdu_data_used;

        pdu_data_used = start + size;
        return start;
}

/* Give back the last @size bytes by moving the pdu write position
 * backwards. */
static inline void pdu_rewind_space(const size_t size)
{
        pdu_data_used = pdu_data_used - size;
}

/*
 * Reserve @size bytes in the pdu if they fit and return their start.
 * If they do not fit, export the current pdu via netflow_export_pdu() and
 * return NULL without reserving anything.
 */
static inline unsigned char *pdu_alloc_fail_export(const size_t size)
{
        if (likely(pdu_have_space(size)))
                return pdu_grab_space(size);

        netflow_export_pdu();
        return NULL;
}

/*
 * Reserve @size bytes in the pdu and return their start; never returns
 * NULL. If the bytes do not fit, the current pdu is exported first and the
 * space is then taken from the pdu that follows, without a second bounds
 * check.
 */
static unsigned char *pdu_alloc_export(const size_t size)
{
        unsigned char *space = pdu_alloc_fail_export(size);

        if (!space)
                space = pdu_grab_space(size);
        return space;
}

/* global table of sizes of template field types */
/* The table is generated from the Elements list (defined outside this part
 * of the file): while it is expanded, two() and one() each turn an entry
 * into the designated initializer "[id] = len", so the table is indexed by
 * element id. Both helper macros are undefined again right after. */
#define two(id, a, b, len)      [id] = len,
#define one(id, a, len)         [id] = len,
static u_int8_t tpl_element_sizes[] = {
        Elements
};
#undef two
#undef one

/* Hash table of struct data_template entries; get_template() picks the
 * bucket with hash_long(key, TEMPLATES_HASH_BSIZE). */
#define TEMPLATES_HASH_BSIZE    8
#define TEMPLATES_HASH_SIZE     (1<<TEMPLATES_HASH_BSIZE)
static struct hlist_head templates_hash[TEMPLATES_HASH_SIZE];

/* Building block of a template: a list of element type ids. get_template()
 * gathers pointers to these in its tlist[] array according to the bits of
 * the template key. */
struct base_template {
        int length; /* number of elements in template; not set by the static
                     * initializers in this file */
        u_int16_t types[]; /* element type ids; every static initializer in
                            * this file ends the list with 0 */
};

/* Template keys. A data template key is a combination of the BTPL_* bits
 * below; get_template() maps the bits to base_template fragments. A key
 * with BTPL_OPTION set is instead one of the OTPL_* values, each naming a
 * single options template. */
/* Data Templates */
#define BTPL_BASE9      0x00000001      /* netflow base stat */
#define BTPL_BASEIPFIX  0x00000002      /* ipfix base stat */
#define BTPL_IP4        0x00000004      /* IPv4 */
#define BTPL_MASK4      0x00000008      /* Aggregated */
#define BTPL_PORTS      0x00000010      /* UDP&TCP */
#define BTPL_IP6        0x00000020      /* IPv6 */
#define BTPL_ICMP9      0x00000040      /* ICMP (for V9) */
#define BTPL_ICMPX4     0x00000080      /* ICMP IPv4 (for IPFIX) */
#define BTPL_ICMPX6     0x00000100      /* ICMP IPv6 (for IPFIX) */
#define BTPL_IGMP       0x00000200      /* IGMP */
#define BTPL_IPSEC      0x00000400      /* AH&ESP */
#define BTPL_NAT4       0x00000800      /* NAT IPv4 */
#define BTPL_LABEL6     0x00001000      /* IPv6 flow label */
#define BTPL_IP4OPTIONS 0x00002000      /* IPv4 Options */
#define BTPL_IP6OPTIONS 0x00004000      /* IPv6 Options */
#define BTPL_TCPOPTIONS 0x00008000      /* TCP Options */
#define BTPL_MAC        0x00010000      /* MAC addresses */
#define BTPL_VLAN9      0x00020000      /* outer VLAN for v9 */
#define BTPL_VLANX      0x00040000      /* outer VLAN for IPFIX */
#define BTPL_VLANI      0x00080000      /* inner VLAN (IPFIX) */
#define BTPL_ETHERTYPE  0x00100000      /* ethernetType */
#define BTPL_DIRECTION  0x00200000      /* flowDirection */
#define BTPL_SAMPLERID  0x00400000      /* samplerId (v9) */
#define BTPL_SELECTORID 0x00800000      /* selectorId (IPFIX) */
#define BTPL_MPLS       0x01000000      /* MPLS stack */
#define BTPL_OPTION     0x80000000      /* Options Template */
/* size of the tlist[] array that get_template() fills from a key */
#define BTPL_MAX        32
/* Options Templates */
/* note: the argument of OTPL() is expanded without parentheses */
#define OTPL(x) (BTPL_OPTION | x)
#define OTPL_SYSITIME   OTPL(1)         /* systemInitTimeMilliseconds */
#define OTPL_MPSTAT     OTPL(2)         /* The Metering Process Statistics (rfc5101) */
#define OTPL_MPRSTAT    OTPL(3)         /* The Metering Process Reliability Statistics */
#define OTPL_EPRSTAT    OTPL(4)         /* The Exporting Process Reliability Statistics */
#define OTPL_SAMPLER    OTPL(5)         /* Flow Sampler for v9 */
#define OTPL_SEL_RAND   OTPL(6)         /* Random Flow Selector for IPFIX */
#define OTPL_SEL_COUNT  OTPL(7)         /* Systematic count-based Flow Selector for IPFIX */
#define OTPL_SEL_STAT   OTPL(8)         /* rfc7014 */
#define OTPL_SEL_STATH  OTPL(9)         /* OTPL_SEL_STAT, except selectorIDTotalFlowsObserved */
#define OTPL_IFNAMES    OTPL(10)        /* selects template_interfaces in get_template() */

/* Base per-flow fields for NetFlow v9; added by get_template() when the key
 * has BTPL_BASE9. The physical interface pair is present only when
 * ENABLE_PHYSDEV is defined. */
static struct base_template template_base_9 = {
        .types = {
                INPUT_SNMP,
                OUTPUT_SNMP,
#ifdef ENABLE_PHYSDEV
                ingressPhysicalInterface,
                egressPhysicalInterface,
#endif
                IN_PKTS,
                IN_BYTES,
                FIRST_SWITCHED,
                LAST_SWITCHED,
                PROTOCOL,
                TOS,
                0
        }
};
/* Base per-flow fields for IPFIX; added by get_template() when the key has
 * BTPL_BASEIPFIX and not BTPL_BASE9. Unlike the v9 base fragment it also
 * carries flowEndReason. The physical interface pair is present only when
 * ENABLE_PHYSDEV is defined. */
static struct base_template template_base_ipfix = {
        .types = {
                ingressInterface,
                egressInterface,
#ifdef ENABLE_PHYSDEV
                ingressPhysicalInterface,
                egressPhysicalInterface,
#endif
                packetDeltaCount,
                octetDeltaCount,
                flowStartMilliseconds,
                flowEndMilliseconds,
                protocolIdentifier,
                ipClassOfService,
                flowEndReason,
                0
        }
};
#ifdef ENABLE_MAC
/* Destination and source MAC addresses; added by get_template() when the
 * key has BTPL_MAC. */
static struct base_template template_mac_ipfix = {
        .types = {
                destinationMacAddress,
                sourceMacAddress,
                0
        }
};
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
/* Single ethernetType element; compiled in when either ENABLE_MAC or
 * ENABLE_VLAN is defined. */
static struct base_template template_ethertype = {
        .types = { ethernetType, 0 }
};
#endif
#ifdef ENABLE_VLAN
/* VLAN fragment in its v9 form: a single SRC_VLAN element. */
static struct base_template template_vlan_v9 = {
        .types = { SRC_VLAN, 0 }
};
/* IPFIX is different from v9, see rfc7133: the VLAN fragment consists of
 * the dot1q VLAN id and priority elements. */
static struct base_template template_vlan_ipfix = {
        .types = {
                dot1qVlanId,
                dot1qPriority,
                0
        }
};
/* Customer (dot1qCustomer*) VLAN id and priority elements. */
static struct base_template template_vlan_inner = {
        .types = {
                dot1qCustomerVlanId,
                dot1qCustomerPriority,
                0
        }
};
#endif
#ifdef MPLS_DEPTH
/* MPLS fragment: the top label TTL followed by up to ten label elements,
 * which start at index MPLS_LABELS_BASE_INDEX of the list. */
static struct base_template template_mpls = {
        .types = {
                mplsTopLabelTTL,
                /* do not just add an element here, because this array
                 * is truncated in ipt_netflow_init() */
#define MPLS_LABELS_BASE_INDEX 1
                MPLS_LABEL_1,
                MPLS_LABEL_2,
                MPLS_LABEL_3,
                MPLS_LABEL_4,
                MPLS_LABEL_5,
                MPLS_LABEL_6,
                MPLS_LABEL_7,
                MPLS_LABEL_8,
                MPLS_LABEL_9,
                MPLS_LABEL_10,
                0
        }
};
#endif
#ifdef ENABLE_DIRECTION
/* Single DIRECTION element. */
static struct base_template template_direction = {
        .types = { DIRECTION, 0 }
};
#endif
/* IPv4 source, destination and next-hop addresses; added by get_template()
 * when the key has BTPL_IP4. */
static struct base_template template_ipv4 = {
        .types = {
                IPV4_SRC_ADDR,
                IPV4_DST_ADDR,
                IPV4_NEXT_HOP,
                0
        }
};
/* ipv4Options element; added by get_template() for an IPv4 key that also
 * has BTPL_IP4OPTIONS. */
static struct base_template template_options4 = {
        .types = { ipv4Options, 0 }
};
/* tcpOptions element; added by get_template() when the key has
 * BTPL_TCPOPTIONS. */
static struct base_template template_tcpoptions = {
        .types = { tcpOptions, 0 }
};
/* IPv6 source, destination and next-hop addresses; added by get_template()
 * when the key has BTPL_IP6 and not BTPL_IP4. */
static struct base_template template_ipv6 = {
        .types = {
                IPV6_SRC_ADDR,
                IPV6_DST_ADDR,
                IPV6_NEXT_HOP,
                0
        }
};
/* IPV6_OPTION_HEADERS element; added by get_template() for an IPv6 key
 * that also has BTPL_IP6OPTIONS. */
static struct base_template template_options6 = {
        .types = { IPV6_OPTION_HEADERS, 0 }
};
/* IPV6_FLOW_LABEL element; added by get_template() for an IPv6 key that
 * also has BTPL_LABEL6. */
static struct base_template template_label6 = {
        .types = { IPV6_FLOW_LABEL, 0 }
};
/* Source and destination mask elements; added by get_template() for an
 * IPv4 key that also has BTPL_MASK4. */
static struct base_template template_ipv4_mask = {
        .types = {
                SRC_MASK,
                DST_MASK,
                0
        }
};
/* Layer-4 source/destination ports and TCP flags; added by get_template()
 * when the key has BTPL_PORTS. */
static struct base_template template_ports = {
        .types = {
                L4_SRC_PORT,
                L4_DST_PORT,
                TCP_FLAGS,
                0
        }
};
/* ICMP fragment for v9, expressed through the port elements; added by
 * get_template() when the key has BTPL_ICMP9 and not BTPL_PORTS. */
static struct base_template template_icmp_v9 = {
        .types = {
                L4_SRC_PORT,    /* dummy (required by some collector(s) to
                                   recognize ICMP flows) */
                L4_DST_PORT,    /* actually used in V9 world instead of
                                   ICMP_TYPE(32), disregarding docs */
                0
        }
};
/* icmpTypeCodeIPv4 element; added by get_template() for an IPv4 key that
 * also has BTPL_ICMPX4. */
static struct base_template template_icmp_ipv4 = {
        .types = { icmpTypeCodeIPv4, 0 }
};
/* icmpTypeCodeIPv6 element; added by get_template() for an IPv6 key that
 * also has BTPL_ICMPX6. */
static struct base_template template_icmp_ipv6 = {
        .types = { icmpTypeCodeIPv6, 0 }
};
/* MUL_IGMP_TYPE element; added by get_template() when the key has
 * BTPL_IGMP. */
static struct base_template template_igmp = {
        .types = { MUL_IGMP_TYPE, 0 }
};
/* IPSecSPI element; added by get_template() when the key has BTPL_IPSEC. */
static struct base_template template_ipsec = {
        .types = { IPSecSPI, 0 }
};
/* NAT event fragment: observation time, the addresses and ports before and
 * after translation, the protocol and the natEvent element. Added by
 * get_template() when the key has BTPL_NAT4 and neither BTPL_IP4 nor
 * BTPL_IP6. */
static struct base_template template_nat4 = {
        .types = {
                observationTimeMilliseconds,
                IPV4_SRC_ADDR,
                IPV4_DST_ADDR,
                postNATSourceIPv4Address,
                postNATDestinationIPv4Address,
                L4_SRC_PORT,
                L4_DST_PORT,
                postNAPTSourceTransportPort,
                postNAPTDestinationTransportPort,
                PROTOCOL,
                natEvent,
                0
        }
};

/* Options template selected by the OTPL_SYSITIME key in get_template(). */
static struct base_template template_sys_init_time = {
        .types = {
                observationDomainId,

                /* ipfix does not report sys_uptime_ms like v9 does,
                 * so this could be useful to detect system restart
                 * (rfc5102), and conversion of flow times to absolute
                 * time (rfc5153 4.7) */
                systemInitTimeMilliseconds,

                /* this will let collector detect module version and
                 * recompilation (by srcversion) */
                observationDomainName,

                /* useful to detect module reload */
                flowStartMilliseconds,
                flowEndMilliseconds,
                0
        }
};

/* http://tools.ietf.org/html/rfc5101#section-4 */
/* The Metering Process Statistics Option Template; selected by the
 * OTPL_MPSTAT key in get_template(). */
static struct base_template template_meter_stat = {
        .types = {
                observationDomainId,
                exportedMessageTotalCount,
                exportedFlowRecordTotalCount,
                exportedOctetTotalCount,
                observedFlowTotalCount,
                0
        }
};
/* The Metering Process Reliability Statistics Option Template; selected by
 * the OTPL_MPRSTAT key in get_template(). */
static struct base_template template_meter_rel_stat = {
        .types = {
                observationDomainId,
                ignoredPacketTotalCount,
                ignoredOctetTotalCount,
                flowStartMilliseconds, /* sampling start time */
                flowEndMilliseconds,
                0
        }
};
/* The Exporting Process Reliability Statistics Option Template; selected
 * by the OTPL_EPRSTAT key in get_template(). */
static struct base_template template_exp_rel_stat = {
        .types = {
                exportingProcessId,
                notSentFlowTotalCount,
                notSentPacketTotalCount,
                notSentOctetTotalCount,
                flowStartMilliseconds, /* sampling start time */
                flowEndMilliseconds,
                0
        }
};

#ifdef ENABLE_SAMPLER
/* Single FLOW_SAMPLER_ID element; compiled in with ENABLE_SAMPLER. */
static struct base_template template_samplerid = {
        .types = { FLOW_SAMPLER_ID, 0 }
};
/* Single selectorId element; compiled in with ENABLE_SAMPLER. */
static struct base_template template_selectorid = {
        .types = { selectorId, 0 }
};

/* sampler for v9; options template selected by the OTPL_SAMPLER key in
 * get_template() */
static struct base_template template_sampler = {
        .types = {
                observationDomainId,
                FLOW_SAMPLER_ID,
                FLOW_SAMPLER_MODE,
                FLOW_SAMPLER_RANDOM_INTERVAL,
                0
        }
};
/* sampler for ipfix, interval/spacing form; options template selected by
 * the OTPL_SEL_COUNT key in get_template() */
static struct base_template template_selector_systematic = {
        .types = {
                observationDomainId,
                selectorId,
                flowSelectorAlgorithm,
                samplingFlowInterval,
                samplingFlowSpacing,
                0
        }
};
/* sampler for ipfix, size/population form; options template selected by
 * the OTPL_SEL_RAND key in get_template() */
static struct base_template template_selector_random = {
        .types = {
                observationDomainId,
                selectorId,
                flowSelectorAlgorithm,
                samplingSize,
                samplingPopulation,
                0
        }
};
/* Selector statistics (observed and selected flow and packet totals with
 * a time range); options template selected by the OTPL_SEL_STAT key in
 * get_template(). */
static struct base_template template_selector_stat = {
        .types = {
                selectorId,
                selectorIDTotalFlowsObserved,
                selectorIDTotalFlowsSelected,
                selectorIdTotalPktsObserved,
                selectorIdTotalPktsSelected,
                flowStartMilliseconds,
                flowEndMilliseconds,
                0
        }
};
/* Same as template_selector_stat but without selectorIDTotalFlowsObserved:
 * can't calc selectorIDTotalFlowsObserved for hash sampling,
 * because dropped flows are not accounted.
 * Selected by the OTPL_SEL_STATH key in get_template(). */
static struct base_template template_selector_stat_hash = {
        .types = {
                selectorId,
                selectorIDTotalFlowsSelected,
                selectorIdTotalPktsObserved,
                selectorIdTotalPktsSelected,
                flowStartMilliseconds,
                flowEndMilliseconds,
                0
        }
};
#endif

/* Interface description record (index, name, description); options
 * template selected by the OTPL_IFNAMES key in get_template(). */
static struct base_template template_interfaces = {
        .types = {
                observationDomainId,
                INPUT_SNMP,
                IF_NAME,
                IF_DESC,
                0
        }
};

/* A complete template, kept in templates_hash and looked up by tpl_key in
 * get_template(). Entries are released with kfree() in free_templates(). */
struct data_template {
        struct hlist_node hlist; /* link in a templates_hash bucket */
        unsigned int tpl_key; /* template key this entry was built for */

        char options;   /* is it Options Template */
        short length;   /* number of elements in template */
        short tpl_size; /* whole size of template itself (with header), for alloc */
        short rec_size; /* size of one template record (w/o header) */
        int template_id_n; /* assigned from template_ids, network order. */
        int             exported_cnt;
        unsigned long   exported_ts; /* last exported (jiffies) */
        u_int16_t fields[]; /* {type, size} pairs */
} __attribute__ ((packed));

#define TPL_FIELD_NSIZE 4 /* one complete template field's network size */

static void free_templates(void)
{
        int i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
        struct hlist_node *pos;
#endif
        struct hlist_node *tmp;

        for (i = 0; i < TEMPLATES_HASH_SIZE; i++) {
                struct hlist_head *thead = &templates_hash[i];
                struct data_template *tpl;

                compat_hlist_for_each_entry_safe(tpl, pos, tmp, thead, hlist)
                        kfree(tpl);
                INIT_HLIST_HEAD(thead);
        }
        tpl_count = 0;

        /* reinitialize template timeouts */
        ts_sysinf_last = ts_stat_last = 0;
#ifdef ENABLE_SAMPLER
        ts_sampler_last = 0;
#endif
}

/*
 * Find old, or create new combined template from template key (tmask).
 *
 * The key is first looked up in templates_hash. On a miss, the
 * base_templates selected by tmask are collected, their element lists are
 * concatenated into a freshly allocated data_template, the template gets
 * the next value of template_ids, is linked into templates_hash and
 * tpl_count is incremented.
 *
 * Returns the (existing or new) template, or NULL if kmalloc() fails.
 */
static struct data_template *get_template(const unsigned int tmask)
{
        struct base_template *tlist[BTPL_MAX];
        struct data_template *tpl;
        int tnum;
        int length;
        int i, j, k;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
        /* cursor for compat_hlist_for_each_entry(); only declared for
         * kernels older than 3.9 */
        struct hlist_node *pos;
#endif
        int hash = hash_long(tmask, TEMPLATES_HASH_BSIZE);

        /* fast path: template for this key already exists */
        compat_hlist_for_each_entry(tpl, pos, &templates_hash[hash], hlist)
                if (tpl->tpl_key == tmask)
                        return tpl;

        tnum = 0;
        /* assemble array of base_templates from template key */
        /* NB: this should not have exporting protocol dependent checks */
        if (tmask & BTPL_OPTION) {
                /* option templates are matched by the exact key value,
                 * each consisting of a single base template */
                switch (tmask) {
                case OTPL_SYSITIME:
                        tlist[tnum++] = &template_sys_init_time;
                        break;
                case OTPL_MPSTAT:
                        tlist[tnum++] = &template_meter_stat;
                        break;
                case OTPL_MPRSTAT:
                        tlist[tnum++] = &template_meter_rel_stat;
                        break;
                case OTPL_EPRSTAT:
                        tlist[tnum++] = &template_exp_rel_stat;
                        break;
#ifdef ENABLE_SAMPLER
                case OTPL_SAMPLER:
                        tlist[tnum++] = &template_sampler;
                        break;
                case OTPL_SEL_RAND:
                        tlist[tnum++] = &template_selector_random;
                        break;
                case OTPL_SEL_COUNT:
                        tlist[tnum++] = &template_selector_systematic;
                        break;
                case OTPL_SEL_STAT:
                        tlist[tnum++] = &template_selector_stat;
                        break;
                case OTPL_SEL_STATH:
                        tlist[tnum++] = &template_selector_stat_hash;
                        break;
#endif
                case OTPL_IFNAMES:
                        tlist[tnum++] = &template_interfaces;
                        break;
                }
        } else {
                /* data templates are combined from the individual BTPL_ bits;
                 * IP4, IP6 and NAT4 are mutually exclusive here */
                if (tmask & BTPL_IP4) {
                        tlist[tnum++] = &template_ipv4;
                        if (tmask & BTPL_IP4OPTIONS)
                                tlist[tnum++] = &template_options4;
                        if (tmask & BTPL_MASK4)
                                tlist[tnum++] = &template_ipv4_mask;
                        if (tmask & BTPL_ICMPX4)
                                tlist[tnum++] = &template_icmp_ipv4;
                } else if (tmask & BTPL_IP6) {
                        tlist[tnum++] = &template_ipv6;
                        if (tmask & BTPL_LABEL6)
                                tlist[tnum++] = &template_label6;
                        if (tmask & BTPL_IP6OPTIONS)
                                tlist[tnum++] = &template_options6;
                        if (tmask & BTPL_ICMPX6)
                                tlist[tnum++] = &template_icmp_ipv6;
                } else if (tmask & BTPL_NAT4)
                        tlist[tnum++] = &template_nat4;
                if (tmask & BTPL_PORTS)
                        tlist[tnum++] = &template_ports;
                else if (tmask & BTPL_ICMP9)
                        tlist[tnum++] = &template_icmp_v9;
                if (tmask & BTPL_BASE9)
                        tlist[tnum++] = &template_base_9;
                else if (tmask & BTPL_BASEIPFIX)
                        tlist[tnum++] = &template_base_ipfix;
                if (tmask & BTPL_TCPOPTIONS)
                        tlist[tnum++] = &template_tcpoptions;
                if (tmask & BTPL_IGMP)
                        tlist[tnum++] = &template_igmp;
                if (tmask & BTPL_IPSEC)
                        tlist[tnum++] = &template_ipsec;
#ifdef ENABLE_MAC
                if (tmask & BTPL_MAC)
                        tlist[tnum++] = &template_mac_ipfix;
#endif
#ifdef ENABLE_VLAN
                if (tmask & BTPL_VLAN9)
                        tlist[tnum++] = &template_vlan_v9;
                else {
                        if (tmask & BTPL_VLANX)
                                tlist[tnum++] = &template_vlan_ipfix;
                        if (tmask & BTPL_VLANI)
                                tlist[tnum++] = &template_vlan_inner;
                }
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
                if (tmask & BTPL_ETHERTYPE)
                        tlist[tnum++] = &template_ethertype;
#endif
#ifdef MPLS_DEPTH
                if (tmask & BTPL_MPLS)
                        tlist[tnum++] = &template_mpls;
#endif
#ifdef ENABLE_DIRECTION
                if (tmask & BTPL_DIRECTION)
                        tlist[tnum++] = &template_direction;
#endif
#ifdef ENABLE_SAMPLER
                if (tmask & BTPL_SAMPLERID)
                        tlist[tnum++] = &template_samplerid;
                else if (tmask & BTPL_SELECTORID)
                        tlist[tnum++] = &template_selectorid;
#endif
        } /* !BTPL_OPTION */

        /* calculate resulting template length
         * and update base_template array lengths  */
        length = 0;
        for (i = 0; i < tnum; i++) {
                if (!tlist[i]->length) {
                        /* length is computed lazily: count the entries of
                         * types[] up to its zero terminator and cache it */
                        for (k = 0; tlist[i]->types[k]; k++);
                        tlist[i]->length = k;
                }
                length += tlist[i]->length;
        }
        /* elements are [type, len] pairs + one terminator */
        tpl = kmalloc(sizeof(struct data_template) + (length * 2 + 1) * sizeof(u_int16_t), GFP_KERNEL);
        if (!tpl) {
                printk(KERN_ERR "ipt_NETFLOW: unable to kmalloc template (%#x).\n", tmask);
                return NULL;
        }
        tpl->tpl_key = tmask;
        tpl->options = (tmask & BTPL_OPTION) != 0;
        /* tpl_size starts as the flowset header size; the per-field sizes
         * are added in the loop below */
        if (tpl->options)
                tpl->tpl_size = sizeof(struct flowset_opt_tpl_v9); /* ipfix is of the same size */
        else
                tpl->tpl_size = sizeof(struct flowset_template);
        tpl->length = length;
        tpl->rec_size = 0;
        tpl->template_id_n = htons(template_ids++);
        /* zero exported_ts marks the template as never exported yet
         * (checked in alloc_record_tpl()) */
        tpl->exported_cnt = 0;
        tpl->exported_ts = 0;

        /* construct resulting data_template and fill lengths */
        j = 0;
        for (i = 0; i < tnum; i++) {
                struct base_template *btpl = tlist[i];

                for (k = 0; k < btpl->length; k++) {
                        int size;
                        int type = btpl->types[k];

                        /* fields[] holds (type, size) pairs in host order */
                        tpl->fields[j++] = type;
                        size = tpl_element_sizes[type];
                        tpl->fields[j++] = size;
                        tpl->rec_size += size;
                }
                tpl->tpl_size += btpl->length * TPL_FIELD_NSIZE;
        }
        /* zero type terminates the list */
        tpl->fields[j++] = 0;

        hlist_add_head(&tpl->hlist, &templates_hash[hash]);
        tpl_count++;

        return tpl;
}

/*
 * Map an IPFIX information element used as option scope to the matching
 * NetFlow v9 scope type. Elements without a v9 counterpart yield -1
 * converted to u_int16_t.
 */
static u_int16_t scope_ipfix_to_v9(const u_int16_t elem)
{
        u_int16_t v9_scope;

        switch (elem) {
        case observationDomainId:
        case meteringProcessId:
        case exportingProcessId:
                v9_scope = V9_SCOPE_SYSTEM;
                break;
        case ingressInterface:
        case portId:
                v9_scope = V9_SCOPE_INTERFACE;
                break;
        case observationPointId:
        case LineCardId:
                v9_scope = V9_SCOPE_LINECARD;
                break;
        case TemplateId:
                v9_scope = V9_SCOPE_TEMPLATE;
                break;
        default:
                /* no v9 equivalent */
                v9_scope = -1;
                break;
        }
        return v9_scope;
}

/*
 * Write a template flowset (data or options, v9 or IPFIX depending on
 * `protocol`) describing @tpl into the export pdu.
 *
 * Afterwards the template is stamped with the current pdu_count and
 * jiffies, pdu_flowset is reset to NULL and pdu_tpl_records is
 * incremented.
 */
static void pdu_add_template(struct data_template *tpl)
{
        struct flowset_template *hdr;
        __be16 *out, *in;
        __u8 *buf;
        size_t reserve = 0;
        int type;

        /* An options template is only written when one data record with
         * its flowset header would still fit into the packet; that extra
         * room is requested and then given back right away. */
        if (tpl->options)
                reserve = sizeof(struct flowset_data) + tpl->rec_size;
        buf = pdu_alloc_export(tpl->tpl_size + reserve);
        pdu_rewind_space(reserve);
        hdr = (void *)buf;

        /* flowset id, length and template id are laid out the same way in
         * every kind of template header */
        if (tpl->options) {
                if (protocol == 9)
                        hdr->flowset_id = htons(FLOWSET_OPTIONS);
                else
                        hdr->flowset_id = htons(IPFIX_OPTIONS);
        } else {
                if (protocol == 9)
                        hdr->flowset_id = htons(FLOWSET_TEMPLATE);
                else
                        hdr->flowset_id = htons(IPFIX_TEMPLATE);
        }
        hdr->length      = htons(tpl->tpl_size);
        hdr->template_id = tpl->template_id_n;

        /* remainder of the header differs per template kind / protocol */
        if (!tpl->options) {
                hdr->field_count = htons(tpl->length);
                buf += sizeof(struct flowset_template);
        } else if (protocol == 9) {
                /* options templates are defined with the scope as their
                 * first element, so v9 gets one scope field and the rest
                 * as option fields */
                struct flowset_opt_tpl_v9 *ohdr = (void *)buf;

                ohdr->scope_len   = htons(TPL_FIELD_NSIZE);
                ohdr->opt_len     = htons((tpl->length - 1) * TPL_FIELD_NSIZE);
                buf += sizeof(struct flowset_opt_tpl_v9);
        } else {
                struct flowset_opt_tpl_ipfix *ohdr = (void *)buf;

                ohdr->field_count = htons(tpl->length);
                ohdr->scope_count = htons(1);
                buf += sizeof(struct flowset_opt_tpl_ipfix);
        }

        out = (__be16 *)buf;
        in = tpl->fields;
        if (tpl->options && protocol == 9) {
                /* v9 scope: element id is translated to a v9 scope type */
                *out++ = htons(scope_ipfix_to_v9(*in++));
                *out++ = htons(*in++);
        }
        /* copy the remaining (type, size) pairs up to the zero terminator */
        while ((type = *in++) != 0) {
                *out++ = htons(type);
                *out++ = htons(*in++);
        }

        tpl->exported_cnt = pdu_count;
        tpl->exported_ts = jiffies;

        /* no data flowset is open after a template was written */
        pdu_flowset = NULL;
        pdu_tpl_records++;
}

#ifdef ENABLE_DIRECTION
/*
 * Translate a netfilter hook number into a flow direction value:
 * 0 for PRE_ROUTING/LOCAL_IN, 1 for LOCAL_OUT/POST_ROUTING,
 * and -1 (as __u8) for any other hook.
 */
static inline __u8 hook2dir(const __u8 hooknum)
{
        if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN)
                return 0;
        if (hooknum == NF_INET_LOCAL_OUT || hooknum == NF_INET_POST_ROUTING)
                return 1;
        return -1;
}
#endif

/* Store the low 24 bits of @val at @p, most significant byte first;
 * @p needs no particular alignment. */
static inline void put_unaligned_be24(u32 val, unsigned char *p)
{
        p[0] = val >> 16;
        put_unaligned_be16(val, p + 1);
}

/* Reference point pairing a wall-clock time in milliseconds with the
 * jiffies value sampled together with it. Filled by set_jiffies_base()
 * and read by jiffies_to_ms_abs(). */
static struct {
        s64             ms;      /* this much abs milliseconds */
        unsigned long   jiffies; /* is that much jiffies */
} jiffies_base;

/*
 * Refresh jiffies_base so that a following batch of jiffies_to_ms_abs()
 * calls converts relative to "now".
 */
static void set_jiffies_base(void)
{
        ktime_t now;

        /* sample both clocks with bottom halves off, to get them as close
         * to each other as possible */
        local_bh_disable();
        jiffies_base.jiffies = jiffies;
        now = ktime_get_real();
        local_bh_enable();

        /* the unit conversion does not need to be in the protected section */
        jiffies_base.ms = ktime_to_ms(now);
}

/*
 * Convert jiffies timestamp @j into absolute milliseconds, using the
 * reference point stored in jiffies_base: the signed distance between @j
 * and the reference is converted to ms and applied to jiffies_base.ms.
 */
static inline s64 jiffies_to_ms_abs(unsigned long j)
{
        const long behind = jiffies_base.jiffies - j;

        /* @j lies after the reference point: add the distance */
        if (unlikely(behind < 0))
                return jiffies_base.ms + (s64)jiffies_to_msecs(-behind);

        /* usual case: @j is at or before the reference point */
        return jiffies_base.ms - (s64)jiffies_to_msecs(behind);
}

typedef struct in6_addr in6_t;
/*
 * Encode one field (data records only).
 *
 * @ptr:  where to write the value inside the record being built
 * @type: element id of the field, as stored in data_template fields[]
 * @nf:   flow the value is taken from
 *
 * Only the value is written; advancing @ptr by the field size is up to
 * the caller. An unrecognised @type triggers a one-time warning and the
 * field is zero filled using its size from tpl_element_sizes[].
 */
static inline void add_tpl_field(__u8 *ptr, const int type, const struct ipt_netflow *nf)
{
        switch (type) {
        case IN_BYTES:       put_unaligned_be32(nf->nr_bytes, ptr); break;
        case IN_PKTS:        put_unaligned_be32(nf->nr_packets, ptr); break;
        /* *_SWITCHED are plain jiffies converted to ms, while the
         * flow*Milliseconds variants are rebased via jiffies_to_ms_abs() */
        case FIRST_SWITCHED: put_unaligned_be32(jiffies_to_msecs(nf->nf_ts_first), ptr); break;
        case LAST_SWITCHED:  put_unaligned_be32(jiffies_to_msecs(nf->nf_ts_last), ptr); break;
        case flowStartMilliseconds: put_unaligned_be64(jiffies_to_ms_abs(nf->nf_ts_first), ptr); break;
        case flowEndMilliseconds:   put_unaligned_be64(jiffies_to_ms_abs(nf->nf_ts_last), ptr); break;
        /* values typed __be32/__be16 are copied as is, without byte swap */
        case IPV4_SRC_ADDR:  put_unaligned(nf->tuple.src.ip, (__be32 *)ptr); break;
        case IPV4_DST_ADDR:  put_unaligned(nf->tuple.dst.ip, (__be32 *)ptr); break;
        case IPV4_NEXT_HOP:  put_unaligned(nf->nh.ip, (__be32 *)ptr); break;
        case L4_SRC_PORT:    put_unaligned(nf->tuple.s_port, (__be16 *)ptr); break;
        case L4_DST_PORT:    put_unaligned(nf->tuple.d_port, (__be16 *)ptr); break;
#ifdef SNMP_RULES
        case INPUT_SNMP:     put_unaligned_be16(nf->i_ifcr, ptr); break;
        case OUTPUT_SNMP:    put_unaligned_be16(nf->o_ifcr, ptr); break;
#else
        case INPUT_SNMP:     put_unaligned_be16(nf->tuple.i_ifc, ptr); break;
        case OUTPUT_SNMP:    put_unaligned_be16(nf->o_ifc, ptr); break;
#endif
#ifdef ENABLE_PHYSDEV
        case ingressPhysicalInterface:
                             put_unaligned_be16(nf->i_ifphys, ptr); break;
        case egressPhysicalInterface:
                             put_unaligned_be16(nf->o_ifphys, ptr); break;
#endif
#ifdef ENABLE_VLAN
/* priority bits of a tag that is kept in network byte order */
#define EXTRACT_VLAN_PRIO(tag) ((ntohs(tag) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
        case SRC_VLAN:
        case dot1qVlanId:    put_unaligned(nf->tuple.tag[0] & htons(VLAN_VID_MASK), (__be16 *)ptr); break;
        case dot1qPriority:            *ptr = EXTRACT_VLAN_PRIO(nf->tuple.tag[0]); break;
        case dot1qCustomerVlanId:
                             put_unaligned(nf->tuple.tag[1] & htons(VLAN_VID_MASK), (__be16 *)ptr); break;
        case dot1qCustomerPriority:    *ptr = EXTRACT_VLAN_PRIO(nf->tuple.tag[1]); break;
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
        case ethernetType:   put_unaligned(nf->ethernetType, (__be16 *)ptr); break;
#endif
#ifdef ENABLE_MAC
        case destinationMacAddress: memcpy(ptr, &nf->tuple.h_dst, ETH_ALEN); break;
        case sourceMacAddress:      memcpy(ptr, &nf->tuple.h_src, ETH_ALEN); break;
#endif
#ifdef MPLS_DEPTH
/* -Warray-bounds is silenced around the label copies below */
# if __GNUC_PREREQ(4,6)
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Warray-bounds"
# endif
        /* only the first 3 bytes of each stored label entry are exported */
        case MPLS_LABEL_1:    memcpy(ptr, &nf->tuple.mpls[0], 3); break;
        case MPLS_LABEL_2:    memcpy(ptr, &nf->tuple.mpls[1], 3); break;
        case MPLS_LABEL_3:    memcpy(ptr, &nf->tuple.mpls[2], 3); break;
# if MPLS_DEPTH > 3
        case MPLS_LABEL_4:    memcpy(ptr, &nf->tuple.mpls[3], 3); break;
        case MPLS_LABEL_5:    memcpy(ptr, &nf->tuple.mpls[4], 3); break;
        case MPLS_LABEL_6:    memcpy(ptr, &nf->tuple.mpls[5], 3); break;
        case MPLS_LABEL_7:    memcpy(ptr, &nf->tuple.mpls[6], 3); break;
        case MPLS_LABEL_8:    memcpy(ptr, &nf->tuple.mpls[7], 3); break;
        case MPLS_LABEL_9:    memcpy(ptr, &nf->tuple.mpls[8], 3); break;
        case MPLS_LABEL_10:   memcpy(ptr, &nf->tuple.mpls[9], 3); break;
# endif
# if __GNUC_PREREQ(4,6)
#  pragma GCC diagnostic pop
# endif
        /* low byte of the first label entry after conversion to host order */
        case mplsTopLabelTTL: *ptr = ntohl(nf->tuple.mpls[0]); break;
#endif
#ifdef ENABLE_DIRECTION
        /* hooknumx is the hook number plus one (zero is tested as "not set"
         * in netflow_export_flow_tpl()) */
        case DIRECTION:                *ptr = hook2dir(nf->hooknumx - 1); break;
#endif
        case PROTOCOL:                 *ptr = nf->tuple.protocol; break;
        case TCP_FLAGS:                *ptr = nf->tcp_flags; break;
        case TOS:                      *ptr = nf->tuple.tos; break;
        case IPV6_SRC_ADDR:   *(in6_t *)ptr = nf->tuple.src.in6; break;
        case IPV6_DST_ADDR:   *(in6_t *)ptr = nf->tuple.dst.in6; break;
        case IPV6_NEXT_HOP:   *(in6_t *)ptr = nf->nh.in6; break;
        case IPV6_FLOW_LABEL: put_unaligned_be24(nf->flow_label, ptr); break;
        case tcpOptions:      put_unaligned_be32(nf->tcpoptions, ptr); break;
        /* nf->options is shared by the IPv4 (32 bit) and IPv6 (16 bit)
         * option elements */
        case ipv4Options:     put_unaligned_be32(nf->options, ptr); break;
        case IPV6_OPTION_HEADERS:
                              put_unaligned_be16(nf->options, ptr); break;
        case SRC_MASK:                 *ptr = nf->s_mask; break;
        case DST_MASK:                 *ptr = nf->d_mask; break;
        /* ICMP type/code and IGMP type are read from the d_port slot of
         * the tuple */
        case icmpTypeCodeIPv4:  /*FALLTHROUGH*/
        case icmpTypeCodeIPv6:  put_unaligned(nf->tuple.d_port, (__be16 *)ptr); break;
        case MUL_IGMP_TYPE:            *ptr = nf->tuple.d_port; break;
        case flowEndReason:            *ptr = nf->flowEndReason; break;
#ifdef CONFIG_NF_NAT_NEEDED
        /* NOTE(review): nf->nat is dereferenced unchecked here; presumably
         * these elements only occur in the NAT template that is selected
         * when nf->nat is set - confirm against template_nat4 */
        case postNATSourceIPv4Address:         put_unaligned(nf->nat->post.s_addr, (__be32 *)ptr); break;
        case postNATDestinationIPv4Address:    put_unaligned(nf->nat->post.d_addr, (__be32 *)ptr); break;
        case postNAPTSourceTransportPort:      put_unaligned(nf->nat->post.s_port, (__be16 *)ptr); break;
        case postNAPTDestinationTransportPort: put_unaligned(nf->nat->post.d_port, (__be16 *)ptr); break;
        case natEvent:                 *ptr = nf->nat->nat_event; break;
#endif
        case IPSecSPI:       put_unaligned(EXTRACT_SPI(nf->tuple), (__be32 *)ptr); break;
        case observationTimeMilliseconds:
                             put_unaligned_be64(ktime_to_ms(nf->nf_ts_obs), ptr); break;
        case observationTimeMicroseconds:
                             put_unaligned_be64(ktime_to_us(nf->nf_ts_obs), ptr); break;
        case observationTimeNanoseconds:
                             put_unaligned_be64(ktime_to_ns(nf->nf_ts_obs), ptr); break;
#ifdef ENABLE_SAMPLER
        /* the current sampler mode value is used as sampler/selector id */
        case FLOW_SAMPLER_ID:
        case selectorId:
                             *ptr = get_sampler_mode(); break;
#endif
        default:
                             /* unknown element: warn once, export zeros */
                             WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type);
                             memset(ptr, 0, tpl_element_sizes[type]);
        }
}

/* Flowset length granularity in bytes: alloc_record_tpl() zero pads a
 * finished flowset up to a multiple of this value. */
#define PAD_SIZE 4 /* rfc prescribes flowsets to be padded */

/*
 * Return timeout_rate (multiplied by 60 to get seconds) expressed in
 * jiffies. The conversion result is cached and only recomputed when
 * timeout_rate differs from the value it was last computed for.
 */
static inline unsigned long timeout_rate_j(void)
{
        static unsigned int cached_rate = 0;
        static unsigned long cached_rate_j = 0;
        struct timeval tv;

        /* common case: setting unchanged since the last conversion */
        if (likely(timeout_rate == cached_rate))
                return cached_rate_j;

        tv.tv_sec = timeout_rate * 60;
        tv.tv_usec = 0;

        cached_rate = timeout_rate;
        cached_rate_j = timeval_to_jiffies(&tv);
        return cached_rate_j;
}

/* return buffer where to write records data */
static unsigned char *alloc_record_tpl(struct data_template *tpl)
{
        unsigned char *ptr;

        /* If previous write was to the same template and there is room, then we just add new record,
         * otherwise we (re)allocate flowset (and/or whole pdu). */
        if (!pdu_flowset ||
            pdu_flowset->flowset_id != tpl->template_id_n ||
            !(ptr = pdu_alloc_fail_export(tpl->rec_size))) {

                /* if there was previous data template we should pad it to 4 bytes */
                if (pdu_flowset) {
                        int padding = (PAD_SIZE - ntohs(pdu_flowset->length) % PAD_SIZE) % PAD_SIZE;
                        if (padding && (ptr = pdu_alloc_fail_export(padding))) {
                                pdu_flowset->length = htons(ntohs(pdu_flowset->length) + padding);
                                for (; padding; padding--)
                                        *ptr++ = 0;
                        }
                }

                /* export template if needed */
                if (!tpl->exported_ts ||
                    pdu_count > (tpl->exported_cnt + refresh_rate) ||
                    time_is_before_jiffies(tpl->exported_ts + timeout_rate_j())) {
                        pdu_add_template(tpl);
                }

                /* new flowset */
                ptr = pdu_alloc_export(sizeof(struct flowset_data) + tpl->rec_size);
                pdu_flowset             = (struct flowset_data *)ptr;
                pdu_flowset->flowset_id = tpl->template_id_n;
                pdu_flowset->length     = htons(sizeof(struct flowset_data));
                ptr += sizeof(struct flowset_data);
        }
        return ptr;
}

/*
 * Look up (or create) the template for key @t_key and allocate room for
 * one of its records.
 *
 * On success the template is stored in *@ptpl and the record buffer is
 * returned. When no template could be obtained, the failure is logged,
 * alloc_err is incremented, *@ptpl is left untouched and NULL is returned.
 */
static unsigned char *alloc_record_key(const unsigned int t_key, struct data_template **ptpl)
{
        struct data_template *found = get_template(t_key);

        if (likely(found)) {
                *ptpl = found;
                return alloc_record_tpl(found);
        }

        printk(KERN_INFO "ipt_NETFLOW: template %#x allocation failed.\n", t_key);
        NETFLOW_STAT_INC_ATOMIC(alloc_err);
        return NULL;
}

/*
 * Export one flow as a template based data record and free it.
 *
 * A template key is derived from the properties of @nf (address family,
 * protocol, present options, and the optional compile time features), a
 * record of the matching template is allocated in the pdu and every field
 * of the template is encoded with add_tpl_field().
 *
 * @nf is released with ipt_netflow_free() in all cases; if no record could
 * be allocated the flow is accounted in the *_lost statistics instead.
 */
static void netflow_export_flow_tpl(struct ipt_netflow *nf)
{
        unsigned char *ptr;
        struct data_template *tpl;
        unsigned int tpl_mask;
        int i;

        if (unlikely(debug > 2))
                printk(KERN_INFO "adding flow to export (%d)\n",
                    pdu_data_records + pdu_tpl_records);

        /* build the template key */
#ifdef CONFIG_NF_NAT_NEEDED
        /* NAT events use the NAT template alone, none of the bits below */
        if (nf->nat) {
                tpl_mask = BTPL_NAT4;
                goto ready;
        }
#endif
        tpl_mask = (protocol == 9)? BTPL_BASE9 : BTPL_BASEIPFIX;
        if (likely(nf->tuple.l3proto == AF_INET)) {
                tpl_mask |= BTPL_IP4;
                if (unlikely(nf->options))
                        tpl_mask |= BTPL_IP4OPTIONS;
        } else {
                /* everything that is not AF_INET is treated as IPv6 */
                tpl_mask |= BTPL_IP6;
                if (unlikely(nf->options))
                        tpl_mask |= BTPL_IP6OPTIONS;
                if (unlikely(nf->flow_label))
                        tpl_mask |= BTPL_LABEL6;
        }
        if (unlikely(nf->tcpoptions))
                tpl_mask |= BTPL_TCPOPTIONS;
        if (unlikely(nf->s_mask || nf->d_mask))
                tpl_mask |= BTPL_MASK4;
        /* L4 dependent part: ports, ICMP type/code, IGMP type or IPsec SPI */
        if (likely(nf->tuple.protocol == IPPROTO_TCP ||
                    nf->tuple.protocol == IPPROTO_UDP ||
                    nf->tuple.protocol == IPPROTO_SCTP ||
                    nf->tuple.protocol == IPPROTO_UDPLITE))
                tpl_mask |= BTPL_PORTS;
        else if (nf->tuple.protocol == IPPROTO_ICMP ||
                 nf->tuple.protocol == IPPROTO_ICMPV6) {
                if (protocol == 9)
                        tpl_mask |= BTPL_ICMP9;
                else if (likely(nf->tuple.l3proto == AF_INET))
                        tpl_mask |= BTPL_ICMPX4;
                else
                        tpl_mask |= BTPL_ICMPX6;
        } else if (nf->tuple.protocol == IPPROTO_IGMP)
                tpl_mask |= BTPL_IGMP;
        else if (nf->tuple.protocol == IPPROTO_AH ||
                    nf->tuple.protocol == IPPROTO_ESP)
                tpl_mask |= BTPL_IPSEC;
#ifdef ENABLE_MAC
        if (!is_zero_ether_addr(nf->tuple.h_src) ||
            !is_zero_ether_addr(nf->tuple.h_dst))
                tpl_mask |= BTPL_MAC;
#endif
#ifdef ENABLE_VLAN
        /* the inner tag is only exported for IPFIX and only when an outer
         * tag is present */
        if (nf->tuple.tag[0]) {
                if (protocol == 9)
                        tpl_mask |= BTPL_VLAN9;
                else {
                        tpl_mask |= BTPL_VLANX;
                        if (nf->tuple.tag[1])
                                tpl_mask |= BTPL_VLANI;
                }
        }
#endif
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
        if (nf->ethernetType)
                tpl_mask |= BTPL_ETHERTYPE;
#endif
#ifdef MPLS_DEPTH
        if (nf->tuple.mpls[0])
                tpl_mask |= BTPL_MPLS;
#endif
#ifdef ENABLE_DIRECTION
        if (nf->hooknumx)
                tpl_mask |= BTPL_DIRECTION;
#endif
#ifdef ENABLE_SAMPLER
        if (get_sampler_mode())
                tpl_mask |= (protocol == 9)? BTPL_SAMPLERID : BTPL_SELECTORID;
#endif

#ifdef CONFIG_NF_NAT_NEEDED
ready:
#endif
        ptr = alloc_record_key(tpl_mask, &tpl);
        if (unlikely(!ptr)) {
                /* nothing was exported: account the flow as lost */
                NETFLOW_STAT_ADD(pkt_lost, nf->nr_packets);
                NETFLOW_STAT_ADD(traf_lost, nf->nr_bytes);
                NETFLOW_STAT_INC(flow_lost);
                NETFLOW_STAT_TS(lost);
                ipt_netflow_free(nf);
                return;
        }

        /* encode all fields */
        /* fields[] is a zero terminated list of (type, size) pairs */
        for (i = 0; ; ) {
                int type = tpl->fields[i++];

                if (!type)
                        break;
                add_tpl_field(ptr, type, nf);
                ptr += tpl->fields[i++];
        }

        pdu_data_records++;
        pdu_flow_records++;
        /* the record is now part of the open flowset */
        pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size);

        pdu_packets += nf->nr_packets;
        pdu_traf    += nf->nr_bytes;
        pdu_ts_mod = jiffies;

        ipt_netflow_free(nf);
}

/*
 * Return the system init time in milliseconds: current real time minus
 * the current jiffies converted to ms. The value is computed on first use
 * and cached for all later calls (a computed value of zero would be
 * recomputed).
 */
static u64 get_sys_init_time_ms(void)
{
        static u64 init_time_ms = 0;

        if (init_time_ms)
                return init_time_ms;

        init_time_ms = ktime_to_ms(ktime_get_real()) - jiffies_to_msecs(jiffies);
        return init_time_ms;
}

#ifdef ENABLE_SAMPLER
/*
 * Value of the flowSelectorAlgorithm element for the current sampler mode,
 * see http://www.iana.org/assignments/ipfix/ipfix.xml#ipfix-flowselectoralgorithm
 */
static unsigned char get_flowselectoralgo(void)
{
        unsigned char algo;

        switch (get_sampler_mode()) {
        case SAMPLER_DETERMINISTIC:
                algo = 1; /* Systematic count-based Sampling */
                break;
        case SAMPLER_RANDOM:
        case SAMPLER_HASH:
                algo = 3; /* Random n-out-of-N Sampling */
                break;
        default:
                algo = 0; /* Unassigned */
                break;
        }
        return algo;
}
#endif

/*
 * Export one statistics/options data record for template key @tpl_mask.
 *
 * @tpl_mask: template key passed to alloc_record_key()
 * @st:       counters used by the *TotalCount and selectorIdTotalPkts*
 *            elements; callers pass NULL for templates without them
 * @ts:       duration used by flowStart/EndMilliseconds; callers pass NULL
 *            for templates without them
 *
 * @st and @ts are dereferenced without a check, so the caller has to pass
 * whatever the selected template needs.
 *
 * If no record can be allocated nothing is exported. An element id this
 * function does not know triggers a one-time warning and is exported as
 * zeros, same as add_tpl_field() does for flow records.
 */
static void export_stat_st_ts(const unsigned int tpl_mask, struct ipt_netflow_stat *st, struct duration *ts)
{
        unsigned char *ptr;
        struct data_template *tpl;
        int i;

        ptr = alloc_record_key(tpl_mask, &tpl);
        if (unlikely(!ptr))
                return;

        /* encode all fields */
        /* fields[] is a zero terminated list of (type, size) pairs */
        for (i = 0; ; ) {
                int type = tpl->fields[i++];

                if (!type)
                        break;
                switch (type) {
                case observationDomainId:       put_unaligned_be32(engine_id, ptr); break;
                case exportingProcessId:        put_unaligned_be32(engine_id, ptr); break;
                case observedFlowTotalCount:    put_unaligned_be64(st->notfound, ptr); break;
                case exportedMessageTotalCount: put_unaligned_be64(st->exported_pkt, ptr); break;
                case exportedOctetTotalCount:   put_unaligned_be64(st->exported_traf, ptr); break;
                case exportedFlowRecordTotalCount: put_unaligned_be64(st->exported_flow, ptr); break;
                case ignoredPacketTotalCount:   put_unaligned_be64(st->pkt_drop, ptr); break;
                case ignoredOctetTotalCount:    put_unaligned_be64(st->traf_drop, ptr); break;
                case notSentFlowTotalCount:     put_unaligned_be64(st->flow_lost, ptr); break;
                case notSentPacketTotalCount:   put_unaligned_be64(st->pkt_lost, ptr); break;
                case notSentOctetTotalCount:    put_unaligned_be64(st->traf_lost, ptr); break;
                case flowStartMilliseconds:     put_unaligned_be64(ktime_to_ms(ts->first), ptr); break;
                case flowEndMilliseconds:       put_unaligned_be64(ktime_to_ms(ts->last), ptr); break;
                case systemInitTimeMilliseconds: put_unaligned_be64(get_sys_init_time_ms(), ptr); break;
                case observationDomainName:     memcpy(ptr, version_string, version_string_size + 1); break;
#ifdef ENABLE_SAMPLER
                case FLOW_SAMPLER_ID:
                case selectorId:
                                                *ptr = get_sampler_mode(); break;
                case FLOW_SAMPLER_MODE:
                                                *ptr = get_sampler_mode_nf(); break;
                case flowSelectorAlgorithm:     *ptr = get_flowselectoralgo(); break;
                case samplingSize:
                case samplingFlowInterval:
                                                *ptr = 1 /* always 'one-out-of' */; break;
                case samplingFlowSpacing:
                case samplingPopulation:
                case FLOW_SAMPLER_RANDOM_INTERVAL:
                                                put_unaligned_be16(get_sampler_interval(), ptr); break;
                case selectorIDTotalFlowsObserved: put_unaligned_be64(atomic64_read(&flows_observed), ptr); break;
                case selectorIDTotalFlowsSelected: put_unaligned_be64(atomic64_read(&flows_selected), ptr); break;
                case selectorIdTotalPktsObserved:  put_unaligned_be64(st->pkts_observed, ptr); break;
                case selectorIdTotalPktsSelected:  put_unaligned_be64(st->pkts_selected, ptr); break;
#endif
                default:
                        WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type);
                        /* do not export whatever the buffer happened to
                         * contain: zero the field, whose size is the
                         * second half of the (type, size) pair */
                        memset(ptr, 0, tpl->fields[i]);
                }
                ptr += tpl->fields[i++];
        }

        pdu_data_records++;
        /* the record is now part of the open flowset */
        pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size);

        pdu_ts_mod = jiffies;
}

/* Export a statistics record that needs only a duration: the counters
 * argument of export_stat_st_ts() is passed as NULL. */
static inline void export_stat_ts(const unsigned int tpl_mask, struct duration *ts)
{
        export_stat_st_ts(tpl_mask, NULL, ts);
}

/* Export a statistics record that needs only counters: the duration
 * argument of export_stat_st_ts() is passed as NULL. */
static inline void export_stat_st(const unsigned int tpl_mask, struct ipt_netflow_stat *st)
{
        export_stat_st_ts(tpl_mask, st, NULL);
}

/* Export a record of an options template that takes neither counters nor
 * a duration as input. */
static inline void export_stat(const unsigned int tpl_mask)
{
        export_stat_st_ts(tpl_mask, NULL, NULL);
}

static void netflow_export_stats(void)
{
        struct ipt_netflow_stat t = { 0 };
        int cpu;

        if (unlikely(!ts_sysinf_last) ||
            time_is_before_jiffies(ts_sysinf_last + SYSINFO_INTERVAL * HZ)) {
                start_ts.last = ktime_get_real();
                export_stat_ts(OTPL_SYSITIME, &start_ts);
                ts_sysinf_last = jiffies;
                pdu_needs_export++;
        }

        if (unlikely(!ts_stat_last))
                ts_stat_last = jiffies;
        if (likely(time_is_after_jiffies(ts_stat_last + STAT_INTERVAL * HZ)))
                return;

        for_each_present_cpu(cpu) {
                struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

                t.notfound      += st->notfound; // observedFlowTotalCount
                t.exported_pkt  += st->exported_pkt;  // exportedMessageTotalCount
                t.exported_traf += st->exported_traf; // exportedOctetTotalCount
                t.exported_flow += st->exported_flow; // exportedFlowRecordTotalCount
                t.pkt_drop      += st->pkt_drop;  // ignoredPacketTotalCount
                t.traf_drop     += st->traf_drop; // ignoredOctetTotalCount
                t.flow_lost     += st->flow_lost; // notSentFlowTotalCount
                t.pkt_lost      += st->pkt_lost;  // notSentPacketTotalCount
                t.traf_lost     += st->traf_lost; // notSentOctetTotalCount
#ifdef ENABLE_SAMPLER
                t.pkts_selected += st->pkts_selected;
                t.pkts_observed += st->pkts_observed;
#endif
                t.drop.first_tv64 = min_not_zero(t.drop.first_tv64, st->drop.first_tv64);
                t.drop.last_tv64  = max(t.drop.last_tv64, st->drop.last_tv64);
                t.lost.first_tv64 = min_not_zero(t.lost.first_tv64, st->lost.first_tv64);
                t.lost.last_tv64  = max(t.lost.last_tv64, st->lost.last_tv64);
        }

        export_stat_st(OTPL_MPSTAT, &t);
        if (t.pkt_drop)
                export_stat_st_ts(OTPL_MPRSTAT, &t, &t.drop);
        if (t.pkt_lost)
                export_stat_st_ts(OTPL_EPRSTAT, &t, &t.lost);
#ifdef ENABLE_SAMPLER
        if (protocol == 10) {
                sampling_ts.last = ktime_get_real();
                switch (get_sampler_mode()) {
                case SAMPLER_HASH:
                        export_stat_st_ts(OTPL_SEL_STATH, &t, &sampling_ts);
                        break;
                case SAMPLER_DETERMINISTIC:
                case SAMPLER_RANDOM:
                        export_stat_st_ts(OTPL_SEL_STAT, &t, &sampling_ts);
                }
        }
#endif

        ts_stat_last = jiffies;
        pdu_needs_export++;
}

#ifdef ENABLE_SAMPLER
/*
 * Export the option record describing the sampler, if sampling is enabled
 * and either nothing was exported yet or SAMPLER_INFO_INTERVAL seconds
 * passed since the last export. v9 uses the sampler template, IPFIX the
 * count-based or random selector template depending on the mode.
 */
static void export_sampler_parameters(void)
{
        /* sampling is off: nothing to describe */
        if (!get_sampler_mode())
                return;

        /* exported before and the interval has not elapsed yet */
        if (likely(ts_sampler_last) &&
            !time_is_before_jiffies(ts_sampler_last + SAMPLER_INFO_INTERVAL * HZ))
                return;

        if (protocol == 9) {
                export_stat(OTPL_SAMPLER);
        } else {
                const unsigned char mode = get_sampler_mode();

                export_stat(mode == SAMPLER_DETERMINISTIC ? OTPL_SEL_COUNT : OTPL_SEL_RAND);
        }
        ts_sampler_last = jiffies;
}
#endif

/*
 * Write a textual description of @dev into @ptr (at most @size bytes):
 * the driver name, followed - for devices that are up and running and
 * whose link settings can be read - by ",<speed><units>,<port>".
 *
 * Returns the number of bytes written; 0 if there is no room, the device
 * has no ethtool_ops, or ops->begin() fails.
 */
static int ethtool_drvinfo(unsigned char *ptr, size_t size, struct net_device *dev)
{
        struct ethtool_drvinfo info = { 0 };
        const struct ethtool_ops *ops = dev->ethtool_ops;
        /* _KSETTINGS(old, new) picks the expression matching the link
         * settings API offered by the kernel headers */
#ifndef ETHTOOL_GLINKSETTINGS
        struct ethtool_cmd ecmd;
#define _KSETTINGS(x, y) (x)
#else
        struct ethtool_link_ksettings ekmd;
#define _KSETTINGS(x, y) (y)
#endif
        int len = size;
        int written;

        if (len <= 0 || !ops)
                return 0;
        /* was not called before __ethtool_get_settings() though */
        if (ops->begin && ops->begin(dev) < 0)
                return 0;

        /* driver name */
        if (ops->get_drvinfo)
                ops->get_drvinfo(dev, &info);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
        else if (dev->dev.parent && dev->dev.parent->driver) {
                strlcpy(info.driver, dev->dev.parent->driver->name, sizeof(info.driver));
        }
#endif
        written = scnprintf(ptr, len, "%s", info.driver);
        ptr += written;
        len -= written;
        if (!written || len <= 1) /* have room for separator too */
                goto ret;

        /* only get_settings for running devices to not trigger link negotiation */
        if (dev->flags & IFF_UP &&
            dev->flags & IFF_RUNNING &&
            !_KSETTINGS(__ethtool_get_settings(dev, &ecmd), __ethtool_get_link_ksettings(dev, &ekmd))) {
                char *units, *port;
                __u32 speed = _KSETTINGS(ethtool_cmd_speed(&ecmd), ekmd.base.speed);

                /* speeds above 1000 are scaled down and shown as GbE */
                if (speed == SPEED_UNKNOWN) {
                        units = "";
                } else if (speed > 1000) {
                        speed /= 1000;
                        units = "GbE";
                } else {
                        units = "MbE";
                }

                switch (_KSETTINGS(ecmd.port, ekmd.base.port)) {
                case PORT_TP:
                        port = "tp";
                        break;
                case PORT_AUI:
                        port = "aui";
                        break;
                case PORT_MII:
                        port = "mii";
                        break;
                case PORT_FIBRE:
                        port = "fb";
                        break;
                case PORT_BNC:
                        port = "bnc";
                        break;
#ifdef PORT_DA
                case PORT_DA:
                        port = "da";
                        break;
#endif
                default:
                        port = "";
                        break;
                }
                written = scnprintf(ptr, len, ",%d%s,%s", speed, units, port);
                len -= written;
        }
ret:
        /* pairs with ops->begin() above */
        if (ops->complete)
                ops->complete(dev);
        return size - len;
}
#undef _KSETTINGS

/* ARPHRD_* hardware type codes that dev_type() is able to name.
 * Entries are paired by position with netdev_type_name[]: both arrays
 * must keep the same length and the same order. */
static const unsigned short netdev_type[] =
{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_AX25,
        ARPHRD_IEEE802, ARPHRD_ARCNET,
        ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
        ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND,
        ARPHRD_SLIP, ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6,
        ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
        ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
        ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
        ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
        ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
        ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_IRDA,
        ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
        ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
        ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

/* Printable names for the hardware types listed in netdev_type[].
 * The name at index N describes the ARPHRD_* code at index N of
 * netdev_type[], so both arrays must stay the same length and order.
 * These strings are what export_dev() emits as the interface description
 * when neither an alias nor ethtool driver info is available. */
static const char *const netdev_type_name[] =
{"NET/ROM", "Ethernet", "AX.25 Level 2",
        "IEEE 802.2 Ethernet", "ARCnet",
        "Frame Relay DLCI", "ATM", "Metricom STRIP",
        "IEEE 1394 IPv4", "EUI-64", "InfiniBand",
        "SLIP", "CSLIP", "SLIP6", "CSLIP6",
        "ROSE", "X.25", "HW X.25",
        "PPP", "Cisco HDLC", "LAPB", "DDCMP",
        "Raw HDLC", "IPIP Tunnel", "IP6IP6 Tunnel", "FRAD",
        "Loopback", "Localtalk", "FDDI",
        "SIT Tunnel", "IP over DDP", "GRE over IP",
        /* ARPHRD_PIMREG: was misspelled "PISM Register" */
        "PIMSM Register", "HIPPI", "IrDA",
        "IEEE 802.11", "IEEE 802.11 Prism2",
        "IEEE 802.11 Radiotap", "PhoNet", "PhoNet pipe",
        "IEEE 802.15.4", "void", "none"};

/* Translate a net_device.type value (an ARPHRD_* code) into a printable
 * name by a linear search of netdev_type[].
 * Returns the matching netdev_type_name[] entry, or an empty string when
 * the code is not listed. */
static const char *dev_type(int dev_type)
{
        int i;

        /* Both table sizes are compile-time constants, so a mismatch is
         * caught when the module is built instead of crashing at run time. */
        BUILD_BUG_ON(ARRAY_SIZE(netdev_type) != ARRAY_SIZE(netdev_type_name));
        for (i = 0; i < ARRAY_SIZE(netdev_type); i++)
                if (netdev_type[i] == dev_type)
                        return netdev_type_name[i];
        return "";
}

/* Build one OTPL_IFNAMES data record for @dev and account for it in the
 * flowset that is currently being assembled. Does nothing when
 * alloc_record_key() cannot provide room for the record. */
static void export_dev(struct net_device *dev)
{
        struct data_template *tpl;
        unsigned char *rec;
        int pos = 0;

        rec = alloc_record_key(OTPL_IFNAMES, &tpl);
        if (unlikely(!rec))
                return;

        /* tpl->fields[] is read as (element id, size) pairs; id 0 ends it */
        for (;;) {
                const int type = tpl->fields[pos];
                const int size = tpl->fields[pos + 1];
                int used;

                pos += 2;
                if (!type)
                        break;

                switch (type) {
                case IF_DESC:
                        /* Description source, by priority: the manually set
                         * dev alias, then the ethtool driver name with basic
                         * info, and net_device.type as the last resort. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
                        used = dev->ifalias ?
                            scnprintf(rec, size, "%s", dev->ifalias) :
                            ethtool_drvinfo(rec, size, dev);
#else
                        used = ethtool_drvinfo(rec, size, dev);
#endif
                        if (!used)
                                used = scnprintf(rec, size, "%s", dev_type(dev->type));
                        /* zero-pad the rest of the fixed size field */
                        memset(rec + used, 0, size - used);
                        break;
                case IF_NAME:
                        used = scnprintf(rec, size, "%s", dev->name);
                        memset(rec + used, 0, size - used);
                        break;
                case INPUT_SNMP:
#ifdef SNMP_RULES
                        rcu_read_lock();
                        put_unaligned_be16(resolve_snmp(dev), rec);
                        rcu_read_unlock();
#else
                        put_unaligned_be16(dev->ifindex, rec);
#endif
                        break;
                case observationDomainId:
                        put_unaligned_be32(engine_id, rec);
                        break;
                default:
                        WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type);
                }
                /* advance even past unknown elements to keep the layout */
                rec += size;
        }

        pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size);
        pdu_data_records++;

        pdu_ts_mod = jiffies;
}

static void export_ifnames(void)
{
        struct net_device *dev;

        if (likely(ts_ifnames_last) &&
            time_is_after_jiffies(ts_ifnames_last + SYSINFO_INTERVAL * HZ))
                return;

        rtnl_lock();
        for_each_netdev_ns(&init_net, dev) {
                export_dev(dev);
        }
        rtnl_unlock();
        ts_ifnames_last = jiffies;
}

/* Select the export protocol: 5 (NetFlow v5), 9 (NetFlow v9), anything
 * else is handled as IPFIX. Rebinds the export callbacks, resets the pdu
 * bookkeeping and drops all templates.
 * Must be called under pause_scan_worker(). */
static void netflow_switch_version(const int ver)
{
        const char *proto_name;

        protocol = ver;
        switch (protocol) {
        case 5:
                memset(&pdu, 0, sizeof(pdu));
                pdu_data_used       = NULL;
                pdu_high_wm         = NULL;
                netflow_export_flow = &netflow_export_flow_v5;
                netflow_export_pdu  = &netflow_export_pdu_v5;
                break;
        case 9:
                pdu_data_used       = pdu.v9.data;
                pdu_high_wm         = (unsigned char *)&pdu + sizeof(pdu.v9);
                netflow_export_flow = &netflow_export_flow_tpl;
                netflow_export_pdu  = &netflow_export_pdu_v9;
                break;
        default: /* IPFIX */
                pdu_data_used       = pdu.ipfix.data;
                pdu_high_wm         = (unsigned char *)&pdu + sizeof(pdu.ipfix);
                netflow_export_flow = &netflow_export_flow_tpl;
                netflow_export_pdu  = &netflow_export_pdu_ipfix;
                break;
        }
        pdu.version = htons(protocol);

        /* start from a clean state: no templates, no pending records */
        free_templates();
        pdu_flow_records = pdu_data_records = pdu_tpl_records = 0;
        pdu_flowset = NULL;

        proto_name = (protocol == 10) ? "IPFIX" : "NetFlow";
        printk(KERN_INFO "ipt_NETFLOW protocol version %d (%s) enabled.\n",
            protocol, proto_name);
}

#ifdef CONFIG_NF_NAT_NEEDED
/* Export one queued NAT event through netflow_export_flow() using a
 * static dummy flow, then free the event. For protocol 9 and above a
 * single flow is exported; for v5 up to two flows are exported (one per
 * translated side). */
static void export_nat_event(struct nat_event *nel)
{
        static struct ipt_netflow nf = { { NULL } };

        nf.nat = nel; /* this is also flag of dummy flow */
        nf.tuple.l3proto = AF_INET;
        nf.tuple.protocol = nel->protocol;
        if (nel->nat_event == NAT_DESTROY)
                nf.tcp_flags = TCP_FIN_RST;
        else
                nf.tcp_flags = TCP_SYN_ACK;

        /* the source side is the pre-nat one for every protocol version */
        nf.tuple.src.ip = nel->pre.s_addr;
        nf.tuple.s_port = nel->pre.s_port;

        if (protocol < 9) {
                /* The weird v5 packet(s).
                 * src and dst will be the same as in the data flow from the
                 * FORWARD chain, where src is the pre-nat src ip and dst is
                 * the post-nat dst ip. What is missing there is the external
                 * src ip for SNAT, or the pre-nat dst ip for DNAT. That goes
                 * into the Nexthop field, with the port in the src/dst AS
                 * field. tcp_flags tells a start event from a stop event.
                 * Two flows are sent in case of full nat. */
                nf.tuple.dst.ip = nel->post.d_addr;
                nf.tuple.d_port = nel->post.d_port;

                nf.nf_ts_first = nel->ts_jiffies;
                nf.nf_ts_last = nel->ts_jiffies;

                /* source address or port was translated */
                if (nel->pre.s_addr != nel->post.s_addr ||
                    nel->pre.s_port != nel->post.s_port) {
                        nf.nh.ip = nel->post.s_addr;
                        nf.s_as  = nel->post.s_port;
                        nf.d_as  = 0;
                        netflow_export_flow(&nf);
                }
                /* destination address or port was translated */
                if (nel->pre.d_addr != nel->post.d_addr ||
                    nel->pre.d_port != nel->post.d_port) {
                        nf.nh.ip = nel->pre.d_addr;
                        nf.s_as  = 0;
                        nf.d_as  = nel->pre.d_port;
                        netflow_export_flow(&nf);
                }
        } else {
                nf.nf_ts_obs = nel->ts_ktime;
                nf.tuple.dst.ip = nel->pre.d_addr;
                nf.tuple.d_port = nel->pre.d_port;
                netflow_export_flow(&nf);
        }

        kfree(nel);
}
#endif /* CONFIG_NF_NAT_NEEDED */

/* Return non-zero when a still active flow has to be exported: either it
 * has existed for more than a_timeout jiffies (counted from nf_ts_first
 * up to j), or its byte counter reached FLOW_FULL_WATERMARK. */
static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout,
    const unsigned long j)
{
        if ((j - nf->nf_ts_first) > a_timeout)
                return 1;
        return nf->nr_bytes >= FLOW_FULL_WATERMARK;
}

/* Return flowEndReason (rfc5102) for an idle flow, or 0 when the flow has
 * not been idle for more than i_timeout jiffies yet.
 * i_timeout == 0 means flush: every flow is reported as "forced end". */
static inline int inactive_needs_export(const struct ipt_netflow *nf, const long i_timeout,
    const unsigned long j)
{
        if (unlikely(!i_timeout))
                return 0x04; /* forced end */

        if (likely((j - nf->nf_ts_last) <= i_timeout))
                return 0;

        if (nf->tuple.protocol == IPPROTO_TCP &&
            (nf->tcp_flags & TCP_FIN_RST))
                return 0x03; /* end of Flow detected */

        return 0x01; /* idle timeout */
}

/* Decide whether the flow has to be exported and store the verdict in
 * nf->flowEndReason. The inactive check wins; the active check is only
 * consulted when the flow is not idle. Returns the stored reason, which is
 * 0 when no export is needed. */
static inline int needs_export_rec(struct ipt_netflow *nf, const long i_timeout,
    const long a_timeout, const unsigned long j)
{
        int why = inactive_needs_export(nf, i_timeout, j);

        if (why == 0) {
                if (active_needs_export(nf, a_timeout, j))
                        why = 0x02; /* active timeout or just active flow */
        }
        nf->flowEndReason = why;
        return nf->flowEndReason;
}

/* Collect every flow that is due for export, hand it to
 * netflow_export_flow(), export the queued NAT events and finally send the
 * pdu being assembled when it is stale or flagged for export.
 *
 * flush: when non-zero the inactive timeout becomes 0, which makes
 *        inactive_needs_export() report every flow as "forced end", so the
 *        whole cache and the pdu are flushed.
 */
/* this function is guaranteed to be called non-concurrently */
/* return number of pdus sent (growth of pdu_count during the call) */
static int netflow_scan_and_export(const int flush)
{
        const long i_timeout = flush? 0 : inactive_timeout * HZ;
        const long a_timeout = active_timeout * HZ;
#ifdef HAVE_LLIST
        struct llist_node *node;
#endif
        const int pdu_c = pdu_count;
        LIST_HEAD(export_list);
        struct ipt_netflow *nf, *tmp;
        int i;
#ifdef ENABLE_SAMPLER
        unsigned char mode;
#endif

        /* the auxiliary records are only exported for protocol 9 and above */
        if (protocol >= 9) {
                netflow_export_stats();
#ifdef ENABLE_SAMPLER
                export_sampler_parameters();
#endif
                export_ifnames();
        }

        /* Pass 1: unlink exportable flows from the hash table stripes and
         * park them on the private export_list. */
        read_lock_bh(&htable_rwlock);
        for (i = 0; i < LOCK_COUNT; i++) {
                struct stripe_entry *stripe = &htable_stripes[i];

                /* do not wait for a busy stripe, it is skipped in this run
                 * and only counted in wk_trylock */
                if (!spin_trylock(&stripe->lock)) {
                        ++wk_trylock;
                        continue;
                }
                list_for_each_entry_safe_reverse(nf, tmp, &stripe->list, flows_list) {
                        ++wk_count;
                        if (needs_export_rec(nf, i_timeout, a_timeout, jiffies)) {
                                hlist_del(&nf->hlist);
                                list_del(&nf->flows_list);
                                list_add(&nf->flows_list, &export_list);
                        } else {
                                /* all flows which need to be exported are always at the tail,
                                 * so if there are no more exportable flows we can break */
                                break;
                        }
                }
                spin_unlock(&stripe->lock);
        }
        read_unlock_bh(&htable_rwlock);

#ifdef HAVE_LLIST
        /* also take over everything queued on export_llist */
        node = llist_del_all(&export_llist);
        while (node) {
                /* fetch the successor before the entry is moved to export_list */
                struct llist_node *next = node->next;
                nf = llist_entry(node, struct ipt_netflow, flows_llnode);
                ++wk_llist;
                list_add(&nf->flows_list, &export_list);
                node = next;
        }
#endif

#ifdef ENABLE_SAMPLER
        mode = get_sampler_mode();
#endif
        set_jiffies_base();
        /* Pass 2: export (or, with the sampler enabled, possibly discard)
         * every collected flow. */
        list_for_each_entry_safe(nf, tmp, &export_list, flows_list) {
                NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
                NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
                list_del(&nf->flows_list);
#ifdef ENABLE_SAMPLER
                if (mode) {
                        const unsigned int interval = get_sampler_interval();
                        unsigned int val; /* [0..interval) */

                        atomic64_inc(&flows_observed);
                        NETFLOW_STAT_ADD_ATOMIC(pkts_observed, nf->nr_packets);
                        switch (mode) {
                        case SAMPLER_DETERMINISTIC:
                                val = nf->sampler_count % interval;
                                break;
                        case SAMPLER_RANDOM:
                                val = prandom_u32_max(interval);
                                break;
                        default: /* SAMPLER_HASH */
                                val = 0;
                        }
                        /* only flows with val == 0 are selected, the others
                         * are freed without being exported */
                        if (val) {
                                ipt_netflow_free(nf);
                                continue;
                        }
                        atomic64_inc(&flows_selected);
                        NETFLOW_STAT_ADD_ATOMIC(pkts_selected, nf->nr_packets);
                }
#endif
                netflow_export_flow(nf);
        }

#ifdef CONFIG_NF_NAT_NEEDED
        /* Drain nat_list; nat_lock is dropped around export_nat_event() so
         * the export itself runs without the lock held. */
        spin_lock_bh(&nat_lock);
        while (!list_empty(&nat_list)) {
                struct nat_event *nel;

                nel = list_entry(nat_list.next, struct nat_event, list);
                list_del(&nel->list);
                spin_unlock_bh(&nat_lock);
                export_nat_event(nel);
                spin_lock_bh(&nat_lock);
        }
        spin_unlock_bh(&nat_lock);
#endif
        /* flush flows stored in pdu if there are no new flows for too long */
        /* Note: using >= to allow flow purge on zero timeout */
        if ((jiffies - pdu_ts_mod) >= i_timeout || pdu_needs_export) {
                netflow_export_pdu();
                pdu_needs_export = 0;
        }

        return pdu_count - pdu_c;
}

/* Work handler: runs one non-flushing export scan, passes the number of
 * pdus it produced to _schedule_scan_worker() and records timing
 * statistics of the run in the wk_* variables. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void netflow_work_fn(void *dummy)
#else
static void netflow_work_fn(struct work_struct *dummy)
#endif
{
        int sent;

        /* reset the per-run counters, they are updated during the scan */
        wk_llist = 0;
        wk_trylock = 0;
        wk_count = 0;

        wk_cpu = smp_processor_id();
        wk_start = jiffies;

        sent = netflow_scan_and_export(DONT_FLUSH);
        _schedule_scan_worker(sent);

        /* jiffies spent in this run, including the rescheduling */
        wk_busy = jiffies - wk_start;
}

/* The rate timer fires every SAMPLERATE seconds and per-sample deltas are
 * divided by SAMPLERATE with ">> RATESHIFT", so SAMPLERATE has to be
 * exactly 2^RATESHIFT. It is derived with a shift (not RATESHIFT*RATESHIFT,
 * which only matches for RATESHIFT == 2) to keep both in sync. */
#define RATESHIFT 2
#define SAMPLERATE (1 << RATESHIFT)
/* number of samples taken in the given number of minutes */
#define NUMSAMPLES(minutes) ((minutes) * 60 / SAMPLERATE)
/* EWMA weight 2 / (NUMSAMPLES + 1) applied to v in 10 bit fixed point:
 * x * (1024 / y) >> 10 is because I can not just divide long long integer */
#define _A(v, m) (((v) * (1024 * 2 / (NUMSAMPLES(m) + 1))) >> 10)

/* Update the moving average ewma with the current sample cur.
 * Note that CALC_RATE arguments should never be unsigned. */
#define CALC_RATE(ewma, cur, minutes) ((ewma) += _A((cur) - (ewma), minutes))

// calculate EWMA throughput rate for whole module
/* Timer callback, re-armed by itself every SAMPLERATE seconds.
 * It sums the per-cpu counters, derives per-cpu and module wide packet/bit
 * rates and the hash "metric" from the deltas since the previous run, and
 * feeds them into the 1, 5 and 15 minute moving averages.
 * The function-static old_* variables keep the totals of the previous run. */
static void rate_timer_calc(unsigned long dummy)
{
        static u64 old_pkt_total = 0;
        static u64 old_traf_total = 0;
        static u64 old_searched = 0;
        static u64 old_found = 0;
        static u64 old_notfound = 0;
        u64 searched = 0;
        u64 found = 0;
        u64 notfound = 0;
        int dsrch, dfnd, dnfnd;
        u64 pkt_total = 0;
        u64 traf_total = 0;
        int cpu;

        for_each_present_cpu(cpu) {
                int metrt;
                struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
                u64 pkt_t = st->pkt_total;

                pkt_total += pkt_t;
                /* delta since the previous sample, scaled down by RATESHIFT */
                st->pkt_total_rate = (pkt_t - st->pkt_total_prev) >> RATESHIFT;
                st->pkt_total_prev = pkt_t;
                traf_total += st->traf_total;
                searched += st->searched;
                found += st->found;
                notfound += st->notfound;
                st->exported_rate = (st->exported_traf - st->exported_trafo) >> RATESHIFT;
                st->exported_trafo = st->exported_traf;
                /* calculate hash metric per cpu */
                dsrch = st->searched - st->old_searched;
                dfnd  = st->found - st->old_found;
                dnfnd = st->notfound - st->old_notfound;
                /* zero values are not accounted, because only usage is interesting, not nonusage */
                metrt = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : st->metric;
                CALC_RATE(st->metric, metrt, 1);
                st->old_searched = st->searched;
                st->old_found    = st->found;
                st->old_notfound = st->notfound;
        }

        /* module wide packet rate and its moving averages */
        sec_prate = (pkt_total - old_pkt_total) >> RATESHIFT;
        CALC_RATE(min5_prate, sec_prate, 5);
        CALC_RATE(min_prate, sec_prate, 1);
        old_pkt_total = pkt_total;

        /* module wide bit rate (traffic delta times 8) and its moving averages */
        sec_brate = ((traf_total - old_traf_total) * 8) >> RATESHIFT;
        CALC_RATE(min5_brate, sec_brate, 5);
        CALC_RATE(min_brate, sec_brate, 1);
        old_traf_total = traf_total;

        /* hash stat */
        dsrch = searched - old_searched;
        dfnd  = found - old_found;
        dnfnd = notfound - old_notfound;
        old_searched = searched;
        old_found    = found;
        old_notfound = notfound;
        /* if there is no access to hash keep rate steady */
        metric = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
        CALC_RATE(min15_metric, metric, 15);
        CALC_RATE(min5_metric, metric, 5);
        CALC_RATE(min_metric, metric, 1);

        /* yes, timer delay is not accounted, but this stat is just estimational */
        mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
}

#ifdef CONFIG_NF_NAT_NEEDED
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
static struct nf_ct_event_notifier *saved_event_cb __read_mostly = NULL;
static int netflow_conntrack_event(const unsigned int events, struct nf_ct_event *item)
#else
static int netflow_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr)
#endif
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
        struct nf_conn *ct = item->ct;
#else
        struct nf_conn *ct = (struct nf_conn *)ptr;
#endif
        struct nat_event *nel;
        const struct nf_conntrack_tuple *t;
        int ret = NOTIFY_DONE;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
        struct nf_ct_event_notifier *notifier;

        /* Call netlink first. */
        notifier = rcu_dereference(saved_event_cb);
        if (likely(notifier))
                ret = notifier->fcn(events, item);
#endif
        if (unlikely(!natevents))
                return ret;

        if (!(events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED) | (1 << IPCT_DESTROY))))
                return ret;

        if (!(ct->status & IPS_NAT_MASK))
                return ret;

        if (unlikely(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET ||
                    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num != AF_INET)) {
                /* Well, there is no linux NAT for IPv6 anyway. */
                return ret;
        }

        if (!(nel = kmalloc(sizeof(struct nat_event), GFP_ATOMIC))) {
                printk(KERN_ERR "ipt_NETFLOW: can't kmalloc nat event\n");
                return ret;
        }
        memset(nel, 0, sizeof(struct nat_event));
        nel->ts_ktime = ktime_get_real();
        nel->ts_jiffies = jiffies;
        t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
        nel->protocol = t->dst.protonum;
        nel->pre.s_addr = t->src.u3.ip;
        nel->pre.d_addr = t->dst.u3.ip;
        nel->pre.s_port = t->src.u.all;
        nel->pre.d_port = t->dst.u.all;
        t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
        /* reply is reversed */
        nel->post.s_addr = t->dst.u3.ip;
        nel->post.d_addr = t->src.u3.ip;
        nel->post.s_port = t->dst.u.all;
        nel->post.d_port = t->src.u.all;
        if (events & (1 << IPCT_DESTROY)) {
                nel->nat_event = NAT_DESTROY;
                nat_events_stop++;
        } else {
                nel->nat_event = NAT_CREATE;
                nat_events_start++;
        }

        spin_lock_bh(&nat_lock);
        list_add_tail(&nel->list, &nat_list);
        spin_unlock_bh(&nat_lock);

        return ret;
}

/* Notifier object that binds netflow_conntrack_event() as the callback.
 * Before 2.6.31 it is a plain notifier_block, since 2.6.31 it is a
 * nf_ct_event_notifier; this mirrors the two prototypes of the callback. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
static struct notifier_block ctnl_notifier = {
        .notifier_call = netflow_conntrack_event
};
#else
static struct nf_ct_event_notifier ctnl_notifier = {
        .fcn = netflow_conntrack_event
};
#endif /* since 2.6.31 */
#endif /* CONFIG_NF_NAT_NEEDED */

/* Validate a rule that uses the NETFLOW target.
 * Returns CHECK_FAIL when the rule is placed in the "nat" table, or when
 * the target family is AF_INET6 while the export protocol is 5; returns
 * CHECK_OK otherwise.
 * The return type and the parameter list depend on the kernel version;
 * since 2.6.28 the table name and target come from struct xt_tgchk_param. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) && \
    LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
static bool
#else
static int
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
netflow_target_check(const char *tablename, const void *entry, const struct xt_target *target,
    void *targinfo,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
    unsigned int targinfosize,
#endif
    unsigned int hook_mask)
{
#else
netflow_target_check(const struct xt_tgchk_param *par)
{
        /* provide the same local names the older prototype has as parameters */
        const char *tablename = par->table;
        const struct xt_target *target = par->target;
#endif
        if (strcmp("nat", tablename) == 0) {
                /* In the nat table we only see single packet per flow, which is useless. */
                printk(KERN_ERR "%s target: is not valid in %s table\n", target->name, tablename);
                return CHECK_FAIL;
        }
        if (target->family == AF_INET6 && protocol == 5) {
                printk(KERN_ERR "ip6tables NETFLOW target is meaningful for protocol 9 or 10 only.\n");
                return CHECK_FAIL;
        }
        return CHECK_OK;
}

#define SetXBit(x) (0x8000 >> (x)) /* Proper bit for htons later. */
/* Map one IPv6 next-header value to the flag it contributes to the 16 bit
 * header bitmap built by the caller. Headers known not to carry a flag
 * yield 0; any value not listed here yields the bit produced by
 * SetXBit(3). */
static inline __u16 observed_hdrs(const __u8 currenthdr)
{
        __u16 bits;

        /* For speed, in case switch is not optimized. */
        if (currenthdr == IPPROTO_TCP || currenthdr == IPPROTO_UDP)
                return 0;

        switch (currenthdr) {
        case IPPROTO_DSTOPTS:
                bits = SetXBit(0);
                break;
        case IPPROTO_HOPOPTS:
                bits = SetXBit(1);
                break;
        case IPPROTO_ROUTING:
                bits = SetXBit(5);
                break;
        case IPPROTO_MH:
                bits = SetXBit(12);
                break;
        case IPPROTO_ESP:
                bits = SetXBit(13);
                break;
        case IPPROTO_AH:
                bits = SetXBit(14);
                break;
        case IPPROTO_COMP:
                bits = SetXBit(15);
                break;
        case IPPROTO_FRAGMENT: /* Handled elsewhere. */
                /* Next is known headers. */
        case IPPROTO_ICMPV6:
        case IPPROTO_UDPLITE:
        case IPPROTO_IPIP:
        case IPPROTO_PIM:
        case IPPROTO_GRE:
        case IPPROTO_SCTP:
#ifdef IPPROTO_L2TP
        case IPPROTO_L2TP:
#endif
        case IPPROTO_DCCP:
                bits = 0;
                break;
        default:
                bits = SetXBit(3); /* Unknown header. */
                break;
        }
        return bits;
}

/* http://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml */
/* Lookup table used by ip4_options(): the index is the IPv4 option type
 * octet, the value is the number of the flag that option sets.
 * Indexes that are not listed are zero initialized, and ip4_options()
 * ignores zero entries; that is why RR (value 0) has to be handled there
 * explicitly. The highest listed index (222) defines the table size. */
static const __u8 ip4_opt_table[] = {
        [7]     = 0,    /* RR */ /* parsed manually because of 0 */
        [134]   = 1,    /* CIPSO */
        [133]   = 2,    /* E-SEC */
        [68]    = 3,    /* TS */
        [131]   = 4,    /* LSR */
        [130]   = 5,    /* SEC */
        [1]     = 6,    /* NOP */
        [0]     = 7,    /* EOOL */
        [15]    = 8,    /* ENCODE */
        [142]   = 9,    /* VISA */
        [205]   = 10,   /* FINN */
        [12]    = 11,   /* MTUR */
        [11]    = 12,   /* MTUP */
        [10]    = 13,   /* ZSU */
        [137]   = 14,   /* SSR */
        [136]   = 15,   /* SID */
        [151]   = 16,   /* DPS */
        [150]   = 17,   /* NSAPA */
        [149]   = 18,   /* SDB */
        [147]   = 19,   /* ADDEXT */
        [148]   = 20,   /* RTRALT */
        [82]    = 21,   /* TR */
        [145]   = 22,   /* EIP */
        [144]   = 23,   /* IMITD */
        [30]    = 25,   /* EXP */
        [94]    = 25,   /* EXP */
        [158]   = 25,   /* EXP */
        [222]   = 25,   /* EXP */
        [25]    = 30,   /* QS */
        [152]   = 31,   /* UMP */
};
/* Parse IPv4 Options array into ipv4Options IPFIX value.
 *
 * p:       start of the options area of the IPv4 header
 * optsize: number of option bytes available at p
 *
 * Returns a bitmap with one bit per option type seen; parsing stops at
 * EOOL, at the end of the buffer, or at an option whose length octet is
 * less than 2. */
static inline __u32 ip4_options(const u_int8_t *p, const unsigned int optsize)
{
        __u32 ret = 0;
        unsigned int i;

        for (i = 0; likely(i < optsize); ) {
                u_int8_t op = p[i++];

                if (op == 7) /* RR: bit 0 */
                        ret |= 1;
                else if (likely(op < ARRAY_SIZE(ip4_opt_table))) {
                        /* Btw, IANA doc is messed up in a crazy way:
                         *   http://www.ietf.org/mail-archive/web/ipfix/current/msg06008.html (2011)
                         * I decided to follow IANA _text_ description from
                         *   http://www.iana.org/assignments/ipfix/ipfix.xhtml (2013-09-18)
                         *
                         * Set proper bit for htonl later.
                         * The shift is done on an unsigned constant because
                         * table value 1 shifts into bit 31, which overflows
                         * a signed int. */
                        if (ip4_opt_table[op])
                                ret |= 1U << (32 - ip4_opt_table[op]);
                }
                /* i now indexes the length octet of a multi byte option */
                if (likely(i >= optsize || op == 0))
                        break;
                else if (unlikely(op == 1))
                        continue; /* NOP is a single octet */
                else if (unlikely(p[i] < 2))
                        break; /* invalid length, would not advance */
                else
                        i += p[i] - 1; /* length counts type and length octets */
        }
        return ret;
}

#define TCPHDR_MAXSIZE (4 * 15)
/* List of options: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml */
/* Build a bitmap of the TCP option kinds (0..31) present in a segment.
 *
 * skb: packet being inspected
 * ptr: offset of the TCP header inside skb
 * th:  TCP header, th->doff gives the header length in 32 bit words
 *
 * Returns 0 when the header carries no options or the options cannot be
 * accessed. Parsing stops at EOL (kind 0), at the end of the options area,
 * or at an option whose length octet is less than 2. */
static inline __u32 tcp_options(const struct sk_buff *skb, const unsigned int ptr, const struct tcphdr *th)
{
        const unsigned int hdrsize = th->doff * 4;
        unsigned int optsize;
        __u8 _opt[TCPHDR_MAXSIZE];
        const u_int8_t *p;
        __u32 ret;
        unsigned int i;

        /* Defensive: a data offset below the fixed header size would make
         * the unsigned subtraction below wrap to a huge length. */
        if (unlikely(hdrsize <= sizeof(struct tcphdr)))
                return 0;
        optsize = hdrsize - sizeof(struct tcphdr);

        p = skb_header_pointer(skb, ptr + sizeof(struct tcphdr), optsize, _opt);
        if (unlikely(!p))
                return 0;
        ret = 0;
        for (i = 0; likely(i < optsize); ) {
                u_int8_t opt = p[i++];

                if (likely(opt < 32)) {
                        /* IANA doc is messed up, see above.
                         * Kind 0 is mapped to bit 0 explicitly, the same way
                         * ip4_options() handles its bit 0: "1 << 32" is
                         * undefined. The unsigned constant avoids a signed
                         * overflow for kind 1 (bit 31). */
                        ret |= opt ? 1U << (32 - opt) : 1U;
                }
                /* i now indexes the length octet of a multi byte option */
                if (likely(i >= optsize || opt == 0))
                        break;
                else if (unlikely(opt == 1))
                        continue; /* NOP is a single octet */
                else if (unlikely(p[i] < 2)) /* "silly options" */
                        break;
                else
                        i += p[i] - 1; /* length counts kind and length octets */
        }
        return ret;
}

/* Return non-zero when the region of off bytes starting at ptr ends at or
 * before skb->data, i.e. it does not reach past the start of the data
 * area. */
static inline int skb_in_header(const struct sk_buff *skb, const void *ptr, size_t off)
{
        const unsigned char *region_end = (const unsigned char *)ptr + off;

        return region_end <= skb->data;
}

/* Return 1 when the network byte order EtherType is one of the two VLAN
 * tag types (ETH_P_8021Q or ETH_P_8021AD), 0 otherwise. */
static inline int eth_p_vlan(__be16 eth_type)
{
        if (eth_type == htons(ETH_P_8021Q))
                return 1;
        if (eth_type == htons(ETH_P_8021AD))
                return 1;
        return 0;
}

/* Extract all L2 header data, currently (in iptables) skb->data is
 * pointing to network_header, so we use mac_header instead. */
/* Parse eth header, then vlans, then mpls. */
/* Fills the L2 related members of *tuple (MAC addresses, VLAN tags, MPLS
 * labels), each only when the matching ENABLE_MAC / ENABLE_VLAN /
 * MPLS_DEPTH option is compiled in; with none of them the body is empty.
 * Parsing stops silently as soon as a header would lie outside of the
 * area between skb->head and skb->data. */
static void parse_l2_header(const struct sk_buff *skb, struct ipt_netflow_tuple *tuple)
{
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN) || defined(MPLS_DEPTH)
/* ENABLE_L2 is defined here as a side effect and tested later in the file */
#define ENABLE_L2
        unsigned char *mac_header = skb_mac_header(skb);
# if defined(ENABLE_VLAN) || defined(MPLS_DEPTH)
        unsigned int hdr_depth;
        __be16 proto;
# endif
# ifdef ENABLE_VLAN
        int tag_num = 0;

        /* get vlan tag that is saved in skb->vlan_tci */
        if (vlan_tx_tag_present(skb))
                tuple->tag[tag_num++] = htons(vlan_tx_tag_get(skb));
# endif
        /* the ethernet header must lie completely in front of skb->data */
        if (mac_header < skb->head ||
            mac_header + ETH_HLEN > skb->data)
                return;
# ifdef ENABLE_MAC
        memcpy(&tuple->h_dst, eth_hdr(skb)->h_dest, ETH_ALEN);
        memcpy(&tuple->h_src, eth_hdr(skb)->h_source, ETH_ALEN);
# endif
# if defined(ENABLE_VLAN) || defined(MPLS_DEPTH)
        /* hdr_depth is the offset of the next header from mac_header */
        hdr_depth = ETH_HLEN;
        proto = eth_hdr(skb)->h_proto;
        if (eth_p_vlan(proto)) {
                /* walk all stacked vlan headers, even past MAX_VLAN_TAGS,
                 * to find the encapsulated protocol */
                do {
                        const struct vlan_hdr *vh;

                        vh = (struct vlan_hdr *)(mac_header + hdr_depth);
                        if (!skb_in_header(skb, vh, VLAN_HLEN))
                                return;
                        proto = vh->h_vlan_encapsulated_proto;
#  ifdef ENABLE_VLAN
                        if (tag_num < MAX_VLAN_TAGS)
                                tuple->tag[tag_num++] = vh->h_vlan_TCI;
#  endif
                        hdr_depth += VLAN_HLEN;
                } while (eth_p_vlan(proto));
        }
#  ifdef MPLS_DEPTH
        if (eth_p_mpls(proto)) {
                const struct mpls_label *mpls;
                int label_num = 0;

                /* store at most MPLS_DEPTH labels; the walk continues until
                 * an entry has the MPLS_LS_S_MASK bit set (presumably the
                 * bottom of stack flag) */
                do {
                        mpls = (struct mpls_label *)(mac_header + hdr_depth);
                        if (!skb_in_header(skb, mpls, MPLS_HLEN))
                                return;
                        if (label_num < MPLS_DEPTH)
                                tuple->mpls[label_num++] = mpls->entry;
                        hdr_depth += MPLS_HLEN;
                } while (!(mpls->entry & htonl(MPLS_LS_S_MASK)));
        }
#  endif
# endif /* defined(ENABLE_VLAN) || defined(MPLS_DEPTH) */
#endif /* defined(ENABLE_MAC) || defined(ENABLE_VLAN) || defined(MPLS_DEPTH) */
}

/* packet receiver */
static unsigned int netflow_target(
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
                           struct sk_buff **pskb,
#else
                           struct sk_buff *skb,
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
                           const struct net_device *if_in,
                           const struct net_device *if_out,
                           unsigned int hooknum,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
                           const struct xt_target *target,
# endif
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
                           const void *targinfo,
                           void *userinfo
# else
                           const void *targinfo
# endif
#else /* since 2.6.28 */
# define if_in  xt_in(par)
# define if_out xt_out(par)
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
                           const struct xt_target_param *par
# else
                           const struct xt_action_param *par
# endif
#endif
                )
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
# ifndef ENABLE_L2
        /* pskb_may_pull() may modify skb */
        const
# endif
                struct sk_buff *skb = *pskb;
#endif
        union {
                struct iphdr ip;
                struct ipv6hdr ip6;
        } _iph, *iph;
        u_int32_t hash;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
        const int family = target->family;
#else
# ifdef ENABLE_DIRECTION
        const int hooknum = xt_hooknum(par);
# endif
        const int family = xt_family(par);
#endif
        struct ipt_netflow_tuple tuple;
        struct ipt_netflow *nf;
        __u8 tcp_flags;
#ifdef ENABLE_AGGR
        struct netflow_aggr_n *aggr_n;
        struct netflow_aggr_p *aggr_p;
#endif
        __u8 s_mask, d_mask;
        unsigned int ptr;
        int fragment;
        size_t pkt_len;
        int options = 0;
        int tcpoptions = 0;
        struct stripe_entry *stripe;

        if (unlikely(
#ifdef ENABLE_L2
            /* to ensure that full L2 headers are present */
            unlikely(!pskb_may_pull(skb, 0)) ||
#endif
            !(iph = skb_header_pointer(skb, 0,
                            (likely(family == AF_INET))? sizeof(_iph.ip) : sizeof(_iph.ip6),
                            &iph)))) {
                NETFLOW_STAT_INC(truncated);
                NETFLOW_STAT_INC(pkt_drop);
                NETFLOW_STAT_ADD(traf_drop, skb->len);
                NETFLOW_STAT_TS(drop);
                return IPT_CONTINUE;
        }

        memset(&tuple, 0, sizeof(tuple));
        tuple.l3proto = family;
#ifdef ENABLE_PHYSDEV_OVER
        if (skb->nf_bridge && skb->nf_bridge->physindev)
                tuple.i_ifc = skb->nf_bridge->physindev->ifindex;
        else /* FALLTHROUGH */
#endif
        tuple.i_ifc     = if_in? if_in->ifindex : -1;
        tcp_flags       = 0;
        s_mask          = 0;
        d_mask          = 0;
        parse_l2_header(skb, &tuple);

        if (likely(family == AF_INET)) {
                tuple.src       = (union nf_inet_addr){ .ip = iph->ip.saddr };
                tuple.dst       = (union nf_inet_addr){ .ip = iph->ip.daddr };
                tuple.tos       = iph->ip.tos;
                tuple.protocol  = iph->ip.protocol;
                fragment        = unlikely(iph->ip.frag_off & htons(IP_OFFSET));
                ptr             = iph->ip.ihl * 4;
                pkt_len         = ntohs(iph->ip.tot_len);

#define IPHDR_MAXSIZE (4 * 15)
                if (unlikely(iph->ip.ihl * 4 > sizeof(struct iphdr))) {
                        u_int8_t _opt[IPHDR_MAXSIZE - sizeof(struct iphdr)];
                        const u_int8_t *op;
                        unsigned int optsize = iph->ip.ihl * 4 - sizeof(struct iphdr);

                        op = skb_header_pointer(skb, sizeof(struct iphdr), optsize, _opt);
                        if (likely(op))
                                options = ip4_options(op, optsize);
                }
        } else { /* AF_INET6 */
                __u8 currenthdr;

                tuple.src.in6   = iph->ip6.saddr;
                tuple.dst.in6   = iph->ip6.daddr;
                tuple.tos       = iph->ip6.priority;
                fragment        = 0;
                ptr             = sizeof(struct ipv6hdr);
                pkt_len         = ntohs(iph->ip6.payload_len) + sizeof(struct ipv6hdr);

                currenthdr      = iph->ip6.nexthdr;
                while (currenthdr != NEXTHDR_NONE && ipv6_ext_hdr(currenthdr)) {
                        struct ipv6_opt_hdr _hdr;
                        const struct ipv6_opt_hdr *hp;
                        unsigned int hdrlen = 0;

                        options |= observed_hdrs(currenthdr);
                        hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
                        if (hp == NULL) {
                                /* We have src/dst, so must account something. */
                                tuple.protocol = currenthdr;
                                fragment = 3;
                                goto do_protocols;
                        }

                        switch (currenthdr) {
                        case IPPROTO_FRAGMENT: {
                                struct frag_hdr _fhdr;
                                const struct frag_hdr *fh;

                                fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
                                                &_fhdr);
                                if (fh == NULL) {
                                        tuple.protocol = currenthdr;
                                        fragment = 2;
                                        goto do_protocols;
                                }
                                fragment = 1;
#define FRA0 SetXBit(4) /* Fragment header - first fragment */
#define FRA1 SetXBit(6) /* Fragmentation header - not first fragment */
                                options |= (ntohs(fh->frag_off) & 0xFFF8)? FRA1 : FRA0;
                                hdrlen = 8;
                                break;
                        }
                        case IPPROTO_AH: {
                                struct ip_auth_hdr _ahdr, *ap;

                                if (likely(ap = skb_header_pointer(skb, ptr, 8, &_ahdr)))
                                        SAVE_SPI(tuple, ap->spi);
                                hdrlen = (ap->hdrlen + 2) << 2;
                                break;
                        }
                        case IPPROTO_ESP:
                                /* After this header everything is encrypted. */
                                tuple.protocol = currenthdr;
                                goto do_protocols;
                        default:
                                hdrlen = ipv6_optlen(hp);
                        }
                        currenthdr = hp->nexthdr;
                        ptr += hdrlen;
                }
                tuple.protocol  = currenthdr;
                options |= observed_hdrs(currenthdr);
        }

do_protocols:
        if (fragment) {
                /* if conntrack is enabled it should defrag on pre-routing and local-out */
                NETFLOW_STAT_INC(frags);
        } else {
                switch (tuple.protocol) {
                    case IPPROTO_TCP: {
                        struct tcphdr _hdr, *hp;

                        if (likely(hp = skb_header_pointer(skb, ptr, 14, &_hdr))) {
                                tuple.s_port = hp->source;
                                tuple.d_port = hp->dest;
                                tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);

                                if (unlikely(hp->doff * 4 > sizeof(struct tcphdr)))
                                        tcpoptions = tcp_options(skb, ptr, hp);
                        }
                        break;
                    }
                    case IPPROTO_UDP:
                    case IPPROTO_UDPLITE:
                    case IPPROTO_SCTP: {
                        struct udphdr _hdr, *hp;

                        if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) {
                                tuple.s_port = hp->source;
                                tuple.d_port = hp->dest;
                        }
                        break;
                    }
                    case IPPROTO_ICMP: {
                        struct icmphdr _hdr, *hp;

                        if (likely(family == AF_INET) &&
                                    likely(hp = skb_header_pointer(skb, ptr, 2, &_hdr)))
                                tuple.d_port = htons((hp->type << 8) | hp->code);
                        break;
                    }
                    case IPPROTO_ICMPV6: {
                        struct icmp6hdr _icmp6h, *ic;

                        if (likely(family == AF_INET6) &&
                                    likely(ic = skb_header_pointer(skb, ptr, 2, &_icmp6h)))
                                tuple.d_port = htons((ic->icmp6_type << 8) | ic->icmp6_code);
                        break;
                    }
                    case IPPROTO_IGMP: {
                        struct igmphdr _hdr, *hp;

                        if (likely(hp = skb_header_pointer(skb, ptr, 1, &_hdr)))
                                tuple.d_port = hp->type;
                        break;
                    }
                    case IPPROTO_AH: { /* IPSEC */
                        struct ip_auth_hdr _hdr, *hp;

                        /* This is for IPv4 only. IPv6 it's parsed above. */
                        if (likely(family == AF_INET) &&
                                    likely(hp = skb_header_pointer(skb, ptr, 8, &_hdr)))
                                SAVE_SPI(tuple, hp->spi);
                        break;
                    }
                    case IPPROTO_ESP: {
                        struct ip_esp_hdr _hdr, *hp;

                        /* This is for both IPv4 and IPv6. */
                        if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr)))
                                SAVE_SPI(tuple, hp->spi);
                        break;
                    }
                }
        } /* not fragmented */

#ifdef ENABLE_AGGR
        /* aggregate networks */
        read_lock(&aggr_lock);
        if (family == AF_INET) {
                list_for_each_entry(aggr_n, &aggr_n_list, list)
                        if (unlikely((ntohl(tuple.src.ip) & aggr_n->mask) == aggr_n->addr)) {
                                tuple.src.ip &= htonl(aggr_n->aggr_mask);
                                s_mask = aggr_n->prefix;
                                atomic_inc(&aggr_n->usage);
                                break;
                        }
                list_for_each_entry(aggr_n, &aggr_n_list, list)
                        if (unlikely((ntohl(tuple.dst.ip) & aggr_n->mask) == aggr_n->addr)) {
                                tuple.dst.ip &= htonl(aggr_n->aggr_mask);
                                d_mask = aggr_n->prefix;
                                atomic_inc(&aggr_n->usage);
                                break;
                        }
        }

        if (tuple.protocol == IPPROTO_TCP ||
            tuple.protocol == IPPROTO_UDP ||
            tuple.protocol == IPPROTO_SCTP ||
            tuple.protocol == IPPROTO_UDPLITE) {
                /* aggregate ports */
                list_for_each_entry(aggr_p, &aggr_p_list, list)
                        if (unlikely(ntohs(tuple.s_port) >= aggr_p->port1 &&
                            ntohs(tuple.s_port) <= aggr_p->port2)) {
                                tuple.s_port = htons(aggr_p->aggr_port);
                                atomic_inc(&aggr_p->usage);
                                break;
                        }

                list_for_each_entry(aggr_p, &aggr_p_list, list)
                        if (unlikely(ntohs(tuple.d_port) >= aggr_p->port1 &&
                            ntohs(tuple.d_port) <= aggr_p->port2)) {
                                tuple.d_port = htons(aggr_p->aggr_port);
                                atomic_inc(&aggr_p->usage);
                                break;
                        }
        }
        read_unlock(&aggr_lock);
#endif

#ifdef SAMPLING_HASH
        hash = __hash_netflow(&tuple);
        {
                struct sampling hs = samp;

                if (hs.mode == SAMPLER_HASH) {
                        NETFLOW_STAT_INC(pkts_observed);
                        if ((u32)(((u64)hash * hs.interval) >> 32))
                                return IPT_CONTINUE;
                        NETFLOW_STAT_INC(pkts_selected);
                }
        }
        hash %= htable_size;
#else /* !SAMPLING_HASH */
        hash = hash_netflow(&tuple);
#endif
        read_lock(&htable_rwlock);
        stripe = &htable_stripes[hash & LOCK_COUNT_MASK];
        spin_lock(&stripe->lock);
        /* record */
        nf = ipt_netflow_find(&tuple, hash);
        if (unlikely(!nf)) {
                struct rtable *rt;

                if (unlikely(maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows)) {
                        /* This is DOS attack prevention */
                        NETFLOW_STAT_INC(maxflows_err);
                        NETFLOW_STAT_INC(pkt_drop);
                        NETFLOW_STAT_ADD(traf_drop, pkt_len);
                        NETFLOW_STAT_TS(drop);
                        goto unlock_return;
                }

                nf = ipt_netflow_alloc(&tuple);
                if (unlikely(!nf || IS_ERR(nf))) {
                        NETFLOW_STAT_INC(alloc_err);
                        NETFLOW_STAT_INC(pkt_drop);
                        NETFLOW_STAT_ADD(traf_drop, pkt_len);
                        NETFLOW_STAT_TS(drop);
                        goto unlock_return;
                }
                hlist_add_head(&nf->hlist, &htable[hash]);

#ifdef ENABLE_SAMPLER
                /* I only increment if deterministic sampler is enabled to
                 * avoid cache conflict by default. */
                if (get_sampler_mode() == SAMPLER_DETERMINISTIC)
                        nf->sampler_count = atomic_inc_return(&flow_count);
#endif
                nf->nf_ts_first = jiffies;
                nf->tcp_flags = tcp_flags;
                nf->o_ifc = if_out? if_out->ifindex : -1;
#ifdef ENABLE_PHYSDEV_OVER
                if (skb->nf_bridge && skb->nf_bridge->physoutdev)
                        nf->o_ifc = skb->nf_bridge->physoutdev->ifindex;
#endif

#ifdef SNMP_RULES
                rcu_read_lock();
#else
# define resolve_snmp(dev) ((dev)? (dev)->ifindex : -1)
#endif
/* copy and snmp-resolve device with physdev overriding normal dev */
#define copy_dev(out, physdev, dev) \
                if (skb->nf_bridge && skb->nf_bridge->physdev) \
                        out = resolve_snmp(skb->nf_bridge->physdev); \
                else \
                        out = resolve_snmp(dev);
#ifdef ENABLE_PHYSDEV
                copy_dev(nf->o_ifphys, physoutdev, if_out);
                copy_dev(nf->i_ifphys, physindev, if_in);
#endif
#ifdef SNMP_RULES
# ifdef ENABLE_PHYSDEV_OVER
                copy_dev(nf->o_ifcr, physoutdev, if_out);
                copy_dev(nf->i_ifcr, physindev, if_in);
# else
                nf->o_ifcr = resolve_snmp(if_out);
                nf->i_ifcr = resolve_snmp(if_in);
# endif
                rcu_read_unlock();

#endif
                nf->s_mask = s_mask;
                nf->d_mask = d_mask;

#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
                nf->ethernetType = skb->protocol;
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
                rt = (struct rtable *)skb->dst;
#else /* since 2.6.26 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
                rt = skb->rtable;
#else /* since 2.6.31 */
                rt = skb_rtable(skb);
#endif
#endif
#ifdef ENABLE_DIRECTION
                nf->hooknumx = hooknum + 1;
#endif
                if (likely(family == AF_INET)) {
                        if (rt)
                                nf->nh.ip = rt->rt_gateway;
                } else {
                        if (rt)
                                nf->nh.in6 = ((struct rt6_info *)rt)->rt6i_gateway;
                        nf->flow_label = (iph->ip6.flow_lbl[0] << 16) |
                                (iph->ip6.flow_lbl[1] << 8) | (iph->ip6.flow_lbl[2]);
                }
#if 0
                if (unlikely(debug > 2))
                        printk(KERN_INFO "ipt_NETFLOW: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
                               atomic_read(&ipt_netflow_count),
                               tuple.i_ifc, nf->o_ifc,
                               NIPQUAD(tuple.src.ip), ntohs(tuple.s_port),
                               NIPQUAD(tuple.dst.ip), ntohs(tuple.d_port));
#endif
        }

        nf->nr_packets++;
        nf->nr_bytes += pkt_len;
        nf->nf_ts_last = jiffies;
        nf->tcp_flags |= tcp_flags;
        nf->options |= options;
        if (tuple.protocol == IPPROTO_TCP)
                nf->tcpoptions |= tcpoptions;

        NETFLOW_STAT_INC(pkt_total);
        NETFLOW_STAT_ADD(traf_total, pkt_len);

#define LIST_IS_NULL(name) (!(name)->next)

        if (unlikely(active_needs_export(nf, active_timeout * HZ, jiffies))) {
                /* ok, if this is active flow to be exported */
#ifdef HAVE_LLIST
                /* delete from hash and add to the export llist */
                hlist_del(&nf->hlist);
                if (!LIST_IS_NULL(&nf->flows_list))
                        list_del(&nf->flows_list);
                llist_add(&nf->flows_llnode, &export_llist);
#else
                /* bubble it to the tail */
                if (LIST_IS_NULL(&nf->flows_list))
                        list_add_tail(&nf->flows_list, &stripe->list);
                else
                        list_move_tail(&nf->flows_list, &stripe->list);
#endif
                /* Blog: I thought about forcing timer to wake up sooner if we have
                 * enough exportable flows, but in fact this doesn't have much sense,
                 * because this would only move flow data from one memory to another
                 * (from our buffers to socket buffers, and socket buffers even have
                 * limited size). But yes, this is disputable. */
        } else {
                /* most recently accessed flows go to the head, old flows remain at the tail */
                if (LIST_IS_NULL(&nf->flows_list))
                        list_add(&nf->flows_list, &stripe->list);
                else
                        list_move(&nf->flows_list, &stripe->list);
        }

unlock_return:
        spin_unlock(&stripe->lock);
        read_unlock(&htable_rwlock);

        return IPT_CONTINUE;
}

#ifdef CONFIG_NF_NAT_NEEDED
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
        /* Below 2.6.31 we don't need to handle callback chain manually. */

/*
 * Helper macros so set_notifier_cb()/unset_notifier_cb() can be written
 * once for both kernel generations:
 *   NET_STRUCT - the parameter list of those functions,
 *   NET_ARG    - the leading argument (with trailing comma) passed to the
 *                nf_conntrack_{,un}register_notifier() calls.
 * Since 3.2 the functions take a `struct net *net` and every use of
 * nf_conntrack_event_cb is redirected to that namespace's
 * net->ct.nf_conntrack_event_cb; before 3.2 they take no arguments and the
 * name is left untouched.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
#define NET_STRUCT struct net *net
#define NET_ARG net,
#define nf_conntrack_event_cb net->ct.nf_conntrack_event_cb
#else
#define NET_STRUCT void
#define NET_ARG
#endif
static int set_notifier_cb(NET_STRUCT)
{
        struct nf_ct_event_notifier *notifier;

        notifier = rcu_dereference(nf_conntrack_event_cb);
        if (notifier == NULL) {
                /* Polite mode. */
                nf_conntrack_register_notifier(NET_ARG &ctnl_notifier);
        } else if (notifier != &ctnl_notifier) {
                if (!saved_event_cb)
                        saved_event_cb = notifier;
                else if (saved_event_cb != notifier)
                        printk(KERN_ERR "natevents_net_init: %p != %p (report error.)\n",
                            saved_event_cb, notifier);
                rcu_assign_pointer(nf_conntrack_event_cb, &ctnl_notifier);
        } else
                printk(KERN_ERR "ipt_NETFLOW: natevents already enabled.\n");
        return 0;
}
static void unset_notifier_cb(NET_STRUCT)
{
        struct nf_ct_event_notifier *notifier;

        notifier = rcu_dereference(nf_conntrack_event_cb);
        if (notifier == &ctnl_notifier) {
                if (saved_event_cb == NULL)
                        nf_conntrack_unregister_notifier(NET_ARG &ctnl_notifier);
                else
                        rcu_assign_pointer(nf_conntrack_event_cb, saved_event_cb);
        } else
                printk(KERN_ERR "ipt_NETFLOW: natevents already disabled.\n");
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
/* The per-namespace alias is only meant for the two functions above. */
#undef nf_conntrack_event_cb

/* Per network namespace hooks: install the notifier on init, remove it on exit. */
static struct pernet_operations natevents_net_ops = {
        .init   = set_notifier_cb,
        .exit   = unset_notifier_cb,
};
#endif
#endif /* since 2.6.31 */

/* Serializes register_ct_events() and unregister_ct_events(). */
static DEFINE_MUTEX(events_lock);

/*
 * Enable NAT events: hook ctnl_notifier into conntrack event delivery and
 * set `natevents` to reflect the outcome.
 *
 * Both functions may be called multiple times.
 *
 * On kernels >= 2.6.31 the nf_conntrack_netlink module is loaded first (it
 * is expected to be the first notifier user) and referenced once, then the
 * callback is installed: per network namespace since 3.2, directly via
 * set_notifier_cb() before that.  On older kernels the notifier is simply
 * registered.
 *
 * `natevents` becomes 1 only when the notifier is really in place; if the
 * pernet registration fails it is cleared, so that a later
 * unregister_ct_events() is not attempted on ops that were never registered.
 */
static void register_ct_events(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
#define NETLINK_M "nf_conntrack_netlink"
        struct module *netlink_m;
        static int referenced = 0;
#endif

        printk(KERN_INFO "ipt_NETFLOW: enable natevents.\n");
        mutex_lock(&events_lock);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
        /* Pre-load netlink module who will be first notifier
         * user, and then hijack nf_conntrack_event_cb from it. */
        if (
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0)
            !rcu_dereference(nf_conntrack_event_cb) ||
#endif
            !(netlink_m = find_module(NETLINK_M))) {
                printk("Loading " NETLINK_M "\n");
                request_module(NETLINK_M);
        }
        /* Reference netlink module to prevent its unsafe unload before us. */
        if (!referenced && (netlink_m = find_module(NETLINK_M))) {
                referenced++;
                use_module(THIS_MODULE, netlink_m);
        }

        /* Register ct events callback. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
        if (register_pernet_subsys(&natevents_net_ops) < 0) {
                /* Nothing was registered: do not claim natevents are on. */
                printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n");
                natevents = 0;
        } else
                natevents = 1;
#else
        set_notifier_cb();
        natevents = 1;
#endif
#else /* below v2.6.31 */
        if (!natevents && nf_conntrack_register_notifier(&ctnl_notifier) < 0)
                printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n");
        else
                natevents = 1;
#endif
        mutex_unlock(&events_lock);
}

/*
 * Disable NAT events: remove ctnl_notifier from conntrack event delivery
 * using the mechanism matching the running kernel generation, forget any
 * saved foreign callback (kernels >= 2.6.31), and clear `natevents`.
 * Runs under events_lock.
 */
static void unregister_ct_events(void)
{
        printk(KERN_INFO "ipt_NETFLOW: disable natevents.\n");
        mutex_lock(&events_lock);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
        /* v3.2 and later: per network namespace operations */
        unregister_pernet_subsys(&natevents_net_ops);
        rcu_assign_pointer(saved_event_cb, NULL);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
        /* v2.6.31 .. v3.1: callback pointer handled by hand */
        unset_notifier_cb();
        rcu_assign_pointer(saved_event_cb, NULL);
#else
        /* before v2.6.31: plain notifier API */
        nf_conntrack_unregister_notifier(&ctnl_notifier);
#endif

        natevents = 0;
        mutex_unlock(&events_lock);
}
#endif /* CONFIG_NF_NAT_NEEDED */

/* Hook mask shared by both registrations below. */
#define NETFLOW_TARGET_HOOKS \
        ((1 << NF_IP_PRE_ROUTING) | \
         (1 << NF_IP_LOCAL_IN) | \
         (1 << NF_IP_FORWARD) | \
         (1 << NF_IP_LOCAL_OUT) | \
         (1 << NF_IP_POST_ROUTING))

/*
 * The NETFLOW target is registered twice, once per address family; both
 * entries share the same handler, check function and hook mask.
 */
static struct ipt_target ipt_netflow_reg[] __read_mostly = {
        {       /* IPv4 */
                .name           = "NETFLOW",
                .target         = netflow_target,
                .checkentry     = netflow_target_check,
                .family         = AF_INET,
                .hooks          = NETFLOW_TARGET_HOOKS,
                .me             = THIS_MODULE
        },
        {       /* IPv6 */
                .name           = "NETFLOW",
                .target         = netflow_target,
                .checkentry     = netflow_target_check,
                .family         = AF_INET6,
                .hooks          = NETFLOW_TARGET_HOOKS,
                .me             = THIS_MODULE
        },
};

#ifdef CONFIG_PROC_FS
/*
 * Create the read-only file /proc/net/stat/<name> served by @fops.
 *
 * Kernels from 3.10 on get the file operations passed at creation time;
 * older kernels create the entry first and attach the operations (and,
 * before 2.6.30, the owning module) afterwards.
 *
 * Returns 1 on success and 0 if the entry could not be created.
 */
static int register_stat(const char *name, struct file_operations *fops)
{
        struct proc_dir_entry *entry;

        printk(KERN_INFO "netflow: registering: /proc/net/stat/%s\n", name);

# if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
        entry = proc_create(name, S_IRUGO, INIT_NET(proc_net_stat), fops);
# else
        entry = create_proc_entry(name, S_IRUGO, INIT_NET(proc_net_stat));
# endif
        if (entry == NULL) {
                printk(KERN_ERR "Unable to create /proc/net/stat/%s entry\n", name);
                return 0;
        }

# if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
        entry->proc_fops = fops;
#  if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
        entry->owner = THIS_MODULE;
#  endif
# endif

        printk(KERN_INFO "netflow: registered: /proc/net/stat/%s\n", name);
        return 1;
}
#else
/* Without procfs there is nothing to register; report success. */
# define register_stat(x, y) 1
#endif

/*
 * Module entry point.
 *
 * In order: builds the version string, sizes and allocates the flow hash
 * table, initializes the lock stripes, creates the flow slab cache,
 * registers the /proc/net/stat files and the sysctl tree, applies the
 * module parameters (destination, aggregation, sampler, snmp rules,
 * promisc), selects the export protocol, starts the scan worker and the
 * rate timer, registers the xtables targets and finally, if requested,
 * enables NAT events.
 *
 * Returns 0 on success.  On any failure everything set up so far is torn
 * down through the label chain at the bottom and -ENOMEM is returned.
 */
static int __init ipt_netflow_init(void)
{
        int i;

        printk(KERN_INFO "ipt_NETFLOW version %s, srcversion %s\n",
                IPT_NETFLOW_VERSION, THIS_MODULE->srcversion);

        version_string_size = scnprintf(version_string, sizeof(version_string),
                "ipt_NETFLOW " IPT_NETFLOW_VERSION " %s", THIS_MODULE->srcversion);
        tpl_element_sizes[observationDomainName] = version_string_size + 1;

        start_ts.first = ktime_get_real();
        clear_ipt_netflow_stat();

        if (!hashsize) {
                /* use 1/1024 of memory, 1M for hash table on 1G box */
                unsigned long memksize = (num_physpages << PAGE_SHIFT) / 1024;

                if (memksize > (5 * 1024 * 1024))
                        memksize = 5 * 1024 * 1024;
                hashsize = memksize / sizeof(struct hlist_head);
        }
        /* the table must have at least one bucket per lock stripe */
        if (hashsize < LOCK_COUNT)
                hashsize = LOCK_COUNT;
        printk(KERN_INFO "ipt_NETFLOW: hashsize %u (%luK)\n", hashsize,
                hashsize * sizeof(struct hlist_head) / 1024);

        htable_size = hashsize;
        htable = alloc_hashtable(htable_size);
        if (!htable) {
                printk(KERN_ERR "Unable to create ipt_neflow_hash\n");
                goto err;
        }

#ifdef MPLS_DEPTH
        /* template_mpls is terminated on the MPLS_DEPTH mark, so, it
         * never send Element which can access mpls labels array above
         * its defined MPLS_DEPTH value. */
        if (MPLS_DEPTH >= 0 && MPLS_DEPTH < 10)
                template_mpls.types[MPLS_LABELS_BASE_INDEX + MPLS_DEPTH] = 0;
#endif

        for (i = 0; i < LOCK_COUNT; i++) {
                spin_lock_init(&htable_stripes[i].lock);
                INIT_LIST_HEAD(&htable_stripes[i].list);
        }

        ipt_netflow_cachep = kmem_cache_create("ipt_netflow",
                                                sizeof(struct ipt_netflow), 0,
                                                0, NULL
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
                                                , NULL
#endif
                                              );
        if (!ipt_netflow_cachep) {
                printk(KERN_ERR "Unable to create ipt_netflow slab cache\n");
                goto err_free_hash;
        }

        if (!register_stat("ipt_netflow", &nf_seq_fops))
                goto err_free_netflow_slab;
        if (!register_stat("ipt_netflow_snmp", &snmp_seq_fops))
                goto err_free_proc_stat1;
        if (!register_stat("ipt_netflow_flows", &flows_seq_fops))
                goto err_free_proc_stat2;

#ifdef CONFIG_SYSCTL
        ctl_table_renumber(netflow_sysctl_table);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
        netflow_sysctl_header = register_sysctl_table(netflow_net_table
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
                                                      , 0 /* insert_at_head */
#endif
                                                      );
#else /* 2.6.25 */
        netflow_sysctl_header = register_sysctl_paths(netflow_sysctl_path, netflow_sysctl_table);
#endif
        if (!netflow_sysctl_header) {
                printk(KERN_ERR "netflow: can't register to sysctl\n");
                goto err_free_proc_stat3;
        } else
                printk(KERN_INFO "netflow: registered: sysctl net.netflow\n");
#endif

        /* Each string parameter is copied into its own static buffer and
         * the parameter pointer is redirected to that buffer. */
        if (!destination)
                destination = destination_buf;
        if (destination != destination_buf) {
                strlcpy(destination_buf, destination, sizeof(destination_buf));
                destination = destination_buf;
        }
        if (add_destinations(destination) < 0)
                goto err_free_sysctl;

#ifdef ENABLE_AGGR
        if (!aggregation)
                aggregation = aggregation_buf;
        if (aggregation != aggregation_buf) {
                strlcpy(aggregation_buf, aggregation, sizeof(aggregation_buf));
                aggregation = aggregation_buf;
        }
        add_aggregation(aggregation);
#endif

#ifdef ENABLE_SAMPLER
        if (!sampler)
                sampler = sampler_buf;
        if (sampler != sampler_buf) {
                strlcpy(sampler_buf, sampler, sizeof(sampler_buf));
                sampler = sampler_buf;
        }
        parse_sampler(sampler);
#ifdef SAMPLING_HASH
        hash_seed = prandom_u32();
#endif
#endif

#ifdef SNMP_RULES
        if (!snmp_rules)
                snmp_rules = snmp_rules_buf;
        if (snmp_rules != snmp_rules_buf) {
                strlcpy(snmp_rules_buf, snmp_rules, sizeof(snmp_rules_buf));
                snmp_rules = snmp_rules_buf;
        }
        add_snmp_rules(snmp_rules);
#endif

#ifdef ENABLE_PROMISC
        {
                /* reset the flag first so switch_promisc() sees a change */
                int newpromisc = promisc;

                promisc = 0;
                switch_promisc(newpromisc);
        }
#endif

        netflow_switch_version(protocol);
        _schedule_scan_worker(0);
        setup_timer(&rate_timer, rate_timer_calc, 0);
        mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));

        peakflows_at = jiffies;
        if (xt_register_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg)))
                goto err_stop_timer;

#ifdef CONFIG_NF_NAT_NEEDED
        if (natevents)
                register_ct_events();
#endif

        printk(KERN_INFO "ipt_NETFLOW is loaded.\n");
        return 0;

        /*
         * Error unwinding.  From err_stop_timer on this mirrors the teardown
         * done by ipt_netflow_fini(), so that a failed load does not leave
         * promisc mode switched on or the snmp ruleset allocated.
         */
err_stop_timer:
#ifdef ENABLE_PROMISC
        switch_promisc(0);
#endif
        _unschedule_scan_worker();
        netflow_scan_and_export(AND_FLUSH);
        del_timer_sync(&rate_timer);

        synchronize_sched();

        free_templates();
        destination_removeall();
#ifdef ENABLE_AGGR
        aggregation_remove(&aggr_n_list);
        aggregation_remove(&aggr_p_list);
#endif
#ifdef SNMP_RULES
        kfree(snmp_ruleset);
#endif
err_free_sysctl:
#ifdef CONFIG_SYSCTL
        unregister_sysctl_table(netflow_sysctl_header);
#endif
        /* The labels stay outside of the CONFIG_PROC_FS guards: the gotos
         * above are compiled even when register_stat() is a constant macro,
         * so every label they name has to exist. */
err_free_proc_stat3:
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ipt_netflow_flows", INIT_NET(proc_net_stat));
#endif
err_free_proc_stat2:
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ipt_netflow_snmp", INIT_NET(proc_net_stat));
#endif
err_free_proc_stat1:
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
#endif
err_free_netflow_slab:
        kmem_cache_destroy(ipt_netflow_cachep);
err_free_hash:
        vfree(htable);
err:
        printk(KERN_INFO "ipt_NETFLOW is not loaded.\n");
        return -ENOMEM;
}

/*
 * Module exit point: tears down everything ipt_netflow_init() set up.
 *
 * The user-visible interfaces (sysctl, proc files) go first, then the
 * packet sources (promisc mode, xtables targets, NAT events), then the
 * background activity (scan worker, rate timer) with a final flushing
 * export, and only after that the memory is released.
 */
static void __exit ipt_netflow_fini(void)
{
        printk(KERN_INFO "ipt_NETFLOW unloading..\n");

#ifdef CONFIG_SYSCTL
        unregister_sysctl_table(netflow_sysctl_header);
#endif
#ifdef CONFIG_PROC_FS
        /* removed in the reverse order of their registration */
        remove_proc_entry("ipt_netflow_flows", INIT_NET(proc_net_stat));
        remove_proc_entry("ipt_netflow_snmp", INIT_NET(proc_net_stat));
        remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
#endif
#ifdef ENABLE_PROMISC
        switch_promisc(0);
#endif
        xt_unregister_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg));
#ifdef CONFIG_NF_NAT_NEEDED
        if (natevents)
                unregister_ct_events();
#endif
        /* stop the periodic scan, then export and flush what is left */
        _unschedule_scan_worker();
        netflow_scan_and_export(AND_FLUSH);
        del_timer_sync(&rate_timer);

        /* NOTE(review): presumably waits for packet processing that is still
         * in flight before the structures below are freed -- confirm. */
        synchronize_sched();

        free_templates();
        destination_removeall();
#ifdef ENABLE_AGGR
        aggregation_remove(&aggr_n_list);
        aggregation_remove(&aggr_p_list);
#endif
#ifdef SNMP_RULES
        kfree(snmp_ruleset);
#endif

        kmem_cache_destroy(ipt_netflow_cachep);
        vfree(htable);

        printk(KERN_INFO "ipt_NETFLOW unloaded.\n");
}

/* Hook the init/exit functions above into module load and unload. */
module_init(ipt_netflow_init);
module_exit(ipt_netflow_fini);

/* vim: set sw=8: */
ipt-netflow-2.2/raw_promisc.patch0000644000000000000000000000147613213006644015700 0ustar  rootroot
 This simple hack allows you to see promiscuous traffic in the raw table
 of iptables. Of course, you will need to enable promiscuous mode on the
 interface. Refer to README.promisc for details.

 Example of how to catch the desired traffic:
   iptables -A PREROUTING -t raw -i eth2 -j NETFLOW


--- linux-2.6.26/net/ipv4/ip_input.old.c        2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/ip_input.c        2008-08-06 14:02:16.000000000 +0400
@@ -378,12 +378,6 @@
        struct iphdr *iph;
        u32 len;
 
-       /* When the interface is in promisc. mode, drop all the crap
-        * that it receives, do not try to analyse it.
-        */
-       if (skb->pkt_type == PACKET_OTHERHOST)
-               goto drop;
-
        IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
 
        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
ipt-netflow-2.2/testing.sh0000755000000000000000000000171713213006644014344 0ustar  rootroot#!/bin/bash

set -e

# Argument dispatch:
#   no arguments - refuse to run (this is a maintainer-only tool);
#   "all"        - re-exec this script with the full list of kernel trees.
if [ "$1" = "" ]; then
  echo Maintainer only tool.
  exit 1
elif [ "$1" = all ]; then
  # "$0" is quoted so the re-exec still works when the script path
  # contains spaces or glob characters.
  exec bash "$0" linux-2.6.18 centos5 linux-3.11.2 centos6 linux-3.4.66 linux-3.9.11 centos7 linux-3.14 linux-3.17 linux-3.19
  # Only reached if exec itself fails.
  exit 1
fi

# Lint the SNMP MIB first; the script runs under `set -e`, so a non-zero
# exit status here aborts the whole run.
smilint IPT-NETFLOW-MIB.my

# Build one "./configure --kdir=..." command line per requested kernel
# tree; trees that are not present under /usr/src are silently skipped.
cfg=()
echo -n Testing for:
for k in "$@"; do
  if [ ! -d /usr/src/$k ]; then continue; fi
  echo -n " $k"
  cfg+=("./configure --kdir=/usr/src/$k")
done
echo

# One configure option set per line. Each line becomes one array element
# (leading spaces included) and is word-split later when it is used.
readarray -t opts <<EOF
  --disable-snmp-agent
  --enable-aggregation
  --enable-natevents
  --enable-snmp-rules
  --enable-macaddress
  --enable-vlan
  --promisc-mpls
  --enable-direction
  --enable-sampler
  --enable-sampler=hash
  --enable-promisc --promisc-mpls
  --enable-physdev
  --enable-physdev-override
EOF
# SHORT in the environment replaces the whole option matrix with a single
# option set.
if [ "$SHORT" ]; then
  opts=("$SHORT")
fi

# Print all arguments, joined by spaces, in bold green.
colorecho() {
  echo -e "\033[1;32m$*\033[m"
}
# Build matrix: every configure command line (one per kernel tree) is
# combined with every option set, then the result is built with make.
# Because of `set -e`, the first failing configure or make stops the run.
for i in "${cfg[@]}"; do
  for j in "${opts[@]}"; do
    echo
    colorecho == $i $j
    echo
    # $i and $j are intentionally unquoted: each holds a whole command
    # line / option list that must be word-split into separate arguments.
    $i $j -Werror
    make
  done
done

ipt-netflow-2.2/NEWS0000644000000000000000000000552713213006644013032 0ustar  rootrootipt-netflow NEWS
================

2.2 (2016-02-21)

    * Minor feature and Maintenance release.
       - Decapsulate MPLS in promisc mode and MPLS-aware NetFlow feature.
       - Export flowEndReason for IPFIX.
       - Promisc mode improvements.
       - Allow export destination to be IPv6 address.
       - Move flows list from debugfs to proc.
       - Compilation compatibility with latest kernels.
       - Code is Coverity scanned.

2.1 (2014-02-08)

    * Options Templates support (V9 and IPFIX), which made it possible to implement:
       - Flow Sampling (random, deterministic, and hash modes) for all types
         of NetFlow protocols (V5, V9, IPFIX).
       - Export Statistics (metering, exporting, sampling) and Configuration.
       - Export Interface list (ifName, ifDescr).
    * Promisc hack (no need to patch kernel anymore).
    * SNMP monitoring interface and agent (via net-snmp dlmod).
    * More compilation compatibility with recent kernels, grsecurity kernels,
      Gentoo, Debian, Centos. DKMS install support.
    * Minor features: IPSec flows, Direction Element. Removed support for
      CONNMARK. Bug fixes and improvements.
    * irqtop tool (ruby).

2.0.1 (2014-09-04)

    * Minor fixes for 2.0 release.

2.0 (2014-08-07)

    * This is major release with a lot of new features and improvements, such
      as:
        - Support of NetFlow v9 and IPFIX.
        - IPv6 support.
        - NAT translation events (NEL).
        - Additional options: SNMP-index translation rules, and exporting of
          Ethernet Type, VLAN, and MAC addresses.
        - Performance improvements (tested to work well on 10Gbit load).
        - Stability improvements and bug fixes.

1.8 (2012-07-02)

    * This is minor bug fix release with small improvements.

1.7.1 (2011-04-04)

    * This is minor release with improved compilation compatibility and small
      improvements.

1.7 (2011-01-30)

    * This version have improved compilation compatibility with latest Linux
      kernels (2.6.36.3 and 2.6.27) and bunch of small improvements.

    * Since version 1.7 ipt-netflow's repository moved to Git SCM. Use of CVS
      repository is deprecated.

1.5.1 (2009-03-14)

    * This version have improved compliance to NetFlow standard and
      compatibility with iptables/xtables 1.4.x. Added options to configure
      script for manual customization.

1.4 (2008-12-23)

    * This version have restored compatibility with popular kernel 2.6.18,
      added configure script, statistics improvements, some fixes for 64-bit
      platforms, and minor performance tune up.

1.2 (2008-11-15)

    * This version have stability enhancements, documentation improvements, IP
      frag support, better statistics.

1.1 (2008-08-06)

    * This version updated compatibility with latest (2.6.26) Linux kernel, have
      more support for promisc patch, and minor fixes.

1.0 (2008-07-12)

    * First release tested in production environment.
ipt-netflow-2.2/libipt_NETFLOW.c0000644000000000000000000000500713213006644015151 0ustar  rootroot/*
 * iptables helper for NETFLOW target
 * <abc@telekom.ru>
 *
 *
 *   This file is part of NetFlow exporting module.
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <net/if.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/*
 * Compatibility layer: map the names used in this file onto whichever
 * iptables/xtables userspace API the headers provide.
 */
#define __EXPORTED_HEADERS__
/* XTABLES is expected to be supplied by the build (e.g. -DXTABLES). */
#ifdef XTABLES
#include <xtables.h>
#else
#include <iptables.h>
#endif

#ifdef XTABLES_VERSION_CODE     // since 1.4.1
#define MOD140
#define iptables_target         xtables_target
#endif

#ifdef iptables_target          // only in 1.4.0
#define MOD140
#endif

/*
 * With MOD140 set, the ipt_* names used below are aliased to their xt_*
 * counterparts and the entry/ip parameters become opaque (void);
 * otherwise the original struct ipt_entry / struct ipt_ip are used.
 */
#ifdef MOD140
#define ipt_entry_target        xt_entry_target
#define register_target         xtables_register_target
#define _IPT_ENTRY              void
#define _IPT_IP                 void
#ifndef IPT_ALIGN
#define IPT_ALIGN               XT_ALIGN
#endif
#else // before 1.3.x
#define _IPT_ENTRY struct ipt_entry
#define _IPT_IP struct ipt_ip
#endif

/* Fall back to the xtables version string when IPTABLES_VERSION is not defined. */
#ifndef IPTABLES_VERSION
#define IPTABLES_VERSION XTABLES_VERSION
#endif

/* The NETFLOW target takes no options: the table holds only the zeroed terminator entry. */
static struct option opts[] = {
  { 0 }
};

/* Help callback: writes the one-line description of the target to stdout. */
static void help(void)
{
        fputs("NETFLOW target\n", stdout);
}

/*
 * Option parse callback. No argument is inspected; the function
 * unconditionally returns 1.
 */
static int parse(int c, char **argv, int invert, unsigned int *flags,
             const _IPT_ENTRY  *entry,
             struct ipt_entry_target **targetinfo)

{
        return 1;
}

/* Final-check callback: intentionally empty, nothing is validated. */
static void final_check(unsigned int flags)
{
}

/* Save callback: intentionally empty, the target emits no saved parameters. */
static void save(const _IPT_IP *ip, const struct ipt_entry_target *match)
{
}

/* Print callback: writes the target name followed by a space; all arguments are ignored. */
static void print(const _IPT_IP *ip,
      const struct ipt_entry_target *target,
      int numeric)
{
        fputs("NETFLOW ", stdout);
}

/*
 * Target descriptor handed to register_target() in _init().
 * Both size fields are IPT_ALIGN(0): the target carries no per-rule data.
 */
static struct iptables_target netflow = { 
        .next           = NULL,
        .name           = "NETFLOW",
        .version        = IPTABLES_VERSION,
        .size           = IPT_ALIGN(0),
        .userspacesize  = IPT_ALIGN(0),
        .help           = &help,
        .parse          = &parse,
        .final_check    = &final_check,
        .print          = &print,
        .save           = &save,
        .extra_opts     = opts
};

/*
 * Library entry point: registers the NETFLOW target.
 * When the headers do not already define _init as a macro, it is
 * defined here so that the function below is declared as a constructor
 * (named _INIT).
 */
#ifndef _init
#define _init __attribute__((constructor)) _INIT
#endif
void _init(void)
{
        register_target(&netflow);
}
ipt-netflow-2.2/configure0000755000000000000000000004317513213006644014243 0ustar  rootroot#!/bin/sh

# Append the usual system binary directories to PATH for the tools used below.
PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin

# DKMS entry point: "--from-dkms-conf=<dir>" as the first argument.
# KDKMS receives the text after the first "="; kernel_find_source later
# uses it as the kernel directory.
case "$1" in
  --from-dkms-conf*)
    KDKMS=`echo "$1" | sed 's/[^=]*.//'`
    # restore options from existing Makefile, if present
    if [ -e Makefile ]; then
      # Replaces the positional parameters with the saved CARGS line.
      set -- `sed -n 's/^CARGS = \(.*\)/\1/p' Makefile`
      FROMDKMSCONF=1
    fi
    ;;
esac

# Print a fatal error message and abort configure with exit status 1.
#   $*  message text, printed verbatim after "! Error: "
error() {
  # The message is passed as a printf argument, not as the format string,
  # so "%" or "\" inside it (e.g. from a detected version or path) cannot
  # be misinterpreted as a conversion or an escape sequence.
  printf '! Error: %s\n' "$*"
  exit 1
}

# Verify that the iptables source tree in $IPTSRC has the same version as
# the detected/requested $IPTVER. Returns 1 when no source tree is set;
# aborts via error() on a missing Makefile, unknown version or mismatch.
iptables_src_version() {
  if [ -z "$IPTSRC" ]; then
    return 1
  fi

  echo -n "Checking iptables sources version: "
  SRC="$IPTSRC/Makefile"
  if [ ! -s "$SRC" ]; then
    error "Please build iptables first."
  fi
  VER=`sed -n 's/^\(IPTABLES_\)\?VERSION[ :]= \?//p' "$SRC"`
  if [ -z "$VER" ]; then
    error "Unknown version of iptables."
  fi
  if [ "$VER" != "$IPTVER" ]; then
    echo "$VER"
    error "Source version ($VER) doesn't match binary ($IPTVER)"
  fi
  echo "$VER (ok)"
}

# Extract the module directory from a *_LIB_DIR "..." definition in the
# header file $1. On success sets IPTLIB, prints it and returns 0;
# returns 1 when the file is missing/empty or has no such definition.
get_lib_dir() {
  # Reset LIB first: it is a global, and without this a value left over
  # from the environment or an earlier probe would be accepted as if it
  # had been read from "$1" when that file does not exist.
  LIB=
  test -s "$1" && LIB=`sed -n 's/.*_LIB_DIR "\(.*\)"/\1/p' "$1"`
  if [ "$LIB" ]; then
    IPTLIB=$LIB
    echo "$IPTLIB (from sources)"
    return 0
  fi
  return 1
}

# Guess the module directory from path-like strings embedded in the
# iptables binary ($IPTBIN). On success sets IPTLIB, prints it and
# returns 0; otherwise returns 1.
get_lib_from_bin() {
  # Without a binary there is nothing to scan; an empty $IPTBIN would
  # otherwise make `strings` wait on standard input.
  test "$IPTBIN" || return 1
  # Path and pattern are quoted so that spaces in the path and glob
  # characters in the pattern are passed through unchanged.
  LIB=`strings "$IPTBIN" | grep '^/.*lib.*/.*tables'`
  if [ "$LIB" ]; then
    IPTLIB=$LIB
    echo "$IPTLIB (from binary)"
    return 0
  fi
  return 1
}

# Guess the module directory from path-like strings embedded in the
# libxtables shared object that $IPTBIN is linked against (located with
# ldd). On success sets IPTLIB, prints it and returns 0; otherwise 1.
get_lib_from_lib() {
  XLIB=`/usr/bin/ldd $IPTBIN | sed -n '/libxtables/s!.* \(/[^ ]\+\).*!\1!p'`
  if [ -z "$XLIB" ]; then
    return 1
  fi
  LIB=`strings $XLIB | grep ^/.*lib.*/.*tables`
  if [ -z "$LIB" ]; then
    return 1
  fi
  IPTLIB=$LIB
  echo "$IPTLIB (from libxtables.so, from binary)"
  return 0
}

# Decide the include flags for building the iptables library (IPTINC)
# and report where they came from. Precedence: user-specified value,
# pkg-config, nothing (no source tree found), source tree include dir.
iptables_inc() {
  echo -n "Iptables include flags: "
  if [ "$IPTINC" ]; then
    echo "$IPTINC (user specified)"
    return
  fi
  if [ "$PKGVER" ]; then
    IPTINC="$PKGINC"
    echo "$IPTINC (pkg-config)"
    return
  fi
  if [ "$NOIPTSRC" ]; then
    IPTINC=
    echo "none (default)"
    return
  fi
  IPTINC="-I$IPTSRC/include"
  echo "$IPTINC (from source)"
}

# Decide the iptables module installation directory (IPTLIB) and report
# where it came from. Precedence: user-specified value, pkg-config,
# headers in the source tree, strings in the iptables binary, strings in
# libxtables. Aborts via error() when every probe fails.
iptables_modules() {
  echo -n "Iptables module path: "
  if [ "$IPTLIB" ]; then
    echo "$IPTLIB (user specified)"
    return
  fi
  if [ "$PKGLIB" ]; then
    IPTLIB="$PKGLIB"
    echo "$IPTLIB (pkg-config)"
    return
  fi
  get_lib_dir "$IPTSRC/include/iptables.h" && return 0
  get_lib_dir "$IPTSRC/include/xtables.h" && return 0
  get_lib_dir "$IPTSRC/xtables/internal.h" && return 0
  get_lib_from_bin && return 0
  get_lib_from_lib && return 0
  error "can not find, try setting it with --ipt-lib="
}

# Accept $1 as the iptables source tree if it has an "include"
# subdirectory: sets IPTSRC, prints a notice and returns 0; else returns 1.
try_dir() {
  [ -d "$1/include" ] || return 1
  echo "Found iptables sources at $1"
  IPTSRC=$1
  return 0
}

# Like try_dir, but also tries the same path with a ".git" suffix.
try_dirg() {
  try_dir "$1" || try_dir "$1.git"
}

# Given an existing directory $1, try its parent directory as the
# iptables source tree. Fails when $1 is not a directory.
try_dir2() {
  if [ -d "$1" ]; then
    try_dir `dirname $1`
    return $?
  fi
  return 1
}

# Return 0 when pkg-config is available. On the first failure print a
# hint and set PKGWARN; once PKGWARN is set, fail immediately and quietly.
check_pkg_config() {
  if [ "$PKGWARN" ]; then
    return 1
  fi
  which pkg-config >/dev/null 2>&1 && return 0
  echo "! You don't have pkg-config, it may be useful to install it."
  PKGWARN=1
  return 1
}
# Determine the iptables version (IPTVER). Precedence: user-specified
# value, output of "$IPTBIN -V", pkg-config's xtables version (which
# also sets PKGVER). Aborts via error() when none of them works.
iptables_find_version() {
  echo -n "Iptables binary version: "
  if [ "$IPTVER" ]; then
    echo "$IPTVER (user specified)"
    return
  fi

  IPTVER=`$IPTBIN -V 2>/dev/null | sed -n s/iptables.v//p`
  if [ "$IPTVER" ]; then
    echo "$IPTVER (detected from $IPTBIN)"
    return
  fi
  echo "no iptables binary found"

  check_pkg_config
  PKGVER=`pkg-config --modversion xtables 2>/dev/null`
  if [ "$PKGVER" ]; then
    IPTVER="$PKGVER"
    echo "Xtables version: $IPTVER (detected from `which pkg-config`)"
    return
  fi
  error "Can not find iptables version, try setting it with --ipt-ver="
}

# Check that the compiler in $CC can be executed ("$CC -v"). On failure
# print distribution-specific installation hints and exit with status 1.
compiler_presence_test() {
  echo -n "Check for working gcc: "
  if $CC -v >/dev/null 2>&1; then
    echo Yes "($CC)"
    return
  fi
  echo No

  echo "! You need gcc to install module from source"
  if [ -s /etc/debian_version ]; then
    # NAME defaults to Debian and may be replaced by the value that
    # /etc/os-release defines.
    NAME=Debian
    if [ -e /etc/os-release ]; then
      . /etc/os-release >/dev/null 2>&1
    fi
    echo "! "
    echo "! Under $NAME try to run this:"
    echo "!   root# apt-get install gcc"
    echo "! "
  elif [ -s /etc/redhat-release ]; then
    echo "! "
    echo "! Under Centos try to run this:"
    echo "!   root# yum install gcc"
    echo "! "
  fi
  exit 1
}

# Check whether the header named by the arguments can be included:
# writes a two-line test.c into the current directory, compiles it with
# $CC and removes test.c/test.o again.
# Include flags come from IPTINC (user supplied) or, failing that, from
# PKGINC (pkg-config); otherwise none are used.
# Returns the compiler's exit status (0 when the header was found).
compile_libitp_test() {
  local FLAGS
  local MSG
  echo -n "Checking for presence of $@... "
  if [ "$IPTINC" ]; then
    FLAGS=$IPTINC
    MSG="(using ipt-inc)"
  elif [ "$PKGINC" ]; then
    FLAGS=$PKGINC
    MSG="(using pkg-config)"
  else
    FLAGS=
    MSG=
  fi
  echo "
#define __EXPORTED_HEADERS__
#include <$*>" > test.c
  $CC -c test.c $FLAGS >/dev/null 2>&1
  # Save the status now; the echo/rm commands below would overwrite $?.
  RET=$?
  if [ $RET = 0 ]; then
    echo Yes $MSG;
  else
    echo No;
  fi
  rm -f test.c test.o
  return $RET
}

# Try to take the iptables build settings from pkg-config, then verify
# that the xtables.h or iptables.h header can actually be compiled.
# Sets PKGVER/PKGINC/PKGLIB when pkg-config knows an xtables package of
# exactly version $IPTVER, and extends IPTCFLAGS. Exits with status 1
# when neither header can be included.
iptables_try_pkgconfig() {
  # PKGVER may already be set by iptables_find_version; only probe
  # pkg-config here when it is not.
  if [ ! "$PKGVER" ]; then
    check_pkg_config
    # NOTE(review): the same pkg-config query is run twice; PKGVER is
    # overwritten below in both outcomes of the exact-version check.
    PKGVER=`pkg-config --modversion xtables 2>/dev/null`
    TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null`
    echo -n "pkg-config for version $IPTVER exists: "
    pkg-config --exact-version=$IPTVER xtables 2>/dev/null
    if [ $? = 0 ]; then
      echo "Yes"
      PKGVER=$TRYPKGVER
    else
      if [ "$TRYPKGVER" ]; then
        echo "No (reported: $TRYPKGVER)"
      else
        echo "No"
      fi
      # Version mismatch (or no package): do not use pkg-config settings.
      PKGVER=
    fi
  fi
  if [ "$PKGVER" ]; then
    check_pkg_config
    PKGVER=`pkg-config --modversion xtables 2>/dev/null`
    PKGINC=`pkg-config --cflags xtables`
    PKGLIB=`pkg-config --variable=xtlibdir xtables`
  elif expr "$IPTVER" : '^1\.3' >/dev/null; then
    echo "! This version of iptables ($IPTVER) will be treated as old version."
    # Newer versions of iptables should not have -I/kernel/include!
    # So I assume that newer version will have correct pkg-config set up
    # and if not, then it's older who need it.
    IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
  fi
  compiler_presence_test
  # Prefer xtables.h (adds -DXTABLES); fall back to iptables.h.
  if compile_libitp_test xtables.h; then
    IPTCFLAGS="-DXTABLES $IPTCFLAGS"
  elif ! compile_libitp_test iptables.h; then
    echo "! Iptables headers not found. You may need to specify --ipt-inc=..."
    if [ -s /etc/debian_version ]; then
      echo "! "
      echo "! Under Debian simply run this:"
      echo "!   root# apt-get install iptables-dev pkg-config"
    elif [ -s /etc/redhat-release ]; then
      echo "! "
      arch=.`uname -m`
      echo "! Under Centos simply run this:"
      echo "!   root# yum install iptables-devel$arch pkgconfig"
    fi
    exit 1
  fi

}

# Locate the iptables source tree (IPTSRC). Skipped (returns 1) when the
# include flags already come from the user or from pkg-config. A
# user-specified directory must be valid, otherwise configure aborts.
# When nothing is found, NOIPTSRC is set and configure continues.
iptables_find_src() {
  local cand
  if [ "$IPTINC" ]; then return 1; fi
  if [ "$PKGVER" ]; then return 1; fi

  VER="iptables-$IPTVER"
  if [ "$IPTSRC" ]; then
    echo "User specified source directory: $IPTSRC"
    try_dir $IPTSRC || error "Specified directory is not iptables source.."
    return
  fi

  echo "Searching for $VER sources.."
  # Versioned directory names first ...
  for cand in "./$VER" "../$VER" "/usr/src/$VER"; do
    try_dir "$cand" && return 0
  done
  # ... then unversioned checkouts (with or without a .git suffix) ...
  for cand in "iptables" "../iptables" "/usr/src/iptables"; do
    try_dirg "$cand" && return 0
  done
  # ... and finally whatever the locate database knows about.
  try_dir2 `locate $VER/extensions 2>/dev/null | head -1` && return 0
  echo "! Can not find iptables source directory, you may try setting it with --ipt-src="
  echo "! This is not fatal error, yet. Will be just using default include dir."
  NOIPTSRC=1
}

# Print the list of supported configure options and exit successfully.
show_help() {
  cat <<'EOF'
Possible options:
  --ipt-ver=..  iptables version (ex.: 1.4.2)
  --ipt-bin=..  iptables binary to use (ex.: /usr/sbin/iptables)
  --ipt-src=..  directory for iptable source (ex.: ../iptables-1.4.2)
  --ipt-lib=..  iptable modules path (ex.: /usr/libexec/xtables)
  --ipt-inc=..  directory for iptable headers (ex.: /usr/include)
  --kver=..     kernel version (ex.: 2.6.30-std-def-alt15)
  --kdir=..     directory for kernel source (ex.: /usr/src/kernel)
  --enable-natevents     enables natevents support
  --enable-snmp-rules    enables SNMP-index conversion rules
  --enable-macaddress    enables MAC address for v9/IPFIX
  --enable-vlan          enables VLAN Ids for v9/IPFIX
  --enable-direction     enables flowDirection(61) Element
  --enable-sampler       enables Flow Sampling
  --enable-sampler=hash  enables Hash sampler
  --enable-aggregation   enables aggregation rules
  --enable-promisc       enables promisc hack mode
  --promisc-mpls         decapsulate MPLS in promisc mode
  --promisc-mpls=N       -- and record N labels (default 3)
  --enable-physdev       enables physdev reporting
  --enable-physdev-override      to override interfaces
  --disable-snmp-agent   disables net-snmp agent
  --disable-dkms         disables DKMS support completely
  --disable-dkms-install  no DKMS install but still create dkms.conf
EOF
  exit 0
}

# Remember the original command line; it is written into the generated
# Makefile (as @CARGS@) and read back by the --from-dkms-conf path above.
CARGS="$@"
# Parse the command line. "for ac_option" without "in" iterates over the
# positional parameters.
for ac_option
do
  # For "-name=value" options, ac_optarg is the text after the first "=".
  case "$ac_option" in
    -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
    *) ac_optarg= ;;
  esac

  # The first matching pattern wins, so the order of the branches matters:
  # "--enable-sampl*hash" must stay before "--enable-sampl*".
  # Note that "--disable-dkms*" also matches "--disable-dkms-install".
  case "$ac_option" in
    --ipt-bin=*) IPTBIN="$ac_optarg" ;;
    --ipt-lib=*) IPTLIB="$ac_optarg" ;;
    --ipt-src=*) IPTSRC="$ac_optarg" ;;
    --ipt-ver=*) IPTVER="$ac_optarg" ;;
    --ipt-inc=*) IPTINC="-I$ac_optarg" ;;
    --kver=*)  KVERSION="$ac_optarg" ;;
    --kdir=*)      KDIR="$ac_optarg" ;;
    --enable-nat*)   KOPTS="$KOPTS -DENABLE_NAT" ;;
    --enable-mac*)   KOPTS="$KOPTS -DENABLE_MAC" ;;
    --enable-vlan*)  KOPTS="$KOPTS -DENABLE_VLAN" ;;
    --enable-direc*) KOPTS="$KOPTS -DENABLE_DIRECTION" ;;
    --enable-sampl*hash) KOPTS="$KOPTS -DENABLE_SAMPLER -DSAMPLING_HASH" ;;
    --enable-sampl*) KOPTS="$KOPTS -DENABLE_SAMPLER" ;;
    --enable-aggr*)  KOPTS="$KOPTS -DENABLE_AGGR" ;;
    --enable-promi*)   ENABLE_PROMISC=1 ;;
    --promisc-mpls*)   ENABLE_PROMISC=1; PROMISC_MPLS=1; MPLS_DEPTH=${ac_optarg:-3} ;;
    --enable-snmp-r*)  KOPTS="$KOPTS -DSNMP_RULES" ;;
    --enable-physdev)       KOPTS="$KOPTS -DENABLE_PHYSDEV" ;;
    --enable-physdev-over*) KOPTS="$KOPTS -DENABLE_PHYSDEV_OVER" ;;
    --disable-snmp-a*)   SKIPSNMP=1 ;;
    --disable-net-snmp*) SKIPSNMP=1 ;;
    --disable-dkms*)     SKIPDKMS=1 ;;
    --from-dkms-conf*) ;;
    --make) echo called from make; CARGS=`echo $CARGS | sed s/--make//g` ;;
    -Werror) KOPTS="$KOPTS -Werror" ;;
    --help|-h) show_help ;;
    -*) echo Invalid option: $ac_option; exit 1 ;;
#    *) ni="$ni $ac_option" ;;
  esac
done

# Translate the promisc/MPLS switches collected above into compiler
# defines. The MPLS depth is sanitized first: a non-numeric or empty
# value becomes 1 and values above 10 are clamped to 10.
if [ "$ENABLE_PROMISC" = 1 ]; then KOPTS="$KOPTS -DENABLE_PROMISC"; fi
if [ "$PROMISC_MPLS" = 1 ]; then
  KOPTS="$KOPTS -DPROMISC_MPLS"
  case "$MPLS_DEPTH" in (*[!0-9]*|"") MPLS_DEPTH=1 ;; esac
  # NOTE(review): for a depth below 1 the message says "limiting to 1",
  # but MPLS_DEPTH is left unchanged, so the -DMPLS_DEPTH define below is
  # not emitted at all. Confirm whether "0" is meant to disable label
  # recording or whether the assignment is missing.
  if [ "$MPLS_DEPTH" -lt 1 ]; then
    echo "! Requested MPLS stack depth is too small, limiting to 1."
  elif [ "$MPLS_DEPTH" -gt 10 ]; then
    echo "! Requested MPLS stack depth is too big, limiting to 10."
    MPLS_DEPTH=10;
  fi
  if [ "$MPLS_DEPTH" -ge 1 ]; then KOPTS="$KOPTS -DMPLS_DEPTH=$MPLS_DEPTH"; fi
fi

# Determine the kernel version to build for (KVERSION) and record in
# KHOW how it was obtained: "requested" (--kver), "proc" (parsed from
# /proc/version when it mentions Debian), "uname", or "sources" (read
# from the kernel tree given with --kdir).
# Returns 1 when KDIR is set but lacks a Makefile or
# include/config/kernel.release.
kernel_find_version() {
  KHOW=requested
  test "$KVERSION" && return 0

  if grep -q '#.*Debian' /proc/version; then
    KHOW=proc
    KVERSION=`sed -n 's/.*#.*Debian \([0-9\.]\+\)-.*/\1/p' /proc/version`
    # KLIBMOD keeps the `uname -r` value; it is used later as the
    # directory name under /lib/modules.
    KLIBMOD=`uname -r`
  else
    KHOW=uname
    KVERSION=`uname -r`
  fi
  test "$KDIR" || return 0

  # A kernel tree was specified: its recorded release overrides the
  # version detected above.
  test -s $KDIR/Makefile || return 1
  test -s $KDIR/include/config/kernel.release || return 1
  KVERSION=`cat $KDIR/include/config/kernel.release`
  KHOW=sources
}

# Accept $1 as the kernel tree if it contains a non-empty Makefile:
# sets KDIR and returns 0; otherwise returns 1 and leaves KDIR untouched.
kernel_check_src() {
  [ -s "$1/Makefile" ] || return 1
  KDIR="$1"
  return 0
}

# Probe a /lib/modules/<version> style directory: remember its "source"
# tree in KSRC when that is usable, then settle on its "build" tree.
# The return status is that of the "build" probe.
kernel_check_src2() {
  kernel_check_src $1/source && KSRC=$KDIR
  kernel_check_src $1/build
}

# Determine the kernel source/build directory (KDIR) and record in KSHOW
# how it was obtained: "dkms" (directory passed by --from-dkms-conf),
# "requested" (--kdir) or "found" (searched under /lib/modules and
# /usr/src). Prints installation hints and exits with status 1 when no
# kernel tree can be found.
kernel_find_source() {
  if [ "$KDKMS" ]; then
    # dkms args is highest priority
    KDIR=$KDKMS
    KSHOW=dkms
    return 0
  fi
  KSHOW=requested
  test "$KDIR" && return 0
  KSHOW=found
  kernel_check_src2 /lib/modules/$KLIBMOD  && return 0
  kernel_check_src2 /lib/modules/$KVERSION && return 0
  kernel_check_src  /usr/src/kernels/$KVERSION && return 0
  kernel_check_src  /usr/src/linux-$KVERSION && return 0
  echo "! Linux source not found. Don't panic. You may specify kernel source"
  echo "! directory with --kdir=..., or try to install kernel-devel package,"
  echo "! or just raw sources for linux-$KVERSION from kernel.org."
  if grep -q -i centos /proc/version 2>/dev/null; then
    echo "! "
    arch=.`uname -m`
    echo "! Under Centos simply run this:"
    echo "!   root# yum install kernel-devel iptables-devel$arch pkgconfig"
  fi
  if grep -q -i debian /proc/version 2>/dev/null; then
    echo "! "
    echo "! Under Debian simply run this:"
    echo "!   root# apt-get install module-assistant iptables-dev pkg-config"
    echo "!   root# m-a prepare"
  fi
  exit 1
}

# Cross-check the chosen kernel version against the chosen kernel tree.
# When the tree records a different release, warn and build for the
# tree's release. Aborts via error() when the tree has no .config or no
# include/config directory.
kernel_check_consistency() {
  if [ -s $KDIR/include/config/kernel.release ]; then
    SRCVER=`cat $KDIR/include/config/kernel.release`
    if [ "$KVERSION" != "$SRCVER" ]; then
      echo "! Warning: $KHOW kernel version ($KVERSION) and $KSHOW version of kernel source ($SRCVER) doesn't match!"
      echo "!   You may try to specify only kernel source tree with --kdir=$KDIR"
      echo "!   and configure will pick up version properly."
      echo "! Assuming you want to build for $SRCVER"
      KVERSION=$SRCVER
    fi
  fi
  test -e "$KDIR/.config" || error ".config in kernel source not found, run  make menuconfig  in $KDIR"
  test -d "$KDIR/include/config" || error "kernel is not prepared, run  make prepare modules_prepare  in $KDIR"
}

# Warn when kernel config symbol $1 has no "$1=" line in $KDIR/.config.
#   $1  config symbol (e.g. CONFIG_PROC_FS)
#   $2  human-readable name of the feature that depends on it
# The name of the checked config file is printed once, before the first
# warning.
kconfig() {
  KCONFIG=$KDIR/.config
  grep -q "^$1=" $KCONFIG 2>/dev/null && return 0
  [ "$KCONFIGREPORTED" = true ] || {
    KCONFIGREPORTED=true
    echo Kernel config file checked: $KCONFIG
    echo
  }
  echo "! Attention: $1 is undefined in your kernel configuration"
  echo "!   Without this option enabled $2 will not work."
  echo
}

# Warn about kernel config options that module features depend on.
# Each line pairs a config symbol with the feature named in the warning;
# these checks only print warnings, they never abort configure.
kernel_check_config() {
  kconfig CONFIG_SYSCTL                 "sysctl interface"
  kconfig CONFIG_PROC_FS                "proc interface"
  kconfig CONFIG_NF_NAT_NEEDED          "natevents"
  kconfig CONFIG_NF_CONNTRACK_EVENTS    "natevents"
  kconfig CONFIG_IPV6                   "IPv6"
  kconfig CONFIG_IP6_NF_IPTABLES        "ip6tables target"
}

# Append compiler flag $2 to KOPTS when kernel file $1 exists, looking
# first under KSRC (if set) and then under KDIR.
#   $1  path relative to the kernel tree (e.g. include/linux/llist.h)
#   $2  flag to add when the file is present
kernel_check_include() {
  echo -n "Checking for presence of $1... "
  if [ "$KSRC" -a -e $KSRC/$1 ] || [ -e $KDIR/$1 ]; then
    echo Yes
    KOPTS="$KOPTS $2"
    return
  fi
  echo No
}

# Probe the kernel tree for optional headers and add the matching
# HAVE_* define to KOPTS for each one that is present.
kernel_check_features() {
  kernel_check_include include/linux/llist.h -DHAVE_LLIST
  kernel_check_include include/linux/grsecurity.h -DHAVE_GRSECURITY_H
}

# Decide whether the net-snmp agent module is built and installed.
# On success sets SNMPTARGET=snmp_NETFLOW.so and SNMPINSTALL=sinstall
# (substituted into the Makefile); both stay empty when SNMP support is
# disabled (--disable-snmp-agent) or a prerequisite is missing.
#
# SNMPCONFIG starts as a flag ("no" when net-snmp-config is missing) and
# is later replaced by the hint text shown to the user; SNMPADD holds the
# hint shown when the agent itself is missing.
snmp_check() {
  SNMPTARGET=
  SNMPINSTALL=
  test "$SKIPSNMP" && return

  echo -n "Searching for net-snmp-config... "
  if which net-snmp-config >/dev/null 2>&1; then
    echo Yes `which net-snmp-config`
  else
    echo No.
    SNMPCONFIG=no
  fi

  # Agent detection is distribution specific: rpm on Red Hat style
  # systems, dpkg on Debian style systems, otherwise the presence of
  # /etc/snmp/snmpd.conf.
  echo -n "Searching for net-snmp agent... "
  if [ -s /etc/redhat-release ]; then
    if ! rpm --quiet -q net-snmp; then
      echo No.
      SNMPADD="do:  yum install net-snmp"
      if [ "$SNMPCONFIG" ]; then
        SNMPADD="$SNMPADD net-snmp-devel"
      fi
    else
      echo Yes.
    fi
    if [ "$SNMPCONFIG" ]; then
      SNMPCONFIG="run:  yum install net-snmp-devel"
    fi
  elif [ -s /etc/debian_version ]; then
    if ! dpkg -s snmpd >/dev/null 2>&1; then
      echo No.
      SNMPADD="do:  apt-get install snmpd"
      if [ "$SNMPCONFIG" ]; then
        SNMPADD="$SNMPADD libsnmp-dev"
      fi
    else
      echo Yes.
    fi
    if [ "$SNMPCONFIG" ]; then
      SNMPCONFIG="run:  apt-get install libsnmp-dev"
    fi
  elif [ -s /etc/snmp/snmpd.conf ]; then
    echo Yes.
  else
    echo No.
    SNMPADD="install net-snmp (www.net-snmp.org)"
    SNMPCONFIG="reinstall net-snmp with agent support."
  fi

  # Missing agent: silently assume SNMP is not wanted. Agent present but
  # no development package: explain how to fix it. In both cases the
  # SNMP targets stay disabled.
  if [ "$SNMPADD" ]; then
    echo " Assuming you don't want net-snmp agent support".
    echo " Otherwise $SNMPADD"
    return
  elif [ "$SNMPCONFIG" ]; then
    echo "! You have net-snmp agent but not development package."
    echo "! net-snmp agent will not be built, to fix:"
    echo "!   $SNMPCONFIG"
    return
  fi

  SNMPTARGET=snmp_NETFLOW.so
  SNMPINSTALL=sinstall
}

# Decide whether installation goes through DKMS. Sets
# DKMSINSTALL=dinstall (substituted into the Makefile) when the dkms tool
# is available; leaves it empty when DKMS is disabled (--disable-dkms*)
# or not installed.
dkms_check() {
  DKMSINSTALL=
  test "$SKIPDKMS" && return

  echo -n "Checking for DKMS... "
  if ! which dkms >/dev/null 2>&1; then
    echo "No. (It may be useful to install it.)"
    echo "! "
    echo "! DKMS is method of installing kernel modules, that will"
    echo "! automatically recompile module after kernel upgrade."
    if [ -s /etc/debian_version ]; then
      echo "! "
      echo "! To install it under Debian simply run this:"
      echo "!   root# apt-get install dkms"
      echo "! "
    elif [ -s /etc/redhat-release ]; then
      echo "! "
      echo "! To install it under Centos enable EPEL or RPMforge repository,"
      echo "! then run this:"
      echo "!   root# yum install dkms"
      echo "! "
    fi
    return
  fi
  echo Yes.
  DKMSINSTALL=dinstall
  # When configure was started with --from-dkms-conf, skip the notice below.
  test "$FROMDKMSCONF" && return
  if dkms status | grep ^ipt-netflow, >/dev/null; then
    echo "! You are already have module installed via DKMS"
    echo "!   it will be uninstalled on 'make install' and"
    echo "!   current version of module installed afterwards."
    echo "! Use --disable-dkms option if don't want this."
  fi
}

# Main detection sequence. The trailing comment on each call names the
# variable that the step is responsible for setting.

# Kernel: version, source tree, sanity checks, optional features.
kernel_find_version     #KVERSION
# KLIBMOD (the /lib/modules directory name) defaults to the kernel version.
test "$KLIBMOD" || KLIBMOD=$KVERSION
echo "Kernel version: $KVERSION ($KHOW)"
kernel_find_source      #KDIR
echo "Kernel sources: $KDIR ($KSHOW)"
kernel_check_consistency
kernel_check_config
kernel_check_features

# Compiler and iptables binary default to gcc and the first "iptables"
# found in PATH unless already set.
CC=${CC:-gcc}
test "$IPTBIN" || IPTBIN=`which iptables`

iptables_find_version   #IPTVER
iptables_try_pkgconfig  #try to configure from pkg-config
iptables_find_src       #IPTSRC
iptables_src_version    #check that IPTSRC match to IPTVER
iptables_inc            #IPTINC
iptables_modules        #IPTLIB

# Optional components.
snmp_check
dkms_check

# sed program that fills in the @NAME@ placeholders of Makefile.in.
# "!" is the s-command delimiter.
# NOTE(review): a substituted value containing "!" or "&" would
# presumably corrupt the sed program or the result -- confirm whether
# such values can occur.
REPLACE="\
s!@CARGS@!$CARGS!;\
s!@KVERSION@!$KVERSION!;\
s!@KDIR@!$KDIR!;\
s!@KOPTS@!$KOPTS!;\
s!@SNMPTARGET@!$SNMPTARGET!;\
s!@SNMPINSTALL@!$SNMPINSTALL!;\
s!@DKMSINSTALL@!$DKMSINSTALL!;\
s!@IPTABLES_VERSION@!$IPTVER!;\
s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\
s!@IPTABLES_MODULES@!$IPTLIB!"

# Generate the Makefile and tell the user what to do next.
echo -n "Creating Makefile.. "
 sed "$REPLACE" Makefile.in > Makefile
 echo done.
echo
echo "  If you need some options enabled run ./configure --help"
echo "  Now run: make all install"
echo

ipt-netflow-2.2/openwrt/0000755000000000000000000000000013213006644014020 5ustar  rootrootipt-netflow-2.2/openwrt/Readme.md0000644000000000000000000000360113213006644015537 0ustar  rootrootCross-compiling and packages for openwrt
===

Place the Makefile in the `package/network/ipt-netflow` directory of the OpenWrt buildroot.
Run `make menuconfig` and select the package in the Network/Netflow menu. Configure arguments are only partially supported.

Run `make` to build full firmware or `make package/network/ipt-netflow/{clean,prepare,configure,compile,install}` to rebuild packages.

To make git version uncomment two lines in Makefile.

Tested to work on Chaos Calmer and Designated Driver with Atheros AR7xxx/AR9xxx target.

For ipt-netflow 2.2 the patches are needed; drop them when building the next version or git master.

Making and installing
===

```shell
mkdir debian-toolchain
sudo debootstrap jessie debian-toolchain
sudo chroot debian-toolchain

. /etc/profile
apt update
apt install git ssh-client build-essential mercurial subversion \
   binutils flex bzip2 asciidoc ncurses-dev libssl-dev gawk zlib1g-dev fastjar

adduser user
su user
. /etc/profile
cd ~

git clone https://github.com/openwrt/openwrt.git openwrt-trunk
git clone https://github.com/aabc/ipt-netflow.git

cd openwrt-trunk
./scripts/feeds update -a
ln -s ~/ipt-netflow/openwrt/ package/network/ipt-netflow


make menuconfig
  #select target and device
  #go to network/netflow and check both

make
  #and go for dinner or a walk ;)
  #after five hours

scp bin/ar71xx/packages/kernel/kmod-ipt-netflow_4.4.14+2.2-2_ar71xx.ipk  \
   root@192.168.236.79:/tmp/
scp bin/ar71xx/packages/base/iptables-mod-netflow_2.2-2_ar71xx.ipk \
   root@192.168.236.79:/tmp/
scp bin/ar71xx/packages/base/kernel_4.4.14-1-abf9cc6feb410252d667326556dae184_ar71xx.ipk   \
   root@192.168.236.79:/tmp/

   #goto router
ssh root@192.168.236.79

opkg install /tmp/*.ipk

insmod /lib/modules/4.4.14/ipt_NETFLOW.ko
sysctl -w net.netflow.protocol=5
sysctl -w net.netflow.destination=192.168.236.34:2055

iptables -I FORWARD -j NETFLOW
iptables -I INPUT -j NETFLOW
iptables -I OUTPUT -j NETFLOW

```
ipt-netflow-2.2/openwrt/Makefile0000644000000000000000000001001113213006644015451 0ustar  rootrootinclude $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk

# Package identity: name and packaging release number.
PKG_NAME:=ipt-netflow
PKG_RELEASE:=2

# Upstream source: the git repository, the released version, and the
# ref that is checked out (the tag "v<version>").
PKG_SOURCE_URL:=https://github.com/aabc/$(PKG_NAME).git
PKG_VERSION:=2.2
PKG_SOURCE_VERSION:=v$(PKG_VERSION)

#TO BUILD development version uncomment 2 rows below and remove patches
#PKG_VERSION:=$(shell (git ls-remote $(PKG_SOURCE_URL) | grep refs/heads/master | cut -f 1 | head -c 7))
#PKG_SOURCE_VERSION:=HEAD

# Sources are fetched with git and packed into a versioned tarball.
PKG_SOURCE_PROTO:=git
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz

PKG_SOURCE_SUBDIR:=$(PKG_NAME)-$(PKG_VERSION)

# Build below the kernel build directory.
PKG_BUILD_DIR := $(KERNEL_BUILD_DIR)/$(PKG_NAME)-$(PKG_VERSION)
PKG_DEPENDS:=iptables

include $(INCLUDE_DIR)/package.mk


# Kernel module package: ships ipt_NETFLOW.ko from the build directory
# and depends on iptables plus the userspace extension package defined
# in this file.
define KernelPackage/ipt-netflow
        SECTION:=net
        CATEGORY:=Network
        SUBMENU:=Netflow
        TITLE:=Netflow iptables module for Linux kernel
        URL:=http://ipt-netflow.sourceforge.net/
        FILES:=$(PKG_BUILD_DIR)/ipt_NETFLOW.ko
        DEPENDS:=+iptables +iptables-mod-netflow
endef


# Userspace package (iptables extension). It depends on iptables only;
# the dependency on the kernel module package is commented out.
# NOTE(review): TITLE is identical to the kernel module package's title
# -- confirm whether a distinct title was intended.
define Package/iptables-mod-netflow
        SECTION:=net
        CATEGORY:=Network
        SUBMENU:=Netflow
        TITLE:=Netflow iptables module for Linux kernel
        URL:=http://ipt-netflow.sourceforge.net/
        #DEPENDS:=+kmod-ipt-netflow
        DEPENDS:=+iptables
endef

# Base arguments for ./configure: point it at the kernel tree being
# built. Optional feature switches are appended further below.
CONFIGURE_ARGS:= \
        --kdir="$(LINUX_DIR)" 

# menuconfig entries, one bool per ./configure switch; each one is
# translated into a configure argument by the CONFIGURE_ARGS += block.
# The last three entries are "disable" switches: selecting them passes
# the corresponding --disable-* option, and the first two default to y.
define Package/iptables-mod-netflow/config
        menu "Configuration"
                depends on PACKAGE_iptables-mod-netflow
        config PACKAGE_iptables-mod-netflow_natevents
                bool "enables natevents support"
                default n
        config PACKAGE_iptables-mod-netflow_snmp-rules
                bool "enables SNMP-index conversion rules"
                default n
        config PACKAGE_iptables-mod-netflow_macaddress
                bool "enables MAC address for v9/IPFIX"
                default n
        config PACKAGE_iptables-mod-netflow_vlan
                bool "enables VLAN Ids for v9/IPFIX"
                default n
        config PACKAGE_iptables-mod-netflow_direction
                bool "enables flowDirection(61) Element"
                default n
        config PACKAGE_iptables-mod-netflow_sampler
                bool "enables Flow Sampling"
                default n
        config PACKAGE_iptables-mod-netflow_aggregation
                bool "enables aggregation rules"
                default n
        config PACKAGE_iptables-mod-netflow_promisc
                bool "enables promisc hack mode"
                default n
        config PACKAGE_iptables-mod-netflow_promisc-mpls
                bool "decapsulate MPLS in promisc mode"
                default n
        config PACKAGE_iptables-mod-netflow_physdev
                bool "enables physdev reporting"
                default n
        config PACKAGE_iptables-mod-netflow_physdev-override
                bool "to override interfaces"
                default n
        config PACKAGE_iptables-mod-netflow_snmp-agent
                bool "disables net-snmp agent"
                default y
        config PACKAGE_iptables-mod-netflow_dkms
                bool "disables DKMS support completely"
                default y
        config PACKAGE_iptables-mod-netflow_dkms-install
                bool "no DKMS install but still create dkms.conf"
                default n
        endmenu
endef

# Map each selected menuconfig symbol to its ./configure switch.
# $(if ...) expands to the switch only when the CONFIG_ variable is
# non-empty, so unselected options contribute nothing.
CONFIGURE_ARGS += \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_natevents),--enable-natevents) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_snmp-rules),--enable-snmp-rules) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_macaddress),--enable-macaddress) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_vlan),--enable-vlan) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_direction),--enable-direction) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_sampler),--enable-sampler) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_aggregation),--enable-aggregation) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_promisc),--enable-promisc) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_promisc-mpls),--promisc-mpls) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_physdev),--enable-physdev) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_physdev-override),--enable-physdev-override) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_snmp-agent),--disable-snmp-agent) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_dkms),--disable-dkms) \
        $(if $(CONFIG_PACKAGE_iptables-mod-netflow_dkms-install),--disable-dkms-install) 

#TODO: --enable-sampler=hash --promisc-mpls=N


$(eval $(call KernelPackage,ipt-netflow))

$(eval $(call Package,kmod-ipt-netflow))

# Install recipe for the iptables-mod-netflow package: runs the `linstall`
# target of the Makefile in $(PKG_BUILD_DIR), with DESTDIR set to the first
# argument of this template ($(1)).
define Package/iptables-mod-netflow/install
        $(MAKE) -C $(PKG_BUILD_DIR) DESTDIR=$(1) linstall
        #TODO: snmp install, dkms install
endef

$(eval $(call BuildPackage,iptables-mod-netflow))
ipt-netflow-2.2/openwrt/patches/0000755000000000000000000000000013213006644015447 5ustar  rootrootipt-netflow-2.2/openwrt/patches/310-Makefile_crosscompile.patch0000644000000000000000000000111713213006644023270 0ustar  rootrootIndex: ipt-netflow-2.2/Makefile.in
===================================================================
--- ipt-netflow-2.2.orig/Makefile.in
+++ ipt-netflow-2.2/Makefile.in
@@ -64,10 +64,10 @@ sinstall: | snmp_NETFLOW.so IPT-NETFLOW-
        fi
 
 %_sh.o: libipt_NETFLOW.c
-       gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c
+       $(CC) $(CFLAGS) -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c
 
 %.so: %_sh.o
-       gcc -shared -o $@ $<
+       $(CC) -shared -o $@ $<
 
 version.h: ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile
        @./version.sh --define > version.h
ipt-netflow-2.2/irqtop0000755000000000000000000003676013213006644013602 0ustar  rootroot#!/usr/bin/ruby
# Observe irq and softirq in top fashion
# (c) 2014 <abc@telekom.ru>
# License: GPL.

require 'getoptlong'
require 'curses'
require 'stringio'

@imode = :both
@omode = :table
@color = true
@showrps = false

# Refresh interval (seconds) used when neither -d/--delay nor a positional
# interval is given; also shown in the usage text.
default_delay = 5

# Command-line parsing.
# GetoptLong reports every alias under the FIRST name of its option group,
# so the `when` branches below must match that canonical name
# (e.g. "--eth", never its "--pps" alias).
GetoptLong.new(
    ["--help",     "-h", GetoptLong::NO_ARGUMENT],
    ["--batch",    "-b", GetoptLong::NO_ARGUMENT],
    ["--delay",    "-d", GetoptLong::REQUIRED_ARGUMENT],
    ["--top",      "-t", GetoptLong::NO_ARGUMENT],
    ["--table",    "-x", GetoptLong::NO_ARGUMENT],
    ["--soft",     "-s", GetoptLong::NO_ARGUMENT],
    ["--softirq",        GetoptLong::NO_ARGUMENT],
    ["--softirqs",       GetoptLong::NO_ARGUMENT],
    ["--irq",      "-i", GetoptLong::NO_ARGUMENT],
    ["--irqs",           GetoptLong::NO_ARGUMENT],
    ["--reverse",  "-r", GetoptLong::NO_ARGUMENT],
    ["--nocolor",  "-C", GetoptLong::NO_ARGUMENT],
    ["--eth",      "-e", "--pps", GetoptLong::NO_ARGUMENT],
    ["--rps",      "-R", "--xps", GetoptLong::NO_ARGUMENT]
).each do |opt, arg|
  case opt
  when '--help'
    puts " Shows interrupt rates (per second) per cpu."
    puts " Also shows irq affinity ('.' for disabled cpus),"
    puts " and rps/xps affinity ('+' rx, '-' tx, '*' tx/rx)."
    puts " Can show packet rate per eth queue."
    puts
    puts " Usage: #{$0} [-h] [-d #{default_delay}] [-b] [-t|-x] [-i|-s] [-r]"
    puts "    -d  --delay=n  refresh interval"
    puts "    -s  --softirq  select softirqs only"
    puts "    -i  --irq      select hardware irqs only"
    puts "    -e  --eth      show extra eth stats (from ethtool)"
    puts "    -R  --rps      enable display of rps/xps"
    puts "    -x  --table    output in table mode (default)"
    puts "    -t  --top      output in flat top mode"
    puts "    -b  --batch    output non-interactively"
    puts "    -r  --reverse  reverse sort order"
    puts "    -C  --nocolor  disable colors"
    puts
    puts " Rates marked as '.' is forbidden by smp_affinity mask."
    exit 0
  when '--reverse'
    @reverse = !@reverse
  when '--batch'
    @batch = true
    @reverse = !@reverse if @omode == :top
  when '--delay'
    # Float so that fractional intervals work, as with the interactive 'd' key.
    @delay = arg.to_f
  when '--top'
    @omode = :top
  when '--table'
    @omode = :table
  when /--irq/
    @imode = :irq
  when /--soft/
    @imode = :soft
  when /--eth/
    # Canonical name of the -e / --eth / --pps group.
    @pps = true
  when /--nocolor/
    @color = false
  when /--rps/
    @showrps = !@showrps
  end
end

# A missing, unparsable or non-positive -d value counts as "not given".
@delay = nil unless @delay && @delay > 0
# Without -d, an optional leading numeric argument is the interval;
# otherwise fall back to the default. An explicit -d is never overwritten.
unless @delay
  @delay = ARGV[0].to_f > 0 ? ARGV.shift.to_f : default_delay
end
# Optional iteration count for batch mode. Kept integral so that the
# countdown in the batch loop reliably reaches zero.
@count = ARGV.shift.to_i if ARGV[0].to_i > 0

# Parses a /proc/interrupts-style table.
#
# tag  - marker stored with every record ('i' makes this table's CPU list
#        the one kept in @icpus)
# file - path of the table to read
#
# The first line supplies the CPU column names (@cpus). Rows from the third
# line onwards are appended to @irqs as [tag, name, description] and to
# @stats as one [count, tag, name, cpu, description] record per CPU column.
def read_table(tag, file)
  @cpus = []
  content = IO.readlines(file)
  @cpus = content[0].scan(/CPU\d+/)
  @icpus = @cpus if tag == 'i'
  content[2..-1].each do |row|
    parsed = row.match(/^\s*(\S+):((?:\s+\d+)+)(.*)$/)
    name, counters, label = parsed.captures
    @irqs << [tag, name, label]
    counters.scan(/\d+/).each_with_index do |count, idx|
      # count, 's|i', irq name, 'CPUx', description
      @stats << [count.to_i, tag, name, @cpus[idx], label.strip]
    end
  end
end

# Loads the per-CPU lines ("cpuN ...") of /proc/stat into @cstat, mapping
# the cpu name to the array of its integer counters.
def read_procstat
  @cstat = {}
  IO.readlines("/proc/stat").grep(/^cpu\d+ /).each do |row|
    name, *fields = row.split(" ")
    @cstat[name] = fields.map { |field| field.to_i }
  end
end

# Reads /proc/irq/<n>/smp_affinity for every IRQ into @aff, mapping the
# IRQ number to its CPU bitmask as an Integer.
#
# The mask file holds hexadecimal text. On machines with more than 32 CPUs
# it is written as comma-separated 32-bit groups ("00000000,0000000f"), so
# the commas are removed before conversion; otherwise only the most
# significant group would be parsed.
def read_affinity
  @aff = {}
  Dir.glob("/proc/irq/*/smp_affinity").each do |af|
    irq = af[%r{\d+}].to_i
    begin
      mask = IO.read(af).strip.delete(',').to_i(16)
    rescue SystemCallError
      # The IRQ disappeared (or became unreadable) between glob and read.
      next
    end
    @aff[irq] = mask
  end
end

# Lists network device names found under /sys/class/net.
#
# For each entry the entry itself is inspected if it is a symlink, otherwise
# its "device" sub-path. An entry is dropped only when the inspected path is
# a symlink whose target does not contain "devices/pci".
def net_devices_pci
  kept = Dir['/sys/class/net/*'].select do |path|
    link = File.symlink?(path) ? path : path + "/device"
    !File.symlink?(link) || File.readlink(link) =~ %r{devices/pci}
  end
  kept.map { |path| File.basename(path) }
end

@devlist = net_devices_pci
@devre = Regexp.union(@devlist)
# Loads the RPS/XPS CPU masks of the queue named in an IRQ description.
#
# desc - IRQ description text, e.g. containing "<dev>-rx-<n>" or "<dev>-tx-<n>"
#
# Always resets @rps and @xps to 0 first. They are filled only when rps
# display is enabled (@showrps), the description names a known device
# (@devlist/@devre) and a queue number can be extracted from it.
def get_rps(desc)
  @rps = @xps = 0
  return unless @showrps
  return if @devlist.empty?
  dev = desc[/\b(#{@devre})\b/, 1]
  return unless dev
  return unless desc =~ /-(tx|rx)+-\d+/i
  qnr = desc[/-(\d+)\s*$/, 1]
  return unless qnr
  begin
    # CPU masks are hex text; with more than 32 CPUs they are written as
    # comma-separated 32-bit groups, so strip the commas before parsing.
    @rps = IO.read("/sys/class/net/#{dev}/queues/rx-#{qnr}/rps_cpus").delete(',').hex if desc =~ /rx/i
    @xps = IO.read("/sys/class/net/#{dev}/queues/tx-#{qnr}/xps_cpus").delete(',').hex if desc =~ /tx/i
  rescue
    # Best effort: the queue file may be missing; keep whatever was read.
  end
end

# Returns the one-character rps/xps marker for a CPU number:
# ' ' neither mask has the CPU's bit, '+' only @rps, '-' only @xps,
# '*' both.
def calc_rps(cpu)
  bit = 1 << cpu
  rx = (@rps & bit) != 0 ? 1 : 0
  tx = (@xps & bit) != 0 ? 2 : 0
  " +-*"[rx | tx, 1]
end

# Collects `ethtool -S <dev>` counters.
#
# Called without an argument it rotates the previous sample into @esto,
# starts a fresh auto-vivifying @est, refreshes the device list, updates
# the sample timestamps (@ehts/@ets) and their difference (@edt), then
# calls itself once per device.
#
# Called with a device name it parses "name: value" lines. Names of the form
# "?x_queue_<n>_<key>" are stored as @est[dev][dir][n][key]; everything else
# as @est[dev][name].
def ethtool_grab_stat(dev = nil)
  if dev.nil?
    @esto = @est if @est
    @est = Hash.new { |h,k| h[k] = Hash.new(&h.default_proc) }
    @devlist = net_devices_pci
    @devre = Regexp.union(@devlist)
    # own time counter because this stat could be paused
    @ehts = @ets if @ets
    @ets = @ts
    @edt = @ets - @ehts if @ehts
    @devlist.each { |name| ethtool_grab_stat(name) }
    return
  end

  output = `ethtool -S #{dev} 2>/dev/null`
  return if output == ''
  output.split("\n").each do |line|
    key, value = line.split(':')
    next unless value
    key = key.strip
    number = value.strip.to_i
    if key =~ /^.x_queue_(\d+)_/
      qdir, _queue, qnr, qk = key.split('_', 4)
      @est[dev][qdir][qnr][qk] = number
    else
      @est[dev][key] = number
    end
  end
end

# Per-second rate of one per-queue ethtool counter between the last two
# samples.
#
# dev  - device name
# qdir - queue direction key ("rx"/"tx")
# qnr  - queue number (String key)
# k    - counter name within the queue
#
# Returns "<qdir>:<rate>" when the rate is positive, otherwise nil.
# Also returns nil when the counter is absent from either sample: @est and
# @esto auto-vivify missing keys as empty Hashes, which cannot be subtracted.
def e_queue_stat(dev, qdir, qnr, k)
  n = @est[dev][qdir][qnr][k]
  o = @esto[dev][qdir][qnr][k]
  return nil unless n.is_a?(Numeric) && o.is_a?(Numeric)
  return nil unless @edt && @edt > 0
  d = (n - o) / @edt
  d > 0 ? "%s:%d" % [qdir, d] : nil
end

# Per-second rate of one device-level ethtool counter between the last two
# samples.
#
# dev - device name
# k   - counter name
# ks  - label to print instead of the counter name (nil/false: use k)
#
# Returns "<label>:<rate>", or nil when the counter is absent from either
# sample (missing keys auto-vivify as Hashes and cannot be subtracted).
def e_dev_stat(dev, k, ks)
  n = @est[dev][k]
  o = @esto[dev][k]
  return nil unless n.is_a?(Numeric) && o.is_a?(Numeric)
  return nil unless @edt && @edt > 0
  r = (n - o) / @edt
  ks = k unless ks
  "%s:%d" % [ks, r]
end

# Lists the per-queue counters, other than "bytes" and "packets", that grew
# between the last two samples.
#
# Returns an Array of "<qdir>_<counter>:<increase>" strings (absolute
# increase, not a rate). Counters missing from either sample are skipped:
# a missing key auto-vivifies as a Hash and cannot be subtracted.
def e_queue_stat_err(dev, qdir, qnr)
  r = []
  ek = @est[dev][qdir][qnr].keys.reject { |e| e =~ /^(bytes|packets)$/ }
  ek.each do |k|
    n = @est[dev][qdir][qnr][k]
    o = @esto[dev][qdir][qnr][k]
    next unless n.is_a?(Numeric) && o.is_a?(Numeric)
    d = n - o
    r << "%s_%s:%d" % [qdir, k, d] if d.to_i > 0
  end
  r
end

# Sums every device counter whose name matches rk and reports how much the
# sum grew since the previous sample (an absolute increase, not a rate).
#
# dev - device name
# rk  - Regexp selecting counter names
# ks  - label for the result
#
# Returns "<ks>:<increase>" when the increase is positive, otherwise nil.
def e_dev_stat_sum(dev, rk, ks)
  names = @est[dev].keys.select { |name| name =~ rk }
  current = names.inject(0) { |acc, name| acc += @est[dev][name].to_i }
  previous = names.inject(0) { |acc, name|
    acc += @esto[dev][name].to_i rescue 0
  }
  growth = current - previous
  return nil unless growth > 0
  "%s:%d" % [ks, growth]
end

# Prints (without newline) the bracketed ethtool statistics belonging to
# an IRQ description.
#
# Nothing is printed when no devices are known or the description names
# none of them. ' []' is printed while fewer than two samples exist.
# Queue interrupts ("-rx-<n>"/"-tx-<n>") show per-queue packet rates and
# growing error counters; other device interrupts show device-wide rx/tx
# rates plus summed error and drop increases.
def print_ethstat(desc)
  return if @devlist.empty?
  dev = desc[/\b(#{@devre})\b/, 1]
  return unless dev
  if !(@esto && @est)
    print ' []'
    return
  end

  parts = []
  if desc =~ /-(tx|rx)+-\d+/i
    queue = desc[/-(\d+)\s*$/, 1]
    if queue
      %w[rx tx].each do |dir|
        next unless desc =~ /#{dir}/i
        parts << e_queue_stat(dev, dir, queue, "packets")
        parts.concat(e_queue_stat_err(dev, dir, queue))
      end
    end
  else
    parts << e_dev_stat(dev, "rx_packets", 'rx')
    parts << e_dev_stat(dev, "tx_packets", 'tx')
    parts << e_dev_stat_sum(dev, /_err/, 'err')
    parts << e_dev_stat_sum(dev, /_drop/, 'drop')
  end
  print ' [' + parts.compact.join(' ') + ']'
end

# Takes a new sample of all counters.
#
# The previous sample is kept in the @h* variables (@hstats, @hcstat, @hts),
# @stats/@irqs are reset, the sample time is stored in @ts and, once a
# previous timestamp exists, the elapsed time in @dt. Then the interrupt and
# softirq tables, affinity masks, /proc/stat and (when @pps is set) ethtool
# counters are re-read.
def grab_stat
  @hstats, @hcstat, @hts = @stats, @cstat, @ts

  @stats, @irqs = [], []
  @ts = Time.now
  @dt = @ts - @hts if @hts

  [['i', "/proc/interrupts"], ['s', "/proc/softirqs"]].each do |tag, path|
    read_table tag, path
  end
  read_affinity
  read_procstat
  ethtool_grab_stat if @pps
end

# Converts the two most recent samples into rates.
#
# Fills:
#   @h - rate per [tag, irq, cpu]
#   @t - summed rate per cpu, hardware irqs (tag 'i') only
#   @w - raw count increase per [tag, irq]
#   @s - one [rate, count, tag, irq, cpu, desc] record per @stats entry
# Counters with no previous value are treated as starting from 0.
# Returns @s.
def calc_speed
  previous = Hash.new(0)
  @hstats.each { |count, tag, irq, cpu, _desc| previous[[tag, irq, cpu]] = count }

  @h = {}
  @t = Hash.new(0) # rate per cpu
  @w = Hash.new(0) # irqs per irqN
  @s = @stats.map do |count, tag, irq, cpu, desc|
    key = [tag, irq, cpu]
    rate = (count - previous[key]) / @dt
    @t[cpu] += rate if tag == 'i'
    @w[[tag, irq]] += (count - previous[key])
    @h[key] = rate
    [rate, count, tag, irq, cpu, desc]
  end
end

# Computes per-CPU utilisation percentages from the difference between the
# current (@cstat) and previous (@hcstat) /proc/stat samples.
#
# Results are keyed by upper-cased cpu name:
#   @cBusy - 100 minus the share of fields [3] and [4] (idle, iowait)
#   @cHIrq - share of field [5] (irq)
#   @cSIrq - share of field [6] (softirq)
def calc_cpu
  @cBusy  = Hash.new(0)
  @cHIrq  = Hash.new(0)
  @cSIrq  = Hash.new(0)
  # user, nice, system, [3] idle, [4] iowait, irq, softirq, etc.
  @cstat.each do |name, now|
    delta = now.zip(@hcstat[name]).map { |cur, old| cur - old }
    total = delta.reduce(:+)
    key = name.upcase
    @cBusy[key] = 100 - (delta[3] + delta[4]).to_f / total * 100
    @cHIrq[key] = (delta[5]).to_f / total * 100
    @cSIrq[key] = (delta[6]).to_f / total * 100
  end
end

# Flat "top" view: prints one line per [irq, cpu] pair with a positive
# rate, highest rate first (lowest first when @reverse is set).
# @imode :irq hides softirq rows, :soft hides hardware irq rows.
# Sorts @s in place.
def show_top
  @s.sort!.reverse!
  @s.reverse! if @reverse
  hidden = case @imode
           when :irq  then 's'
           when :soft then 'i'
           end
  @s.each do |rate, _count, tag, irq, cpu, desc|
    next if tag == hidden
    next unless rate > 0
    print "%9.1f  %s  %s  <%s>  %s" % [rate, cpu.downcase, tag, irq, desc]
    print_ethstat(desc) if @pps
    puts
  end
end

@ifilter = {}
# Table view: one column per CPU listed in @icpus, one row per interrupt.
#
# Prints a header with the CPU names, then the cpuUtil / %irq / %sirq /
# irqTotal summary rows, then one row per entry of @irqs (order reversed in
# place when @reverse is set). @imode :irq hides softirq rows, :soft hides
# hardware irq rows.
def show_interrupts
  maxlen = 7
  @irqs.reverse! if @reverse
  print "%s %*s  " % [" ", maxlen, " "]
  @icpus.each { |c| print " %6s" % c }
  puts

  # load
  print "%*s: " % [maxlen + 2, "cpuUtil"]
  @icpus.each { |c| print " %6.1f" % @cBusy[c] }
  puts "   total CPU utilization %"
  #
  print "%*s: " % [maxlen + 2, "%irq"]
  @icpus.each { |c| print " %6.1f" % @cHIrq[c] }
  puts "   hardware IRQ CPU util%"
  print "%*s: " % [maxlen + 2, "%sirq"]
  @icpus.each { |c| print " %6.1f" % @cSIrq[c] }
  puts "   software IRQ CPU util%"

  # total
  print "%*s: " % [maxlen + 2, "irqTotal"]
  @icpus.each { |c| print " %6d" % @t[c] }
  puts "   total hardware IRQs"

  rej = nil
  rej = 's' if @imode == :irq
  rej = 'i' if @imode == :soft
  @irqs.each do |t, i, desc|
    next if t == rej

    # include incrementally and all eth
    # A row is shown once it had any activity (@w > 0) or its description
    # matches /eth/; after that it stays in @ifilter and keeps being shown.
    # @showall bypasses the filter.
    unless @ifilter[[t, i]] || @showall
      next unless @w[[t, i]] > 0 || desc =~ /eth/
      @ifilter[[t, i]] = true
    end

    print "%s %*s:  " % [t.to_s, maxlen, i.slice(0, maxlen)]
    # Called for its side effect: sets @rps/@xps, which calc_rps reads below.
    # The local `rps` itself is not used.
    rps = get_rps(desc)
    @icpus.each do |c|
      cpu = c[/\d+/].to_i
      # NOTE(review): i.to_i is 0 for non-numeric names, so such rows look up
      # the affinity of IRQ 0 -- confirm this is intended.
      aff = @aff[i.to_i]
      # off: this CPU's bit is clear in the affinity mask (only set when a
      # mask is known).
      off = ((aff & 1 << cpu) ==0)? true : false if aff
      fla = calc_rps(cpu)
      begin
        v = @h[[t, i, c]]
        # Print the rate unless it is zero on a CPU excluded by the mask,
        # in which case "." is printed instead.
        if v > 0 || !off
          print "%6d%c" % [v, fla]
        elsif aff
          print "%6s%c" % [".", fla]
        end
      rescue
        # Any error while formatting a cell leaves that cell unprinted.
      end
    end
    print desc
    print_ethstat(desc) if @pps
    puts
  end
end

# Renders the current sample in the selected output mode:
# the flat top view for @omode == :top, the table view otherwise.
def select_output
  return show_top if @omode == :top
  show_interrupts
end

# Fits text to the curses screen: every line is cut to (columns - 1)
# characters, at most (lines - 2) lines are kept, and the result is padded
# with newlines when fewer lines than that remain. Returns a new String.
def curses_choplines(text)
  width  = Curses.cols - 1
  height = Curses.lines - 2
  visible = text.split("\n").map { |row| row[0, width] }[0, height]
  chopped = visible.join("\n")
  missing = height - visible.size
  chopped << "\n" * missing if missing > 0
  chopped
end

# Prints the interactive help screen: the meaning of '.' cells in table
# view and the keys handled by the interactive main loop (including 'd',
# which prompts for a new display interval).
def show_help
  puts "irqtop help:"
  puts
  puts "  In table view, cells marked with '.' mean this hw irq is"
  puts "     disabled via /proc/irq/<irq>/smp_affinity"
  puts "  Interactive keys:"
  puts "    i     Toggle (hardware) irqs view"
  puts "    s     Toggle software irqs (softirqs) view"
  puts "    e     Show eth stat per queue"
  puts "    R     Show rps/xps affinity"
  puts "    t     Flat top display mode"
  puts "    x     Table display mode"
  puts "    r     Reverse rows order"
  puts "    c     Toggle colors (for eth)"
  puts "    a     Show lines with zero rate (all)"
  puts "    A     Clear lines with zero rates"
  puts "    d     Set display interval"
  puts "    .     Pause screen updating"
  puts "    h,?   This help screen"
  puts "    q     Quit."
  puts "  Any other key will update display."
  puts
  puts "Press any key to continue."
end

hostname = `hostname`.strip
#
grab_stat
sleep 0.5

COLOR_GREEN  = "\033[0;32m"
COLOR_YELLOW = "\033[0;33m"
COLOR_CYAN   = "\033[0;36m"
COLOR_RED    = "\033[0;31m"
COLOR_OFF    = "\033[m"
# Prints one line of already-rendered output to the terminal, wrapped in
# ANSI colour escapes (COLOR_* constants).
#
# Line colour: green for lines containing "-rx-", yellow for "-tx-", cyan
# for a word starting with "eth". Summary lines (cpuUtil:/irq:/sirq:) get
# their numeric cells coloured by value; coloured lines ending in a
# " [...]" statistics block get each "name:value" pair coloured by name.
def tty_printline(t)
  latr = nil # line color
  if t =~ /-rx-/
    latr = COLOR_GREEN
  elsif t =~ /-tx-/
    latr = COLOR_YELLOW
  elsif t =~ /\beth/
    latr = COLOR_CYAN
  end
  print latr if latr

  if t =~ /cpuUtil:|irq:|sirq:/
    # colorize percentage values
    # Only whitespace-prefixed tokens are printed; purely numeric ones are
    # coloured red (>= 90), green (<= 10) or yellow (in between).
    t.scan(/\s+\S+/) do |e|
      eatr = nil
      if e =~ /^\s*[\d.]+$/
        if e.to_i >= 90
          eatr = COLOR_RED
        elsif e.to_i <= 10
          eatr = COLOR_GREEN
        else
          eatr = COLOR_YELLOW
        end
      end
      print eatr if eatr
      print e
      # Return to the line colour (or to no colour) after a coloured cell.
      print (latr)? latr : COLOR_OFF if eatr
    end
  elsif latr && t =~ / \[[^\]]+\]$/
    # colorize eth stats
    # $` is the text before the bracketed block, $& the block, $' the rest.
    print $`
    print COLOR_OFF if latr
    $&.scan(/(.*?)(\w+)(:)(\d+)/) do |e|
      # e = [leading text, name, ":", value]
      eatr = nil
      case e[1]
      when 'rx'
        eatr = COLOR_GREEN
      when 'tx'
        eatr = COLOR_YELLOW
      else
        eatr = COLOR_RED
      end
      # Zero values stay uncoloured.
      eatr = nil if e[3].to_i == 0

      print e[0]
      print eatr if eatr
      print e[1..-1].join
      print (latr)? latr : COLOR_OFF if eatr
    end
    print $'
  else
    print t
  end

  print COLOR_OFF if latr
  puts
end
# Runs the given block and sends what it prints to the terminal.
#
# With colours disabled the block simply runs and its value is returned.
# With colours enabled (@color) the block's output is captured by
# temporarily redirecting $stdout to a StringIO, then re-printed line by
# line through tty_printline. $stdout is restored in an `ensure` clause so
# that an exception raised by the block cannot leave it redirected.
def tty_output
  return yield unless @color

  saved = $stdout
  capture = StringIO.new
  begin
    $stdout = capture
    yield
  ensure
    $stdout = saved
  end

  capture.string.split("\n", -1).each do |li|
    tty_printline(li)
  end
end

# Batch (non-interactive) mode: print a fresh report every @delay seconds
# and exit, without ever initialising curses.
if @batch
  # Colours are used only when standard output is a terminal.
  @color = @color && $stdout.tty?
  loop do
    grab_stat
    calc_speed
    calc_cpu
    puts "#{hostname} - irqtop - #{Time.now}"
    tty_output {
      select_output
    }
    $stdout.flush
    # With an iteration count, stop once it has been counted down to zero.
    break if @count && (@count -= 1) == 0
    sleep @delay
  end
  exit 0
end

# Interactive mode setup: initialise the curses screen and define the
# colour pairs used by curses_printline
# (1 = green, 2 = yellow, 3 = cyan, 4 = red; all on black).
Curses.init_screen
Curses.start_color
Curses.cbreak
Curses.noecho
Curses.nonl
Curses.init_pair(1, Curses::COLOR_GREEN,  Curses::COLOR_BLACK);
Curses.init_pair(2, Curses::COLOR_YELLOW, Curses::COLOR_BLACK);
Curses.init_pair(3, Curses::COLOR_CYAN,   Curses::COLOR_BLACK);
Curses.init_pair(4, Curses::COLOR_RED,    Curses::COLOR_BLACK);
# Global handle to the main window, used by all curses_* helpers.
$stdscr = Curses.stdscr
$stdscr.keypad(true)

# Writes one line of already-rendered output to $stdscr at the current
# cursor position, using curses colour pairs.
#
# Line colour: pair 1 for lines containing "-rx-", pair 2 for "-tx-",
# pair 3 for a word starting with "eth". Summary lines
# (cpuUtil:/irq:/sirq:) get their numeric cells coloured by value; coloured
# lines ending in a " [...]" statistics block get each "name:value" pair
# coloured by name.
def curses_printline(t)
  latr = nil # line color
  if t =~ /-rx-/
    latr = Curses.color_pair(1)
  elsif t =~ /-tx-/
    latr = Curses.color_pair(2)
  elsif t =~ /\beth/
    latr = Curses.color_pair(3)
  end
  $stdscr.attron(latr)  if latr

  if t =~ /cpuUtil:|irq:|sirq:/
    # colorize percentage values
    # Only whitespace-prefixed tokens are written; purely numeric ones use
    # pair 4 (>= 90), pair 1 (<= 10) or pair 2 (in between).
    # No newline is added in this branch.
    t.scan(/\s+\S+/) do |e|
      eatr = nil
      if e =~ /^\s*[\d.]+$/
        if e.to_i >= 90
          eatr = Curses.color_pair(4)
        elsif e.to_i <= 10
          eatr = Curses.color_pair(1)
        else
          eatr = Curses.color_pair(2)
        end
      end
      $stdscr.attron(eatr)  if eatr
      $stdscr.addstr("#{e}")
      $stdscr.attroff(eatr) if eatr
    end
  elsif latr && t =~ / \[[^\]]+\]$/
    # colorize eth stats
    # $` is the text before the bracketed block, $& the block, $' the rest.
    $stdscr.addstr($`)
    $stdscr.attroff(latr) if latr
    $&.scan(/(.*?)(\w+)(:)(\d+)/) do |e|
      # e = [leading text, name, ":", value]
      eatr = nil
      case e[1]
      when 'rx'
        eatr = Curses.color_pair(1)
      when 'tx'
        eatr = Curses.color_pair(2)
      else
        eatr = Curses.color_pair(4)
      end
      # Zero values stay uncoloured.
      eatr = nil if e[3].to_i == 0

      $stdscr.addstr(e[0])
      $stdscr.attron(eatr)  if eatr
      $stdscr.addstr(e[1..-1].join)
      $stdscr.attroff(eatr) if eatr
    end
    $stdscr.addstr($' + "\n")
  else
    $stdscr.addstr("#{t}\n")
  end

  $stdscr.attroff(latr) if latr
end

# Runs the given block, captures everything it prints and draws it on the
# curses screen.
#
# The block's output is captured by temporarily redirecting $stdout to a
# StringIO; $stdout is restored in an `ensure` clause so that an exception
# raised by the block cannot leave it redirected. The captured text is
# fitted to the screen with curses_choplines and drawn either line by line
# through curses_printline (@color) or as one plain string. Finally the
# cursor is parked at row 1 and the screen refreshed.
def curses_output
  saved = $stdout
  capture = StringIO.new
  begin
    $stdout = capture
    yield
  ensure
    $stdout = saved
  end

  txt = curses_choplines(capture.string)
  if @color
    txt.split("\n", -1).each_with_index do |li, i|
      $stdscr.setpos(i, 0)
      curses_printline(li)
    end
  else
    $stdscr.setpos(0, 0)
    $stdscr.addstr(txt)
  end
  $stdscr.setpos(1, 0)
  Curses.refresh
end

# Shows a prompt on screen row 1 and reads a line of input.
#
# text - prompt string
# echo - whether typed characters are echoed while reading (default true)
#
# Returns whatever Curses.getstr returns. Input echo is switched off again
# before returning.
def curses_enter(text, echo = true)
  # First write the prompt followed by a newline, then rewrite it in bold
  # from the start of the same row.
  $stdscr.setpos(1, 0)
  $stdscr.addstr(text + "\n")
  $stdscr.setpos(1, 0)
  Curses.attron(Curses::A_BOLD)
  $stdscr.addstr(text)
  Curses.attroff(Curses::A_BOLD)
  Curses.refresh
  Curses.echo if echo
  # Negative timeout: wait for input without a time limit.
  Curses.timeout = -1
  line = Curses.getstr
  Curses.noecho
  line
end

# Interactive main loop: sample, redraw, then wait up to @delay seconds for
# a key. A key press (or the timeout) triggers the next refresh.
loop do
  grab_stat
  calc_speed
  calc_cpu

  curses_output {
   puts "#{hostname} - irqtop - #{Time.now}"
   select_output
  }

  Curses.timeout = @delay * 1000
  # getch yields nothing usable on timeout; treat any failure as "no key".
  ch = Curses.getch.chr rescue nil
  case ch
  when "\f"
    Curses.clear
  when "q", "Z", "z"
    break
  when 'i'
    @imode = (@imode == :both)? :soft : :both
  when 's'
    @imode = (@imode == :both)? :irq : :both
  when 't'
    @omode = (@omode == :top)? :table : :top
  when 'x'
    @omode = (@omode == :table)? :top : :table
  when 'e', 'p'
    @pps = !@pps
  when 'r'
    @reverse = !@reverse
  when 'c'
    @color = !@color
  when 'A'
    # Forget which rows have been shown so far.
    @ifilter = {}
  when 'a'
    @ifilter = {}
    @showall = !@showall
  when 'R'
    @showrps = !@showrps
  when '.'
    curses_enter("Pause, press enter to continue: ", false)
  when 'd'
    d = curses_enter("Enter display interval: ")
    # Ignore input that is not a positive number.
    @delay = d.to_f if d.to_f > 0
  when 'h', '?'
    curses_output { show_help }
    Curses.timeout = -1
    ch = Curses.getch.chr rescue nil
    break if ch == 'q'
  end
end

ipt-netflow-2.2/snmp_NETFLOW.c0000644000000000000000000004172613213006644014653 0ustar  rootroot/*
 * dlmod plugin for net-snmp for monitoring
 * ipt_NETFLOW module via IPT-NETFLOW-MIB.
 *
 * (c) 2014 <abc@telekom.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

#include <net-snmp/net-snmp-config.h>
#include <net-snmp/net-snmp-includes.h>
#include <net-snmp/agent/net-snmp-agent-includes.h>

/* Comma-separated sub-identifiers of the MIB root; used as the common
 * prefix of every OID array below. */
#define iptNetflowMIB_oid 1, 3, 6, 1, 4, 1, 37476, 9000, 10, 1 /* .1.3.6.1.4.1.37476.9000.10.1 */

/* iptNetflowObjects */
/* <root>.1.1 and <root>.1.2 */
static oid iptNetflowModule_oid[]    = { iptNetflowMIB_oid, 1, 1 };
static oid iptNetflowSysctl_oid[]    = { iptNetflowMIB_oid, 1, 2 };
/* iptNetflowStatistics */
/* <root>.2.1, <root>.2.2 and <root>.2.3 */
static oid iptNetflowTotals_oid[]    = { iptNetflowMIB_oid, 2, 1 };
static oid iptNetflowCpuTable_oid[]  = { iptNetflowMIB_oid, 2, 2 };
static oid iptNetflowSockTable_oid[] = { iptNetflowMIB_oid, 2, 3 };

/*
 * Descriptor of one exported variable. Arrays of these are terminated by
 * an entry whose obj is 0.
 */
struct snmp_vars {
        /* sub-identifier of the variable; 0 marks the end of an array */
        int obj;
        /* ASN_* type tag used when the value is handed to net-snmp */
        int type;
        /* name the value is looked up by (file name or statistics keyword) */
        char *name;

        time_t ts; /* when value last read */
        /* last parsed value; filled in for the totals by
         * grab_ipt_netflow_snmp() */
        long long val64;
};

/*
 * Module information objects. Apart from the two ids singled out below,
 * the name is the file read under /sys/module/ipt_NETFLOW/
 * (see modinfo_fname()).
 */
struct snmp_vars modinfos[] = {
        {1, ASN_OCTET_STR, "name"},
        {2, ASN_OCTET_STR, "version"},
        {3, ASN_OCTET_STR, "srcversion"},
        {4, ASN_OCTET_STR, "loadTime"}, /* DateAndTime */
        {5, ASN_INTEGER,   "refcnt"},
        { 0 }
};
/* Kernel module name: used in the sysfs path and returned for object 1. */
#define MODINFO_NAME "ipt_NETFLOW"
/* Objects answered by special-case code in iptNetflowModule_handler(). */
#define MODINFO_NAME_ID 1
#define MODINFO_DATE_ID 4

/*
 * Sysctl objects. The name is the file under /proc/sys/net/netflow/
 * that is read and written (see sysctl_fname()).
 */
struct snmp_vars sysctls[] = {
        {1,  ASN_INTEGER,   "protocol"},
        {2,  ASN_INTEGER,   "hashsize"},
        {3,  ASN_INTEGER,   "maxflows"},
        {4,  ASN_INTEGER,   "active_timeout"},
        {5,  ASN_INTEGER,   "inactive_timeout"},
        {6,  ASN_INTEGER,   "sndbuf"},
        {7,  ASN_OCTET_STR, "destination"},
        {8,  ASN_OCTET_STR, "aggregation"},
        {9,  ASN_OCTET_STR, "sampler"},
        {10, ASN_INTEGER,   "natevents"},
        {11, ASN_INTEGER,   "promisc"},
        {12, ASN_OCTET_STR, "snmp-rules"},
        {13, ASN_INTEGER,   "scan-min"},
        { 0 }
};

/*
 * Total statistics. The name is the keyword that starts the matching line
 * of /proc/net/stat/ipt_netflow_snmp; grab_ipt_netflow_snmp() stores the
 * parsed value in val64 and the read time in ts.
 */
struct snmp_vars totals[] = {
        {1,  ASN_COUNTER64, "inBitRate"},
        {2,  ASN_GAUGE,     "inPacketRate"},
        {3,  ASN_COUNTER64, "inFlows"},
        {4,  ASN_COUNTER64, "inPackets"},
        {5,  ASN_COUNTER64, "inBytes"},
        {6,  ASN_GAUGE,     "hashMetric"},
        {7,  ASN_GAUGE,     "hashMemory"},
        {8,  ASN_GAUGE,     "hashFlows"},
        {9,  ASN_GAUGE,     "hashPackets"},
        {10, ASN_COUNTER64, "hashBytes"},
        {11, ASN_COUNTER64, "dropPackets"},
        {12, ASN_COUNTER64, "dropBytes"},
        {13, ASN_GAUGE,     "outByteRate"},
        {14, ASN_COUNTER64, "outFlows"},
        {15, ASN_COUNTER64, "outPackets"},
        {16, ASN_COUNTER64, "outBytes"},
        {17, ASN_COUNTER64, "lostFlows"},
        {18, ASN_COUNTER64, "lostPackets"},
        {19, ASN_COUNTER64, "lostBytes"},
        {20, ASN_COUNTER,   "errTotal"},
        {21, ASN_COUNTER,   "sndbufPeak"},
        { 0 }
};
/* Object id of the "hashMetric" entry above. */
#define TOTALS_METRIC_ID 6

/* Table rebuilt from the "cpu<N>" lines of the statistics file. */
static netsnmp_table_data_set *cpu_data_set;
static netsnmp_cache *stat_cache = NULL;

/*
 * Columns of the per-CPU table. The first entry describes the index;
 * parse_table_row() assigns the values of a line to the remaining entries
 * in order.
 */
struct snmp_vars cputable[] = {
        {1,  ASN_INTEGER,   "cpuIndex"},
        {2,  ASN_GAUGE,     "cpuInPacketRate"},
        {3,  ASN_COUNTER64, "cpuInFlows"},
        {4,  ASN_COUNTER64, "cpuInPackets"},
        {5,  ASN_COUNTER64, "cpuInBytes"},
        {6,  ASN_GAUGE,     "cpuHashMetric"},
        {7,  ASN_COUNTER64, "cpuDropPackets"},
        {8,  ASN_COUNTER64, "cpuDropBytes"},
        {9,  ASN_COUNTER,   "cpuErrTrunc"},
        {10, ASN_COUNTER,   "cpuErrFrag"},
        {11, ASN_COUNTER,   "cpuErrAlloc"},
        {12, ASN_COUNTER,   "cpuErrMaxflows"},
        { 0 }
};

/* Table rebuilt from the "sock<N>" lines of the statistics file. */
static netsnmp_table_data_set *sock_data_set;
/*
 * Columns of the per-socket table. The first entry describes the index;
 * parse_table_row() assigns the values of a line to the remaining entries
 * in order.
 */
struct snmp_vars socktable[] = {
        {1,  ASN_INTEGER,   "sockIndex"},
        {2,  ASN_OCTET_STR, "sockDestination"},
        {3,  ASN_INTEGER,   "sockActive"},
        {4,  ASN_COUNTER,   "sockErrConnect"},
        {5,  ASN_COUNTER,   "sockErrFull"},
        {6,  ASN_COUNTER,   "sockErrCberr"},
        {7,  ASN_COUNTER,   "sockErrOther"},
        {8,  ASN_GAUGE,     "sockSndbuf"},
        {9,  ASN_GAUGE,     "sockSndbufFill"},
        {10, ASN_GAUGE,     "sockSndbufPeak"},
        { 0 }
};

static time_t totals_ts; /* when statistics last read from kernel */

/*
 * Return the largest obj value in a 0-terminated descriptor array,
 * or 0 when the array is empty.
 */
static int var_max(struct snmp_vars *head)
{
        int highest = 0;

        while (head->obj) {
                if (head->obj > highest)
                        highest = head->obj;
                head++;
        }
        return highest;
}

/*
 * Look up a descriptor by object id in a 0-terminated array.
 * Returns the first matching entry, or NULL when there is none.
 */
static struct snmp_vars *find_varinfo(struct snmp_vars *head, const int obj)
{
        struct snmp_vars *cur = head;

        while (cur->obj && cur->obj != obj)
                cur++;
        return cur->obj ? cur : NULL;
}

/*
 * Look up a descriptor by name in a 0-terminated array.
 * Returns the first entry whose name equals `name`, or NULL.
 */
static struct snmp_vars *find_varinfo_str(struct snmp_vars *head, const char *name)
{
        for (; head->obj; head++)
                if (strcmp(head->name, name) == 0)
                        return head;
        return NULL;
}

/*
 * Format the sysfs path of module attribute `name` into fname
 * (at most flen bytes, truncated if longer).
 */
static void modinfo_fname(char *name, char *fname, size_t flen)
{
        (void)snprintf(fname, flen,
            "/sys/module/" MODINFO_NAME "/%s",
            name);
}

/*
 * Format the procfs path of sysctl `name` into fname
 * (at most flen bytes, truncated if longer).
 */
static void sysctl_fname(char *name, char *fname, size_t flen)
{
        (void)snprintf(fname, flen,
            "/proc/sys/net/netflow/%s",
            name);
}

/*
 * Return 1 when the sysctl file for `name` is writable by this process
 * (access(2) with W_OK), 0 otherwise.
 */
static int sysctl_access_ok(char *name)
{
        char path[64];

        sysctl_fname(name, path, sizeof(path));
        return access(path, W_OK) >= 0;
}

/*
 * Read up to size - 1 bytes from the start of file `name` into buf with a
 * single read(2) and NUL-terminate the result.
 * Returns buf on success, NULL when the file cannot be opened or read.
 */
static char *file_read_string(char *name, char *buf, size_t size)
{
        char *result = NULL;
        int fd;
        int got;

        fd = open(name, O_RDONLY);
        if (fd < 0)
                return NULL;
        got = read(fd, buf, size - 1);
        if (got >= 0) {
                buf[got] = '\0';
                result = buf;
        }
        close(fd);
        return result;
}

/*
 * Read module attribute `name` into buf (see file_read_string()).
 * Returns buf, or NULL on failure.
 */
static char *modinfo_read_string(char *name, char *buf, size_t size)
{
        char path[64];

        modinfo_fname(name, path, sizeof(path));

        return file_read_string(path, buf, size);
}

/*
 * Read sysctl `name` into buf (see file_read_string()).
 * Returns buf, or NULL on failure.
 */
static char *sysctl_read_string(char *name, char *buf, size_t size)
{
        char path[64];

        sysctl_fname(name, path, sizeof(path));

        return file_read_string(path, buf, size);
}

/*
 * Write `size` bytes from buf to the sysctl file for `name` with a single
 * write(2).
 * Returns the result of write(2), or the negative open(2) result when the
 * file cannot be opened.
 */
static int sysctl_write_string(char *name, char *buf, size_t size)
{
        char path[64];
        int fd, written;

        sysctl_fname(name, path, sizeof(path));
        if ((fd = open(path, O_RDWR, 0644)) < 0)
                return fd;
        written = write(fd, buf, size);
        close(fd);
        return written;
}

/*
 * Answer a GET for sysctl object `obj`.
 *
 * The sysctl file is read as text; ASN_INTEGER objects are converted with
 * atoi(), ASN_OCTET_STR objects are returned up to the first newline.
 * An unknown object, an unreadable file or any other type is reported as
 * SNMP_NOSUCHOBJECT on the request.
 * Always returns SNMP_ERR_NOERROR.
 */
static int sysctl_read(netsnmp_request_info *request, int obj)
{
        struct snmp_vars *var = find_varinfo(sysctls, obj);
        char text[225];
        char *data = NULL;
        long number;

        if (var)
                data = sysctl_read_string(var->name, text, sizeof(text));

        if (data && var->type == ASN_INTEGER) {
                number = atoi(data);
                snmp_set_var_typed_value(request->requestvb,
                    var->type,
                    (u_char *)&number, sizeof(number));
        } else if (data && var->type == ASN_OCTET_STR) {
                snmp_set_var_typed_value(request->requestvb,
                    var->type,
                    (u_char *)data, strcspn(data, "\n"));
        } else {
                netsnmp_request_set_error(request, SNMP_NOSUCHOBJECT);
        }
        return SNMP_ERR_NOERROR;
}

/*
 * Apply a SET for sysctl object `obj`.
 *
 * The new value is formatted as one text line and written to the sysctl
 * file. Errors are reported on the request:
 *   SNMP_NOSUCHOBJECT     unknown object
 *   SNMP_ERR_WRONGTYPE    object type cannot be written
 *   SNMP_ERR_WRONGLENGTH  formatted value does not fit the line buffer
 *   SNMP_ERR_BADVALUE     the write failed or was short
 * Always returns SNMP_ERR_NOERROR.
 */
static int sysctl_write(netsnmp_request_info *request, int obj)
{
        struct snmp_vars *sys = find_varinfo(sysctls, obj);
        netsnmp_variable_list *vb = request->requestvb;
        char buf[225];
        int len;

        if (!sys) {
                netsnmp_request_set_error(request, SNMP_NOSUCHOBJECT);
                return SNMP_ERR_NOERROR;
        }
        switch (sys->type) {
        case ASN_INTEGER:
                len = snprintf(buf, sizeof(buf), "%ld\n", *(vb->val.integer));
                break;
        case ASN_UNSIGNED:
                len = snprintf(buf, sizeof(buf), "%lu\n",
                    (unsigned long)*(vb->val.integer));
                break;
        case ASN_OCTET_STR:
                /* Bound the copy by the value's own length rather than
                 * relying on a terminating NUL. */
                len = snprintf(buf, sizeof(buf), "%.*s\n",
                    (int)vb->val_len, (char *)vb->val.string);
                break;
        default:
                netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE);
                return SNMP_ERR_NOERROR;
        }
        /* snprintf() reports the untruncated length: refuse values that
         * did not fit instead of writing a cut-off line. */
        if (len < 0 || (size_t)len >= sizeof(buf)) {
                netsnmp_request_set_error(request, SNMP_ERR_WRONGLENGTH);
                return SNMP_ERR_NOERROR;
        }
        if (sysctl_write_string(sys->name, buf, len) < len)
                netsnmp_request_set_error(request, SNMP_ERR_BADVALUE);
        return SNMP_ERR_NOERROR;
}

/*
 * Request handler for the module information objects (modinfos[]).
 *
 * Only MODE_GET is served; any other mode sets SNMP_ERR_READONLY. Unknown
 * objects and unreadable values set SNMP_ERR_NOSUCHNAME. The function
 * itself always returns SNMP_ERR_NOERROR; failures are reported on the
 * request.
 */
static int iptNetflowModule_handler(
    netsnmp_mib_handler          *handler,
    netsnmp_handler_registration *reginfo,
    netsnmp_agent_request_info   *reqinfo,
    netsnmp_request_info         *request)
{
        struct snmp_vars *sys;
        oid obj;
        char buf[225];
        char *p = NULL;
        long value;

        /* The object id is the second-to-last sub-identifier of the
         * requested OID. */
        obj = request->requestvb->name[request->requestvb->name_length - 2];
        sys = find_varinfo(modinfos, obj);
        if (!sys) {
                netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
                return SNMP_ERR_NOERROR;
        }
        if (reqinfo->mode != MODE_GET) {
                netsnmp_request_set_error(request, SNMP_ERR_READONLY);
                return SNMP_ERR_NOERROR;
        }
        switch (obj) {
        case MODINFO_NAME_ID:
                /* The module name is a compile-time constant. */
                p = MODINFO_NAME;
                break;
        case MODINFO_DATE_ID: {
                size_t len;
                struct stat st;

                /* The value is derived from the modification time of the
                 * module's sysfs directory. */
                modinfo_fname(".", buf, sizeof(buf));
                if (stat(buf, &st) < 0)
                        break; /* p stays NULL: reported below */
                p = (char *)date_n_time(&st.st_mtime, &len);
                snmp_set_var_typed_value(request->requestvb, ASN_OCTET_STR, p, len);
                return SNMP_ERR_NOERROR;
        }
        default:
                /* Everything else is read from the sysfs file named after
                 * the object. */
                p = modinfo_read_string(sys->name, buf, sizeof(buf));
        }
        if (!p) {
                netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
                return SNMP_ERR_NOERROR;
        }

        switch (sys->type) {
        case ASN_INTEGER:
                value = atoi(p);
                snmp_set_var_typed_value(request->requestvb,
                    sys->type,
                    (u_char *)&value, sizeof(value));
                break;
        case ASN_OCTET_STR:
                /* Strings are returned up to the first newline. */
                snmp_set_var_typed_value(request->requestvb,
                    sys->type,
                    (u_char *)p, strcspn(p, "\n"));
                break;
        default:
                netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE);

        }
        return SNMP_ERR_NOERROR;
}

/*
 * Request handler for the sysctl objects (sysctls[]).
 *
 * GET is served by sysctl_read() and the SET action phase by
 * sysctl_write(). The first reserve phase validates the request: an
 * unknown or non-writable object sets SNMP_ERR_NOSUCHNAME, a value of the
 * wrong type sets SNMP_ERR_WRONGTYPE. The remaining SET phases are no-ops.
 * Any other mode returns SNMP_ERR_GENERR.
 */
static int iptNetflowSysctl_handler(
    netsnmp_mib_handler          *handler,
    netsnmp_handler_registration *reginfo,
    netsnmp_agent_request_info   *reqinfo,
    netsnmp_request_info         *request)
{
        const int mode = reqinfo->mode;
        oid obj;

        /* The object id is the second-to-last sub-identifier of the OID. */
        obj = request->requestvb->name[request->requestvb->name_length - 2];

        if (mode == MODE_GET)
                return sysctl_read(request, obj);
        if (mode == MODE_SET_ACTION)
                return sysctl_write(request, obj);
        if (mode == MODE_SET_RESERVE1) {
                struct snmp_vars *var = find_varinfo(sysctls, obj);

                if (!var) {
                        netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
                } else {
                        if (!sysctl_access_ok(var->name))
                                netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
                        if (request->requestvb->type != var->type)
                                netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE);
                }
                return SNMP_ERR_NOERROR;
        }
        if (mode == MODE_SET_RESERVE2 || mode == MODE_SET_FREE ||
            mode == MODE_SET_UNDO || mode == MODE_SET_COMMIT)
                return SNMP_ERR_NOERROR;

        return SNMP_ERR_GENERR;
}

#define TOTAL_INTERVAL 1

/*
 * Remove and delete every row of a table data set.
 * The successor is fetched before a row is deleted.
 */
static void clear_data_set(netsnmp_table_data_set *data_set)
{
        netsnmp_table_row *row = netsnmp_table_data_set_get_first_row(data_set);

        while (row) {
                netsnmp_table_row *following =
                    netsnmp_table_data_set_get_next_row(data_set, row);

                netsnmp_table_dataset_remove_and_delete_row(data_set, row);
                row = following;
        }
}

/*
 * Build one table row from a whitespace-separated line and append it to
 * data_set.
 *
 * cpu       row index (also stored as column 1 when filling cputable)
 * p         NUL-terminated text with the column values; modified in place
 * sys       column descriptors: the first entry describes the index, the
 *           values of the line are assigned to the following entries in
 *           order
 * data_set  table the finished row is added to
 *
 * Values containing a '.' are parsed as floating point and stored
 * multiplied by 100; all others are parsed as decimal integers. If the
 * line has fewer values than columns, the remaining columns are left
 * unset. A column of an unsupported type discards the whole row.
 */
static void parse_table_row(
    int                     cpu,
    char                    *p,
    struct snmp_vars        *sys,
    netsnmp_table_data_set  *data_set)
{
        netsnmp_table_row *row;

        row = netsnmp_create_table_data_row();
        if (!row)
                return; /* allocation failed: nothing to add */
        netsnmp_table_row_add_index(row, ASN_INTEGER, (u_char *)&cpu, sizeof(cpu));

        if (sys == cputable) {
                /* add cpuIndex as column too to break SMIv2 */
                netsnmp_set_row_column(row, 1, sys->type, (char *)&cpu, sizeof(cpu));
        }
        for (++sys; p && sys->obj; sys++) {
                char             *val;
                long long        val64;
                unsigned int     uval32;
                int              val32;
                struct counter64 c64;

                /* Cut the next token out of the line; p becomes NULL after
                 * the last one, which ends the loop. */
                p += strspn(p, " \t");
                val = p;
                if ((p = strpbrk(p, " \t")))
                        *p++ = '\0';
                if (index(val, '.')) {
                        double d = strtod(val, NULL);

                        val64 = (long long)(d * 100);
                } else
                        val64 = strtoll(val, NULL, 10);

                switch (sys->type) {
                case ASN_OCTET_STR:
                        netsnmp_set_row_column(row, sys->obj,
                            sys->type, (char *)val, strlen(val));
                        break;
                case ASN_INTEGER:
                case ASN_GAUGE:
                        val32 = (int)val64;
                        netsnmp_set_row_column(row, sys->obj,
                            sys->type, (char *)&val32, sizeof(val32));
                        break;
                case ASN_COUNTER:
                        uval32 = (unsigned int)val64;
                        netsnmp_set_row_column(row, sys->obj,
                            sys->type, (char *)&uval32, sizeof(uval32));
                        break;
                case ASN_COUNTER64:
                        c64.low = (uint32_t)val64;
                        c64.high = val64 >> 32;
                        netsnmp_set_row_column(row, sys->obj,
                            sys->type, (char *)&c64, sizeof(c64));
                        break;
                default:
                        /* The row is freed here, so it must neither be
                         * filled further nor added to the table. */
                        netsnmp_table_dataset_delete_row(row);
                        return;
                }

        }
        netsnmp_table_data_add_row(data_set->table, row);
}

/*
 * Refresh the cached statistics from /proc/net/stat/ipt_netflow_snmp.
 *
 * The file is read in one go into a static buffer and parsed line by line.
 * Each line is "<name> <values...>":
 *   - names starting with "cpu"  feed a row of the CPU table,
 *   - names starting with "sock" feed a row of the socket table,
 *   - any other name is looked up in `totals` and its scalar value stored
 *     (values containing a '.' are stored multiplied by 100).
 *
 * Nothing is done when the previous successful refresh is more recent than
 * TOTAL_INTERVAL + 1 seconds, or when the file cannot be opened or read.
 *
 * now: current time, used both for rate limiting and to stamp the values.
 */
static void grab_ipt_netflow_snmp(time_t now)
{
        static char buf[4096];
        int fd;
        int n;
        char *p = buf;

        if ((now - totals_ts) < (TOTAL_INTERVAL + 1))
                return;

        if ((fd = open("/proc/net/stat/ipt_netflow_snmp", O_RDONLY)) < 0)
                return;

        n = read(fd, buf, sizeof(buf) - 1);
        close(fd);
        if (n <= 0)
                return;
        buf[n] = '\0';

        DEBUGMSGTL(("netflow", "%s\n", buf));
        clear_data_set(cpu_data_set);
        clear_data_set(sock_data_set);
        while (*p) {
                struct snmp_vars *sys;
                char *name = p;
                char *val;
                char *eol;

                /* split "<name> <values>" at the first blank */
                if (!(p = strpbrk(p, " \t")))
                        break;
                *p++ = '\0';
                val = p + strspn(p, " \t");

                /* A line without terminating newline can only be the last
                 * one, cut short because the data did not fit into buf.
                 * Its value may be incomplete, so drop it rather than
                 * dereference the NULL returned by index(). */
                if (!(eol = index(p, '\n')))
                        break;
                *eol = '\0';
                p = eol + 1;

                if (!strncmp(name, "cpu", 3)) {
                        parse_table_row(atoi(name + 3), val, cputable, cpu_data_set);
                        continue;
                } else if (!strncmp(name, "sock", 4)) {
                        parse_table_row(atoi(name + 4), val, socktable, sock_data_set);
                        continue;
                }
                if (!(sys = find_varinfo_str(totals, name)))
                    continue;
                if (index(val, '.')) {
                        /* fractional values are kept as fixed point, x100 */
                        double d = strtod(val, NULL);
                        sys->val64 = (long long)(d * 100);
                } else
                        sys->val64 = strtoll(val, NULL, 10);
                sys->ts = now;
        }
        totals_ts = now;
}

/*
 * Scalar handler for the iptNetflowTotals group.
 *
 * Refreshes the statistics cache, finds the scalar addressed by the
 * request (its sub-identifier is the OID component just before the final
 * instance component) and answers with the cached value converted to the
 * variable's ASN type.
 *
 * Sets SNMP_ERR_NOSUCHNAME on the request when the variable is unknown or
 * its value is older than TOTAL_INTERVAL * 2 + 3 seconds, and
 * SNMP_ERR_READONLY for anything but a GET.  Returns SNMP_ERR_GENERR for
 * an unsupported variable type, SNMP_ERR_NOERROR otherwise.
 */
static int iptNetflowTotals_handler(
    netsnmp_mib_handler          *handler,
    netsnmp_handler_registration *reginfo,
    netsnmp_agent_request_info   *reqinfo,
    netsnmp_request_info         *request)
{
        const time_t     stamp = time(NULL);
        struct snmp_vars *var;
        struct counter64 wide;
        unsigned int     counter;
        int              gauge;
        oid              subid;

        grab_ipt_netflow_snmp(stamp);

        subid = request->requestvb->name[request->requestvb->name_length - 2];
        var = find_varinfo(totals, subid);
        if (var == NULL || (stamp - var->ts) > (TOTAL_INTERVAL * 2 + 3)) {
                /* unknown variable or stale value */
                netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
                return SNMP_ERR_NOERROR;
        }
        if (reqinfo->mode != MODE_GET) {
                netsnmp_request_set_error(request, SNMP_ERR_READONLY);
                return SNMP_ERR_NOERROR;
        }

        if (var->type == ASN_GAUGE) {
                gauge = (int)var->val64;
                snmp_set_var_typed_value(request->requestvb,
                    var->type, (u_char *)&gauge, sizeof(gauge));
        } else if (var->type == ASN_COUNTER) {
                counter = (unsigned int)var->val64;
                snmp_set_var_typed_value(request->requestvb,
                    var->type, (u_char *)&counter, sizeof(counter));
        } else if (var->type == ASN_COUNTER64) {
                /* split the 64-bit value into the two 32-bit halves */
                wide.low = (uint32_t)var->val64;
                wide.high = var->val64 >> 32;
                snmp_set_var_typed_value(request->requestvb,
                    ASN_COUNTER64, (u_char *)&wide, sizeof(wide));
        } else {
                return SNMP_ERR_GENERR;
        }
        return SNMP_ERR_NOERROR;
}

/*
 * Cache "load" callback: refresh the statistics using the current time.
 * Both arguments are unused.  Always returns 0.
 */
static int stat_cache_load(netsnmp_cache *cache, void *x)
{
        const time_t stamp = time(NULL);

        grab_ipt_netflow_snmp(stamp);
        return 0;
}

/*
 * Cache "free" callback that deliberately does nothing.  It exists only
 * because the free_cache callback is not always checked for a NULL
 * pointer before being called, so a real function has to be supplied.
 */
static void dummy_cache_free(netsnmp_cache *cache, void *x)
{
        (void)cache;
        (void)x;
}

/*
 * Module initialisation: registers three scalar groups (module info,
 * sysctls, totals) and two tables (per-CPU and per-socket statistics).
 *
 * Each table is backed by a table data set whose registration gets a
 * cache handler injected; the cache's load callback (stat_cache_load)
 * refreshes the data.
 *
 * NOTE(review): return values of the registration/creation calls are not
 * checked here, and `stat_cache` ends up referring to the second cache
 * only.
 */
void init_netflow(void)
{
        netsnmp_handler_registration *reg;
        struct snmp_vars *sys;

        /* snmpd -f -L -Dnetflow,dlmod */
        DEBUGMSGTL(("netflow", "init_netflow\n"));

        /* read-only scalars, sub-identifiers 1..var_max(modinfos) */
        netsnmp_register_scalar_group(
            netsnmp_create_handler_registration(
                    "iptNetflowModule",
                    iptNetflowModule_handler,
                    iptNetflowModule_oid,
                    OID_LENGTH(iptNetflowModule_oid),
                    HANDLER_CAN_RONLY),
            1, var_max(modinfos));

        /* writable scalars, sub-identifiers 1..var_max(sysctls) */
        netsnmp_register_scalar_group(
            netsnmp_create_handler_registration(
                    "iptNetflowSysctl",
                    iptNetflowSysctl_handler,
                    iptNetflowSysctl_oid,
                    OID_LENGTH(iptNetflowSysctl_oid),
                    HANDLER_CAN_RWRITE),
            1, var_max(sysctls));

        /* read-only scalars, sub-identifiers 1..var_max(totals) */
        netsnmp_register_scalar_group(
            netsnmp_create_handler_registration(
                    "iptNetflowTotals",
                    iptNetflowTotals_handler,
                    iptNetflowTotals_oid,
                    OID_LENGTH(iptNetflowTotals_oid),
                    HANDLER_CAN_RONLY),
            1, var_max(totals));

        /* Register first table. */
        reg = netsnmp_create_handler_registration(
            "iptNetflowCpuTable", /* no handler */ NULL,
            iptNetflowCpuTable_oid, OID_LENGTH(iptNetflowCpuTable_oid),
            HANDLER_CAN_RONLY);

        /* set up columns */
        cpu_data_set = netsnmp_create_table_data_set("iptNetflowCpuDataSet");
        netsnmp_table_set_add_indexes(cpu_data_set, ASN_INTEGER, 0);
        /* I include cpuIndex into columns, which is not SMIv2'ish */
        /* cputable is terminated by an entry whose obj is 0 */
        for (sys = cputable; sys->obj; sys++)
                netsnmp_table_set_add_default_row(cpu_data_set, sys->obj, sys->type, 0, NULL, 0);
        netsnmp_register_table_data_set(reg, cpu_data_set, NULL);

        /* cache handler will load actual data, and it needs to be
         * injected in front of dataset handler to be called first */
        stat_cache = netsnmp_cache_create(
            /* no timeout */ -1,
            stat_cache_load, dummy_cache_free,
            iptNetflowCpuTable_oid, OID_LENGTH(iptNetflowCpuTable_oid));
        netsnmp_inject_handler(reg, netsnmp_cache_handler_get(stat_cache));

        /* Register second table. */
        reg = netsnmp_create_handler_registration(
            "iptNetflowSockTable", /* no handler */ NULL,
            iptNetflowSockTable_oid, OID_LENGTH(iptNetflowSockTable_oid),
            HANDLER_CAN_RONLY);

        /* set up columns */
        sock_data_set = netsnmp_create_table_data_set("iptNetflowSockDataSet");
        /* I don't include sockIndex into columns, which is more SMIv2'ish */
        netsnmp_table_set_add_indexes(sock_data_set, ASN_INTEGER, 0);
        /* start at socktable[1]: entry 0 is skipped (the index, per the
         * comment above) */
        for (sys = &socktable[1]; sys->obj; sys++)
                netsnmp_table_set_add_default_row(sock_data_set, sys->obj, sys->type, 0, NULL, 0);
        netsnmp_register_table_data_set(reg, sock_data_set, NULL);

        /* as before, cache handler will load actual data, and it needs
         * to be injected in front of dataset handler to be called first */
        stat_cache = netsnmp_cache_create(
            /* no timeout */ -1,
            stat_cache_load, dummy_cache_free,
            iptNetflowSockTable_oid, OID_LENGTH(iptNetflowSockTable_oid));
        netsnmp_inject_handler(reg, netsnmp_cache_handler_get(stat_cache));
}

/*
 * Module de-initialisation.  Only emits a debug message; nothing that
 * init_netflow() registered is unregistered or released here.
 */
void deinit_netflow(void)
{
        DEBUGMSGTL(("netflow", "deinit_netflow\n"));
}

ipt-netflow-2.2/dkms.conf0000644000000000000000000000042713213006644014132 0ustar  rootrootPACKAGE_NAME=ipt-netflow
# The package version is whatever ./version.sh prints.  The script is run
# from the directory containing this file (hence the pushd/popd pair), so
# the result does not depend on the caller's working directory.
pushd `dirname $BASH_SOURCE`
PACKAGE_VERSION=`./version.sh`
popd
# Name of the built module and the location it is installed to.
BUILT_MODULE_NAME[0]=ipt_NETFLOW
DEST_MODULE_LOCATION[0]=/kernel/extra
# Do not strip the module.
STRIP[0]=no
# Build only the kernel module, not the iptables/SNMP libraries.
MAKE[0]="make ipt_NETFLOW.ko"
# ./configure is run before the build, with the kernel source directory
# passed through --from-dkms-conf.
PRE_BUILD="./configure --from-dkms-conf=$kernel_source_dir"
AUTOINSTALL=yes
ipt-netflow-2.2/ipt_NETFLOW.h0000644000000000000000000003536313213006644014477 0ustar  rootroot/*
 *   This file is part of NetFlow exporting module.
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef _IPT_NETFLOW_H
#define _IPT_NETFLOW_H

/*
 * Some tech info:
 * http://www.cisco.com/en/US/products/ps6601/prod_white_papers_list.html
 * http://www.cisco.com/en/US/products/sw/netmgtsw/ps1964/products_implementation_design_guide09186a00800d6a11.html
 */

/* Number of flow records carried by one NetFlow v5 packet
 * (size of the flow[] array in struct netflow5_pdu). */
#define NETFLOW5_RECORDS_MAX 30

/*
 * One flow record of a NetFlow v5 packet.
 *
 * The structure is packed and its members add up to 48 bytes.  Multi-byte
 * members are declared big-endian (__be16/__be32), except `padding`.
 */
struct netflow5_record {
        __be32          s_addr;
        __be32          d_addr;
        __be32          nexthop;
        __be16          i_ifc;
        __be16          o_ifc;
        __be32          nr_packets;
        __be32          nr_octets;
        __be32          first_ms;
        __be32          last_ms;
        __be16          s_port;
        __be16          d_port;
        __u8            reserved;
        __u8            tcp_flags;
        __u8            protocol;
        __u8            tos;
        __be16          s_as;
        __be16          d_as;
        __u8            s_mask;
        __u8            d_mask;
        __u16           padding;
} __attribute__ ((packed));

/*
 * NetFlow v5 packet: a fixed header (the members before flow[], 24 bytes)
 * followed by room for NETFLOW5_RECORDS_MAX flow records.
 */
struct netflow5_pdu {
        __be16                  version;
        __be16                  nr_records;
        __be32                  ts_uptime; /* ms */
        __be32                  ts_usecs;  /* s (seconds, despite the name; presumably "Unix secs") */
        __be32                  ts_unsecs; /* ns */
        __be32                  seq;
        __u8                    eng_type;
        __u8                    eng_id;
        __u16                   sampling;
        struct netflow5_record  flow[NETFLOW5_RECORDS_MAX];
} __attribute__ ((packed));
/* Size of the v5 header alone: the whole packet minus the flow[] array. */
#define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))

/* Lengths used below for the interfaceName and interfaceDescription
 * elements. */
#define IF_NAME_SZ      IFNAMSIZ
#define IF_DESC_SZ      32

/* NetFlow v9   http://www.ietf.org/rfc/rfc3954.txt */
/* IPFIX        http://www.iana.org/assignments/ipfix/ipfix.xhtml */
/* v9 elements are uppercased, IPFIX camel cased. */
/*
 * `Elements` is an X-macro list of information elements; each entry gives
 * the numeric id, one or two names, and a length.
 *
 * Here one() expands an entry to an enumerator `name = id,` (the length is
 * ignored) and two() declares both names with the same id, so the enum
 * below defines a constant for every name.  one/two are #undef'ed
 * afterwards so that the list can be expanded again with other
 * definitions.
 */
#define one(id, name, len) name = id,
#define two(id, a, b, len)              \
                one(id, a, len) \
                one(id, b, len)
#define Elements \
        two(1,   IN_BYTES, octetDeltaCount, 4) \
        two(2,   IN_PKTS, packetDeltaCount, 4) \
        two(4,   PROTOCOL, protocolIdentifier, 1) \
        two(5,   TOS, ipClassOfService, 1) \
        two(6,   TCP_FLAGS, tcpControlBits, 1) \
        two(7,   L4_SRC_PORT, sourceTransportPort, 2) \
        two(8,   IPV4_SRC_ADDR, sourceIPv4Address, 4) \
        two(9,   SRC_MASK, sourceIPv4PrefixLength, 1) \
        two(10,  INPUT_SNMP, ingressInterface, 2) \
        two(11,  L4_DST_PORT, destinationTransportPort, 2) \
        two(12,  IPV4_DST_ADDR, destinationIPv4Address, 4) \
        two(13,  DST_MASK, destinationIPv4PrefixLength, 1) \
        two(14,  OUTPUT_SNMP, egressInterface, 2) \
        two(15,  IPV4_NEXT_HOP, ipNextHopIPv4Address, 4) \
        two(21,  LAST_SWITCHED, flowEndSysUpTime, 4) \
        two(22,  FIRST_SWITCHED, flowStartSysUpTime, 4) \
        one(25,  minimumIpTotalLength, 2) \
        one(26,  maximumIpTotalLength, 2) \
        two(27,  IPV6_SRC_ADDR, sourceIPv6Address, 16) \
        two(28,  IPV6_DST_ADDR, destinationIPv6Address, 16) \
        two(31,  IPV6_FLOW_LABEL, flowLabelIPv6, 3) \
        two(32,  ICMP_TYPE, icmpTypeCodeIPv4, 2) \
        two(33,  MUL_IGMP_TYPE, igmpType, 1) \
        two(40,  TOTAL_BYTES_EXP, exportedOctetTotalCount, 8) \
        two(41,  TOTAL_PKTS_EXP, exportedMessageTotalCount, 8) \
        two(42,  TOTAL_FLOWS_EXP, exportedFlowRecordTotalCount, 8) \
        two(48,  FLOW_SAMPLER_ID, samplerId, 1) \
        two(49,  FLOW_SAMPLER_MODE, samplerMode, 1) \
        two(50,  FLOW_SAMPLER_RANDOM_INTERVAL, samplerRandomInterval, 2) \
        one(52,  minimumTTL, 1) \
        one(53,  maximumTTL, 1) \
        two(56,  SRC_MAC, sourceMacAddress, 6) \
        two(57,  DST_MAC, postDestinationMacAddress, 6) \
        two(58,  SRC_VLAN, vlanId, 2) \
        two(61,  DIRECTION, flowDirection, 1) \
        two(62,  IPV6_NEXT_HOP, ipNextHopIPv6Address, 16) \
        two(64,  IPV6_OPTION_HEADERS, ipv6ExtensionHeaders, 2) \
        two(70,  MPLS_LABEL_1,  mplsTopLabelStackSection, 3) \
        two(71,  MPLS_LABEL_2,  mplsLabelStackSection2,   3) \
        two(72,  MPLS_LABEL_3,  mplsLabelStackSection3,   3) \
        two(73,  MPLS_LABEL_4,  mplsLabelStackSection4,   3) \
        two(74,  MPLS_LABEL_5,  mplsLabelStackSection5,   3) \
        two(75,  MPLS_LABEL_6,  mplsLabelStackSection6,   3) \
        two(76,  MPLS_LABEL_7,  mplsLabelStackSection7,   3) \
        two(77,  MPLS_LABEL_8,  mplsLabelStackSection8,   3) \
        two(78,  MPLS_LABEL_9,  mplsLabelStackSection9,   3) \
        two(79,  MPLS_LABEL_10, mplsLabelStackSection10,  3) \
        one(80,  destinationMacAddress, 6) \
        two(82,  IF_NAME, interfaceName, IF_NAME_SZ) \
        two(83,  IF_DESC, interfaceDescription, IF_DESC_SZ) \
        one(136, flowEndReason, 1) \
        one(138, observationPointId, 4) \
        one(139, icmpTypeCodeIPv6, 2) \
        one(141, LineCardId, 4) \
        one(142, portId, 4) \
        one(143, meteringProcessId, 4) \
        one(144, exportingProcessId, 4) \
        one(145, TemplateId, 2) \
        one(149, observationDomainId, 4) \
        one(152, flowStartMilliseconds, 8) \
        one(153, flowEndMilliseconds, 8) \
        one(154, flowStartMicroseconds, 8) \
        one(155, flowEndMicroseconds, 8) \
        one(160, systemInitTimeMilliseconds, 8) \
        one(163, observedFlowTotalCount, 8) \
        one(164, ignoredPacketTotalCount, 8) \
        one(165, ignoredOctetTotalCount, 8) \
        one(166, notSentFlowTotalCount, 8) \
        one(167, notSentPacketTotalCount, 8) \
        one(168, notSentOctetTotalCount, 8) \
        one(200, mplsTopLabelTTL, 1) \
        one(201, mplsLabelStackLength, 1) \
        one(202, mplsLabelStackDepth, 1) \
        one(208, ipv4Options, 4) \
        one(209, tcpOptions, 4) \
        one(225, postNATSourceIPv4Address, 4) \
        one(226, postNATDestinationIPv4Address, 4) \
        one(227, postNAPTSourceTransportPort, 2) \
        one(228, postNAPTDestinationTransportPort, 2) \
        one(230, natEvent, 1) \
        one(243, dot1qVlanId, 2) \
        one(244, dot1qPriority, 1) \
        one(245, dot1qCustomerVlanId, 2) \
        one(246, dot1qCustomerPriority, 1) \
        one(252, ingressPhysicalInterface, 2) \
        one(253, egressPhysicalInterface, 2) \
        one(256, ethernetType, 2) \
        one(295, IPSecSPI, 4) \
        one(300, observationDomainName, 128) \
        one(302, selectorId, 1) \
        one(309, samplingSize, 1) \
        one(310, samplingPopulation, 2) \
        one(318, selectorIdTotalPktsObserved, 8) \
        one(319, selectorIdTotalPktsSelected, 8) \
        one(323, observationTimeMilliseconds, 8) \
        one(324, observationTimeMicroseconds, 8) \
        one(325, observationTimeNanoseconds, 8) \
        one(390, flowSelectorAlgorithm, 1) \
        one(394, selectorIDTotalFlowsObserved, 8) \
        one(395, selectorIDTotalFlowsSelected, 8) \
        one(396, samplingFlowInterval, 1) \
        one(397, samplingFlowSpacing, 2)

/* Enumerators for every element name listed above, valued by element id. */
enum {
        Elements
};
#undef one
#undef two

/* Flowset (set) identifiers: the FLOWSET_* and IPFIX_* values name
 * template and options-template sets; FLOWSET_DATA_FIRST is the first id
 * used for data sets. */
enum {
        FLOWSET_TEMPLATE = 0,
        FLOWSET_OPTIONS = 1,
        IPFIX_TEMPLATE = 2,
        IPFIX_OPTIONS = 3,
        FLOWSET_DATA_FIRST = 256,
};

/* Scope type values for NetFlow v9 options templates. */
enum {                          /* v9 scopes */
        V9_SCOPE_SYSTEM = 1,
        V9_SCOPE_INTERFACE = 2,
        V9_SCOPE_LINECARD = 3,
        V9_SCOPE_CACHE = 4,
        V9_SCOPE_TEMPLATE = 5,
};

/* Header of a template flowset (packed, 8 bytes, all members big-endian). */
struct flowset_template {
        __be16  flowset_id;
        __be16  length;         /* (bytes) */
        __be16  template_id;
        __be16  field_count;    /* (items) */
} __attribute__ ((packed));

/* Header of a data flowset (packed, 4 bytes, all members big-endian). */
struct flowset_data {
        __be16  flowset_id;     /* corresponds to template_id */
        __be16  length;         /* (bytes) */
} __attribute__ ((packed));

/* http://tools.ietf.org/html/rfc3954#section-6.1 */
/* Header of a NetFlow v9 options template flowset (packed, 10 bytes).
 * Unlike the IPFIX variant, scope and option sizes are given in bytes. */
struct flowset_opt_tpl_v9 {
        __be16  flowset_id;
        __be16  length;
        __be16  template_id;
        __be16  scope_len;      /* (bytes) */
        __be16  opt_len;        /* (bytes) */
} __attribute__ ((packed));

/* http://tools.ietf.org/html/rfc5101#section-3.4.2.2 */
/* Header of an IPFIX options template set (packed, 10 bytes).
 * Unlike the v9 variant, field and scope sizes are given in items. */
struct flowset_opt_tpl_ipfix {
        __be16  flowset_id;
        __be16  length;
        __be16  template_id;
        __be16  field_count;    /* total (items) */
        __be16  scope_count;    /* (items) must not be zero */
} __attribute__ ((packed));

/* NetFlow v9 packet: a 20-byte header followed by a 1400-byte buffer for
 * the flowsets.  Packed; header members are big-endian. */
struct netflow9_pdu {
        __be16          version;
        __be16          nr_records;     /* (items) */
        __be32          sys_uptime_ms;
        __be32          export_time_s;
        __be32          seq;
        __be32          source_id;      /* Exporter Observation Domain */
        __u8            data[1400];
} __attribute__ ((packed));

/* IPFIX packet: a 16-byte header followed by a 1400-byte buffer for the
 * sets.  Packed; header members are big-endian.  Compared with the v9
 * header it has a byte length instead of a record count and no uptime. */
struct ipfix_pdu {
        __be16          version;
        __be16          length;         /* (bytes) */
        __be32          export_time_s;
        __be32          seq;
        __be32          odomain_id;     /* Observation Domain ID */
        __u8            data[1400];
} __attribute__ ((packed));

/* Maximum bytes flow can have, after it's reached flow will become
 * not searchable and will be exported soon. */
#define FLOW_FULL_WATERMARK 0xffefffff

/*
 * A 32-bit SPI is kept in the two 16-bit port members of a tuple:
 * the upper half in s_port, the lower half in d_port.
 *
 * EXTRACT_SPI(tuple)   yields the combined 32-bit value.
 * SAVE_SPI(tuple, spi) stores `spi` into the two members.
 *
 * Both arguments are parenthesized in the expansion, so that expressions
 * such as `a | b` or `*ptr` can be passed safely (without the parentheses
 * `spi >> 16` would bind only to the last operand of the argument).
 * SAVE_SPI evaluates `spi` twice and expands to a brace block, as before.
 */
#define EXTRACT_SPI(tuple)      (((tuple).s_port << 16) | (tuple).d_port)
#define SAVE_SPI(tuple, spi)    { (tuple).s_port = (spi) >> 16; \
                                  (tuple).d_port = (spi); }
#define MAX_VLAN_TAGS   2

/* hashed data which identify unique flow */
/* 16+16 + 2+2 + 2+1+1+1 = 41 */
/* (the size above counts only the unconditional members; MPLS_DEPTH,
 * ENABLE_VLAN and ENABLE_MAC each add more).
 * The structure is packed and compared bytewise by
 * ipt_netflow_tuple_equal(). */
struct ipt_netflow_tuple {
        union nf_inet_addr src;
        union nf_inet_addr dst;
        __be16          s_port; // Network byte order
        __be16          d_port; // -"-
#ifdef MPLS_DEPTH
        __be32          mpls[MPLS_DEPTH]; /* Network byte order */
#endif
        __u16           i_ifc;  // Host byte order
#ifdef ENABLE_VLAN
        __be16          tag[MAX_VLAN_TAGS]; // Network byte order (outer tag first)
#endif
        __u8            protocol;
        __u8            tos;
        __u8            l3proto;
#ifdef ENABLE_MAC
        __u8            h_dst[ETH_ALEN];
        __u8            h_src[ETH_ALEN];
#endif
} __attribute__ ((packed));

/* hlist[2] + tuple[]: 8+8 + 41 = 57 (less than usual cache line, 64) */
/* (the arithmetic above assumes 8-byte pointers and a tuple without its
 * optional members).
 *
 * One flow: the hash chain link, the identifying tuple, then per-flow
 * data that is not part of the hash key (next hop, interfaces, masks,
 * counters, timestamps, ...), and finally the list linkage. */
struct ipt_netflow {
        struct hlist_node hlist; // hashtable search chain

        /* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
        struct ipt_netflow_tuple tuple;

        /* volatile data */
        union nf_inet_addr nh;
#if defined(ENABLE_MAC) || defined(ENABLE_VLAN)
        __be16          ethernetType; /* Network byte order */
#endif
        __u16           o_ifc;
#ifdef ENABLE_PHYSDEV
        __u16           i_ifphys;
        __u16           o_ifphys;
#endif
#ifdef SNMP_RULES
        __u16           i_ifcr; /* translated interface numbers*/
        __u16           o_ifcr;
#endif
        __u8            s_mask;
        __u8            d_mask;
        __u8            tcp_flags; /* `OR' of all tcp flags */
        __u8            flowEndReason;
#ifdef ENABLE_DIRECTION
        __u8            hooknumx; /* hooknum + 1 */
#endif
        /* flow statistics */
        u_int32_t       nr_packets;
        u_int32_t       nr_bytes;
#ifdef ENABLE_SAMPLER
        unsigned int    sampler_count; /* for deterministic sampler only */
#endif
        /* Timestamps: either a first/last pair or a single ktime_t, sharing
         * the same storage; use the nf_ts_* accessors below. */
        union {
                struct {
                        unsigned long first;
                        unsigned long last;
                } ts;
                ktime_t ts_obs;
        } _ts_un;
#define nf_ts_first _ts_un.ts.first
#define nf_ts_last  _ts_un.ts.last
#define nf_ts_obs   _ts_un.ts_obs
        u_int32_t       flow_label; /* IPv6 */
        u_int32_t       options; /* IPv4(16) & IPv6(32) Options */
        u_int32_t       tcpoptions;
#ifdef CONFIG_NF_NAT_NEEDED
        __be32          s_as;
        __be32          d_as;
        struct nat_event *nat; /* non-NULL marks a dummy flow, see IS_DUMMY_FLOW() */
#endif
        /* List linkage; both members share storage, use the flows_*
         * accessors below. */
        union {
                struct list_head list; /* all flows in ipt_netflow_list */
#ifdef HAVE_LLIST
                struct llist_node llnode; /* purged flows */
#endif
        } _flow_list;
#define flows_list  _flow_list.list
#define flows_llnode _flow_list.llnode
};

/*
 * NAT event support, compiled in only with CONFIG_NF_NAT_NEEDED.
 * Without it IS_DUMMY_FLOW() is the constant 0.
 */
#ifdef CONFIG_NF_NAT_NEEDED
/* NAT event kinds */
enum {
        NAT_CREATE = 1, NAT_DESTROY = 2, NAT_POOLEXHAUSTED = 3
};
/* One NAT event: addresses and ports before (pre) and after (post)
 * translation, two timestamps, the protocol and the event kind. */
struct nat_event {
        struct list_head list;
        struct {
                __be32  s_addr;
                __be32  d_addr;
                __be16  s_port;
                __be16  d_port;
        } pre, post;
        ktime_t         ts_ktime;
        unsigned long   ts_jiffies;
        __u8    protocol;
        __u8    nat_event;
};
/* True when the flow has a nat_event attached.  `nf` must be a plain
 * pointer expression: the argument is not parenthesized. */
#define IS_DUMMY_FLOW(nf) (nf->nat)
#else
#define IS_DUMMY_FLOW(nf) 0
#endif

static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
                                    const struct ipt_netflow_tuple *t2)
{
        return (!memcmp(t1, t2, sizeof(struct ipt_netflow_tuple)));
}

/* One export destination: its socket, addresses and per-destination
 * error and traffic counters. */
struct ipt_netflow_sock {
        struct list_head list;
        struct socket *sock;
        struct sockaddr_storage addr;   // destination
        struct sockaddr_storage saddr;  // source
        char   sdev[IFNAMSIZ];          // source device
        atomic_t wmem_peak;             // sk_wmem_alloc peak value
        unsigned int err_connect;       // connect errors
        unsigned int err_full;          // socket filled error
        unsigned int err_other;         // other socket errors
        unsigned int err_cberr;         // async errors, icmp
        unsigned int pkt_exp;           // pkts exported to this dest
        u64 bytes_exp;                  // bytes -"-
        u64 bytes_exp_old;              // for rate calculation
        unsigned int bytes_rate;        // bytes per second
        unsigned int pkt_sent;          // pkts sent to this dest
        unsigned int pkt_fail;          // pkts failed to send to this dest
};

/* List entry describing one network aggregation rule, with a usage
 * counter.  NOTE(review): presumably addresses matching addr/mask are
 * rewritten using aggr_mask -- confirm against the code using it. */
struct netflow_aggr_n {
        struct list_head list;
        atomic_t usage;
        __u32 mask;
        __u32 addr;
        __u32 aggr_mask;
        __u8 prefix;
};

/* List entry describing one port aggregation rule, with a usage counter.
 * NOTE(review): presumably ports in the range port1..port2 are replaced
 * by aggr_port -- confirm against the code using it. */
struct netflow_aggr_p {
        struct list_head list;
        atomic_t usage;
        __u16 port1;
        __u16 port2;
        __u16 aggr_port;
};

/*
 * Helpers to update a member `count` of the per-cpu variable
 * `ipt_netflow_stat` through __get_cpu_var().
 *
 * The plain INC/ADD/SET/TS forms do no preemption handling themselves;
 * the *_ATOMIC forms wrap the update in preempt_disable()/preempt_enable().
 */
#define NETFLOW_STAT_INC(count) (__get_cpu_var(ipt_netflow_stat).count++)
#define NETFLOW_STAT_ADD(count, val) (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val)
#define NETFLOW_STAT_SET(count, val) (__get_cpu_var(ipt_netflow_stat).count = (unsigned long long)val)
/*
 * Stamp a `struct duration` member: `last` is always set to the current
 * real time, `first` only while it is still unset (tested through
 * `first_tv64`, which is not defined in this header).
 *
 * NOTE(review): this and the two *_ATOMIC macros end in `while (0);`, i.e.
 * the expansion already contains the terminating semicolon, so they are
 * not safe as the sole statement of an `if` that has an `else`.
 */
#define NETFLOW_STAT_TS(count)                                                   \
        do {                                                                     \
                ktime_t kts = ktime_get_real();                                  \
                if (!(__get_cpu_var(ipt_netflow_stat)).count.first_tv64)         \
                        __get_cpu_var(ipt_netflow_stat).count.first = kts;       \
                __get_cpu_var(ipt_netflow_stat).count.last = kts;                \
        } while (0);

#define NETFLOW_STAT_INC_ATOMIC(count)                          \
        do {                                                    \
                preempt_disable();                              \
                (__get_cpu_var(ipt_netflow_stat).count++);      \
                preempt_enable();                               \
        } while (0);

#define NETFLOW_STAT_ADD_ATOMIC(count, val)                     \
        do {                                                    \
                preempt_disable();                              \
                (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val); \
                preempt_enable();                               \
        } while (0);
/*
 * Sum member `count` over all present CPUs.
 * NOTE(review): the accumulator is an unsigned int, so the sum of a u64
 * counter is truncated to the width of unsigned int.
 */
#define NETFLOW_STAT_READ(count) ({                                     \
                unsigned int _tmp = 0, _cpu;                            \
                for_each_present_cpu(_cpu)                              \
                         _tmp += per_cpu(ipt_netflow_stat, _cpu).count; \
                _tmp;                                                   \
        })

/* A time span given by its first and last timestamp
 * (filled in by NETFLOW_STAT_TS()). */
struct duration {
        ktime_t first;
        ktime_t last;
};

/* statistics */
/* Counter block updated through the NETFLOW_STAT_* macros (presumably
 * this is the type of the per-cpu variable `ipt_netflow_stat` they
 * reference -- its declaration is not in this header). */
struct ipt_netflow_stat {
        u64 searched;                   // hash stat
        u64 found;                      // hash stat
        u64 notfound;                   // hash stat (new flows)
        u64  pkt_total;                 // packets metered
        u64 traf_total;                 // traffic metered
#ifdef ENABLE_PROMISC
        u64 pkt_promisc;                // how much packets passed promisc code
        u64 pkt_promisc_drop;           // how much packets discarded
#endif
        /* above is grouped for cache */
        unsigned int truncated;         // packets stat (drop)
        unsigned int frags;             // packets stat (drop)
        unsigned int maxflows_err;      // maxflows reached (drop)
        unsigned int alloc_err;         // failed to allocate memory (drop & lost)
        struct duration drop;           // first/last time of a drop
        unsigned int send_success;      // sendmsg() ok
        unsigned int send_failed;       // sendmsg() failed
        unsigned int sock_cberr;        // socket error callback called (got icmp refused)
        unsigned int exported_rate;     // netflow traffic itself
        u64 exported_pkt;               // netflow traffic itself
        u64 exported_flow;              // netflow traffic itself
        u64 exported_traf;              // netflow traffic itself
        u64 exported_trafo;             // netflow traffic itself
        u64  pkt_total_prev;            // packets metered previous interval
        u32  pkt_total_rate;            // packet rate for this cpu
        u64  pkt_drop;                  // packets not metered
        u64 traf_drop;                  // traffic not metered
        u64 flow_lost;                  // flows not sent to collector
        u64  pkt_lost;                  // packets not sent to collector
        u64 traf_lost;                  // traffic not sent to collector
        struct duration lost;           // first/last time of a loss
        u64  pkt_out;                   // packets out of the hash
        u64 traf_out;                   // traffic out of the hash
#ifdef ENABLE_SAMPLER
        u64 pkts_observed;              // sampler stat
        u64 pkts_selected;              // sampler stat
#endif
        u64 old_searched;               // previous hash stat
        u64 old_found;                  // for calculation per cpu metric
        u64 old_notfound;
        int metric;                     // one minute ewma of hash efficiency
};

#endif
/* vim: set sw=8: */
ipt-netflow-2.2/version.sh0000755000000000000000000000164013213006644014347 0ustar  rootroot#!/bin/sh
# This script determines actual module version.
#
# Usage:
#   ./version.sh            print the module version on stdout
#   ./version.sh --define   print the content for version.h
#
# Version sources, each later one overriding the earlier ones:
#   1. IPT_NETFLOW_VERSION string defined in ipt_NETFLOW.c
#   2. GITVERSION string defined in an already existing version.h
#   3. `git describe --dirty' output (leading "v" stripped) when .git exists

PATH=$PATH:/usr/local/bin:/usr/bin:/bin

# Base version from the source.
MVERSION=$(sed -n 's/^#define.*IPT_NETFLOW_VERSION.*"\(.*\)".*/\1/p' ipt_NETFLOW.c)

# GITVERSION overrides base version.
if [ -e version.h ] && grep -q GITVERSION version.h; then
  MVERSION=$(sed -n 's/#define GITVERSION "\(.*\)".*/\1/p' version.h)
fi

# git describe overrides version from the source.
# GVERSION stays empty unless we are in a git checkout with git available;
# `command -v' is used instead of `which' because only the former is POSIX.
GVERSION=
if [ -d .git ] && command -v git >/dev/null 2>&1; then
  GVERSION=$(git describe --dirty 2>/dev/null)
  if [ "$GVERSION" ]; then
    MVERSION=${GVERSION#v}
  fi
fi

if [ "$1" = --define ]; then
  # called from Makefile to create version.h
  # which should contain GITVERSION or be empty.
  if [ "$GVERSION" ]; then
    echo "#define GITVERSION \"$MVERSION\""
  else
    echo "/* placeholder, because kernel doesn't like empty files */"
  fi
else
  # normal run
  # quoted so the version string is printed verbatim (no splitting/globbing)
  echo "$MVERSION"
fi
ipt-netflow-2.2/README0000644000000000000000000010145113213006644013204 0ustar  rootrootipt_NETFLOW linux 2.6.x-4.x kernel module by <abc@telekom.ru> -- 2008-2016.

   High performance NetFlow v5, v9, IPFIX flow data export module for Linux
   kernel. Created to be useful for highly loaded linux router. It should be
   used as iptables target.


=========================
= Detailed Feature List =
=========================

   * High performance and scalability. For highest performance module could be
     run without conntrack being enabled in kernel. Reported to be able to
     handle 10Gbit traffic with more than 1500000 pps with negligible server
     load (on S5500BC).

   * NetFlow v5, v9, and IPFIX are fully supported.

     Support of v9/IPFIX is adding flexibility to exporting of flow data
     plus greater visibility of traffic, letting export many additional fields
     besides what was possible in v5 era. Such as
     
   * IPv6 option headers, IPv4 options, TCP options, ethernet type, dot1q
     service and customer VLAN ids, MAC addresses, and

   * Full IPv6 support,

   * NAT translations events (from conntrack) using NetFlow Event Logging (NEL).
     This is the standardized way for v9/IPFIX, but the module exports such
     events even for v5 collectors via specially crafted pseudo-records.

   * Deterministic (systematic count-based), random and hash Flow Sampling.
     With appropriate differences in support of v5, v9, and IPFIX.

   * SNMP agent (for net-snmp) for remote management and monitoring.

   * Options Templates (v9/IPFIX) let export useful statistical,
     configurational, and informational records to collector.
     Such as metering, exporting, sampling stat and reliability stat, sampling 
     configuration, network devices ifName, ifDescr list.

   * Tested to compile and work out of the box on Centos 5, 6, 7, Debian and
     Ubuntu. Many vanilla Linux kernels since 2.6.18 up to the latest (as of
     writing is 3.19) are supported and tested.

   * Module load time and run-time (via sysctl) configuration.

   * Flexibility in enabling features via ./configure script. This will let you
     disable features you don't need, which increase compatibility with custom
     kernels and performance.

   * SNMP-index translation rules, let convert meaningless and unstable
     interface indexes (ifIndex) to more meaningful numbering scheme.

   * Easy support for catching mirrored traffic with promisc option. Which is
     also supporting optional MPLS decapsulation and MPLS-aware NetFlow.


============================
= OBTAINING LATEST VERSION =
============================

   $ git clone git://github.com/aabc/ipt-netflow.git ipt-netflow
   $ cd ipt-netflow


================
= INSTALLATION =
================

   Five easy steps.

** 1. Prepare Kernel source

   If you have package system install kernel-devel package, otherwise install
   raw kernel source from http://kernel.org matching _exactly_ version of your
   installed kernel.

   a) What to do for Centos:

      ~# yum install kernel-devel

   b) What to do for Debian and Ubuntu:

      ~# apt-get install module-assistant
      ~# m-a prepare

   c) Otherwise, if you downloaded raw kernel sources don't forget to create
    .config by copying it from your distribution's kernel. Its copy could reside
    in /boot or sometimes in /proc, examples:

      kernel-src-dir/# cp /boot/config-`uname -r` .config
    or
      kernel-src-dir/# zcat /proc/config.gz > .config

    Assuming you unpacked kernel source into `kernel-src-dir/' directory.
    Then run:

      kernel-src-dir/# make oldconfig

    After that you'll need to prepare kernel for modules build:

      kernel-src-dir/# make prepare modules_prepare

   Note: Don't try to `make prepare' in Centos kernel-devel package directory
     (which is usually something like /usr/src/kernels/2.6.32-431.el6.x86_64)
     as this is wrong and meaningless.

** 2. Prepare Iptables

   Before this step it would also be useful to install pkg-config if you
   don't already have it.

   If you have package system just install iptables-devel (or iptables-dev)
   package, otherwise install iptables source matching version of your
   installation from ftp://ftp.netfilter.org/pub/iptables/

   a) What to do for Centos:

      # yum install iptables-devel

   b) What to do for Debian or Ubuntu:

      # apt-get install iptables-dev pkg-config

   c) Otherwise, for raw iptables source build it and make install.

** 3. Prepare net-snmp (optional)

  In case you want to manage or monitor module performance via SNMP you
  may install net-snmp. If you want to skip this step run configure
  with --disable-snmp-agent option.

  a) For Centos:

      # yum install net-snmp net-snmp-devel

  b) For Debian or Ubuntu:

      # apt-get install snmpd libsnmp-dev

  c) Otherwise install net-snmp from www.net-snmp.org

** 4. Now, to actually build the module run:

      ~/ipt-netflow# ./configure
      ~/ipt-netflow# make all install
      ~/ipt-netflow# depmod

   This will install kernel module and iptables specific library.

   Troubleshooting:

     a) Sometimes you will want to add CC=gcc-3 to make command.
     Example: make CC=gcc-3.3

     b) Compile module with actual kernel source compiled.
     I.e. first compile kernel and boot into it, and then compile module.
     If you are using kernel-devel package check that its version matches
     your kernel package.

     c) If you have sources in non-standard places or configure isn't able to
     find something run ./configure --help to see how to specify paths manually.

     d) To run irqtop on Debian 8 you may need to install:

       # apt-get install ruby ruby-dev ncurses-dev
       # gem install curses

     z) If all fails create ticket at
          https://github.com/aabc/ipt-netflow/issues

** 5. After this point you should be able to load module and
     use -j NETFLOW target in your iptables. See next section.


=====================
= Configure Options =
=====================

   Configure script allows to enable or disable optional features:

     --enable-natevents
         enables natevents (NEL) support. This option requires conntrack
         support to be enabled in the kernel and the conntrack module
         (nf_conntrack) to be loaded before ipt_NETFLOW. Usually this is
         done automatically because of `depmod', but if you don't do `make
         install' you'll need to load nf_conntrack manually.
         Read below for explanation of natevents.

     --enable-sampler
         enables flow sampler. Read below for explanation of its configuration
         option.

     --enable-sampler=hash
         additionally enables 'hash' sampler.

     --disable-snmp-agent
         disables building net-snmp agent module, which is enabled by default.

     --enable-snmp-rules
         enables SNMP-index conversion rules. Read below for explanation
         of snmp-rules.

     --enable-macaddress
         enables exporting of src and dst MAC addresses for every flow
         in v9/IPFIX. A difference in either MAC address will be accounted
         as a different flow, i.e. MAC addresses will be part of the flow key.

     --enable-vlan
         enables exporting of dot1q VLAN Ids and Priorities for every flow
         in v9/IPFIX. It supports outer and second dot1q tags if present.

         Any of two previous options will enable exporting of Ethernet Packet
         Type, ethernetType(256).

     --enable-direction
         enables exporting of flowDirection(61) Element for v9/IPFIX.

         Packets captured in PREROUTING and INPUT chains will be accounted as
         ingress flows(0), in OUTPUT and POSTROUTING as egress flows(1), and
         in FORWARD will have flowDirection set to undefined value 255.

     --enable-aggregation
         enables aggregation rules. Read below for explanation of aggregation.

     --disable-dkms
         disable creating dkms.conf and auto-install module into DKMS tree.

     --disable-dkms-install
         only disable auto-install into DKMS, but still create dkms.conf, in
         case you will want to install it manually.

     --enable-physdev
         Export ingressPhysicalInterface(252) and egressPhysicalInterface(253)
         (relevant for bridges) in V9 and IPFIX. If your collector does not
         support these Elements but you still need physdevs then use
         --enable-physdev-override, in that case physdevs will override normal
         interface numbers ingressInterface(10) and egressInterface(14).

     --enable-promisc
         Enables capturing of promiscuous packets into raw/PREROUTING chain.
         See README.promisc Solution 1 for usage details and example.

     --promisc-mpls
         Enables MPLS label stack decapsulation for promiscuous packets. (For
         IPv4 and IPv6 packets only). This also enables MPLS-aware NetFlow (v9
         and IPFIX), you may wish to specify with --promisc-mpls=n how much MPLS
         labels you want to be recorded and exported (default is 3, maximum is
         10, set to 0 to not report anything).


===========
= RUNNING =
===========

1. You can load module directly by insmod like this:

     # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1

   Or if properly installed (make install; depmod) by this:

     # modprobe ipt_NETFLOW destination=127.0.0.1:2055

   See, you may add options in insmod/modprobe command line, or add
   them in /etc/modprobe.conf or /etc/modprobe.d/ipt_NETFLOW.conf
   like thus:

     options ipt_NETFLOW destination=127.0.0.1:2055 protocol=9 natevents=1

2. Statistics is in /proc/net/stat/ipt_netflow
   Machine readable statistics is in /proc/net/stat/ipt_netflow_snmp
   To view boring slab statistics: grep ipt_netflow /proc/slabinfo
   Dump of all flows is in /proc/net/stat/ipt_netflow_flows

3. You can view parameters and control them via sysctl, example:

     # sysctl net.netflow
     # sysctl net.netflow.hashsize=32768

   Note: For after-reboot configuration I recommend to store module parameters
   in modprobe configs instead of storing them in /etc/sysctl.conf, as it's
   less clear when init process will apply sysctl.conf, before or after
   module's load.

4. Example of directing all IPv4 traffic into the module:

     # iptables -I FORWARD -j NETFLOW
     # iptables -I INPUT -j NETFLOW
     # iptables -I OUTPUT -j NETFLOW

   Note: It is preferable (because easier to understand) to _insert_
   NETFLOW target at the top of the chain, otherwise not all traffic may
   reach NETFLOW if your iptables configuration is complicated and some
   other rule inadvertently consumes the traffic (dropping or accepting before
   NETFLOW is reached). It's always good to test your configuration.
   Use  iptables -L -nvx  to check pkts/bytes counters on the rules.

5. If you want to account IPv6 traffic you should use protocol 9 or 10.
   Example of directing all IPv6 traffic into the module:

     # sysctl net.netflow.protocol=10
     # ip6tables -I FORWARD -j NETFLOW
     # ip6tables -I INPUT -j NETFLOW
     # ip6tables -I OUTPUT -j NETFLOW

   Note: First enable right version of protocol and after that add ip6tables
     rules, otherwise you will get errors in dmesg.

6. If you want to account NAT events (NEL):

     # sysctl net.netflow.natevents=1

   Note that natevents feature is completely independent from traffic accounting
   (it's using so called conntrack events), thus you don't need to set or change
   any iptables rules to use that. You may need to enable kernel config option
   CONFIG_NF_CONNTRACK_EVENTS though (if it isn't already enabled).
   For details on how they are exported for different protocol versions see
   below.

7. For SNMP support you will need to add this command into snmpd.conf to
   enable IPT-NETFLOW-MIB in SNMP agent:

      dlmod netflow /usr/lib/snmp/dlmod/snmp_NETFLOW.so

   Restart snmpd for changes to take effect. Don't forget to properly configure
   access control. The simplest example configuration may look like this (note
   that this is the whole /etc/snmp/snmpd.conf):

      rocommunity public 127.0.0.1
      dlmod netflow /usr/lib/snmp/dlmod/snmp_NETFLOW.so

   Note, that this config will also allow _full_ read-only access to the whole
   linux MIB. To install IPT-NETFLOW-MIB locally, copy file IPT-NETFLOW-MIB.my
   into ~/.snmp/mibs/

   * Detailed example of SNMP configuration is there:
   * https://github.com/aabc/ipt-netflow/wiki/Configuring-SNMP-access

   To check that MIB is installed well you may issue:

     $ snmptranslate -m IPT-NETFLOW-MIB -IR -Tp iptNetflowMIB

   This should output IPT-NETFLOW-MIB in tree form.

   To check that snmp agent is working well issue:

     $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowMIB

   Should output full MIB. If MIB is not installed try:

     $ snmpget -v 1 -c public 127.0.0.1 .1.3.6.1.4.1.37476.9000.10.1.1.1.1.0

   Which should output STRING: "ipt_NETFLOW".

   MIB provides access to very similar statistics that you have in
   /proc/net/stat/ipt_netflow, you can read description of objects in
   text file IPT-NETFLOW-MIB.my

   If you want to access to SNMP stat in machine readable form for your
   scripts there is file /proc/net/stat/ipt_netflow_snmp

   Note: Using of SNMP v2c or v3 is mandatory for most tables, because
   this MIB uses 64-bit counters (Counter64) which is not supported in old
   SNMP v1. You should understand that 32-bit counter will wrap on 10Gbit
   traffic in just 3.4 seconds! So, always pass option `-v2c' or `-v3'
   to net-snmp utils. Or, for example, configure option `defVersion 2c'
   in ~/.snmp/snmp.conf. You can also have `defCommunity public' or v3
   auth parameters (defSecurityName, defSecurityLevel, defPassphrase)
   set there (man snmp.conf).

   Examples for dumping typical IPT-NETFLOW-MIB objects:

   - Module info (similar to modinfo, SNMPv1 is ok for following two objects):

     $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowModule

   - Read-write sysctl-like parameters (yes, they are writable via snmpset, you
     may need to configure write access to snmpd, though):

     $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowSysctl

   - Global performance stat of the module (note -v2c, because rest of the
     objects require SNMP v2c or SNMP v3):

     $ snmpwalk -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowTotals

   - Per-CPU (metering) and per-socket (exporting) statistics in table format:

     $ snmptable -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowCpuTable
     $ snmptable -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowSockTable


===========
= OPTIONS =
===========

   Options can be passed as parameters to module or changed dynamically
   via  sysctl net.netflow  or  IPT-NETFLOW-MIB::iptNetflowSysctl

   protocol=5
     - what version of NetFlow protocol to use. Default is 5.
       You can choose from 5, 9, or 10 (where 10 is IPFIX). If you plan
       to account IPv6 traffic you should use protocol 9 or 10 (IPFIX),
       because NetFlow v5 isn't compatible with IPv6.

   destination=127.0.0.1:2055
     - where to export netflow, to this ip address. Port is optional, default
       is 2055. You will see this connection in netstat like this:

       udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED 

   destination=[2001:db8::1]:2055
     - export target using IPv6 address. Brackets are optional, but otherwise
       you should delimit port with 'p' or '#' character.

   destination=127.0.0.1:2055,192.0.0.1:2055
     - mirror flows to two (can be more) addresses, separate addresses
       with comma.

   destination=127.0.0.1:2055@127.0.0.2
     - bind socket to address (127.0.0.2).

   destination=127.0.0.1:2055%eth0
     - bind socket to interface (eth0). May be useful for multi-homed boxes.

   sampler=deterministic:123
   sampler=random:123
   sampler=hash:123
     - enables Flow Sampling. To disable set to the empty value or to `0'.
       Note, that this is flow sampling (as of RFC 7014), not packet
       sampling (PSAMP).

       There are three sampling modes:

         deterministic:  select each N-th observed flow; in IPFIX this mode
                         is called Systematic count-based Sampling;
         random:         select randomly one out of N flows.
         hash:           select hash-randomly one out of N flows.

       Number after colon is population size N, with valid values 2-16383.
       (This 16383 limit is for compatibility with NetFlow v5.)
         Using 'deterministic' and 'random' sampling will not reduce resource
       usage caused by the module, because flows are sampled late in exporting
       process. This will reduce the amount of flows which go to the collector,
       thus, reducing load on the collector.
         On the other hand, using 'hash' sampling will reduce CPU and memory
       load caused by the module, because flows are discarded early in the
       processing chain. They are discarded almost like in random sampler,
       except that the pseudo-random value depends on the Flow Key hash for each
       packet.
         All required NetFlow/IPFIX information to signal use of sampling is
       also sent to the collector. 'Hash' sampling will be presented as 'random'
       sampling to the collector, because of their similarity.
       Note, that Flow Sampling is compatible with NetFlow v5, v9, and IPFIX.

   natevents=1
     - Collect and send NAT translation events as NetFlow Event Logging (NEL)
       for NetFlow v9/IPFIX, or as dummy flows compatible with NetFlow v5.
       Default is 0 (don't send).

       For NetFlow v5 protocol meaning of fields in dummy flows are such:
         Src IP, Src Port  is Pre-nat source address.
         Dst IP, Dst Port  is Post-nat destination address.
           - These two fields are made equal to data flows caught in FORWARD chain.
         Nexthop, Src AS  is Post-nat source address for SNAT. Or,
         Nexthop, Dst AS  is Pre-nat destination address for DNAT.
         TCP Flags is SYN+ACK for start event, RST+FIN for stop event.
         Pkt/Traffic size is 0 (zero), so it won't interfere with accounting.

       Natevents are compilation disabled by default, to enable you will need to
       add --enable-natevents option to ./configure script.

       For technical description of NAT Events see:
         http://tools.ietf.org/html/draft-ietf-behave-ipfix-nat-logging-04

   inactive_timeout=15
     - export flow after it's inactive for 15 seconds. Default value is 15.

   active_timeout=1800
     - export flow after it's active for 1800 seconds (30 minutes). Default
       value is 1800.

   refresh-rate=20
     - for NetFlow v9 and IPFIX it's rate how frequently to re-send templates
       (per packets). You probably don't need to change default (which is 20).

   timeout-rate=30
     - for NetFlow v9 and IPFIX it's rate when to re-send old templates (in
       minutes). No need to change it.

   debug=0
     - debug level (none).

   sndbuf=number
     - size of output socket buffer in bytes. I recommend you to put higher
       value if you experience netflow packet drops (can be seen in statistics
       as 'sock: fail' number.)
       Default value is system default.

   hashsize=number
     - Hash table bucket size. Used for performance tuning.
       Abstractly speaking, it should be minimum two times bigger than flows
       you usually have, but not need to.
       Default is system memory dependent small enough value.

   maxflows=2000000
     - Maximum number of flows to account. It's here to prevent DOS attacks.
       After this limit is reached new flows will not be accounted. Default is
       2000000, zero is unlimited.

   aggregation=string..
     - Few aggregation rules (or some say they are rule.)

       Buffer for aggregation string 1024 bytes, and sysctl limit it
         to ~700 bytes, so don't write there a lot.
       Rules worked in definition order for each packet, so don't
         write them a lot again.
       Rules applied to both directions (dst and src).
       Rules tried until first match, but for netmask and port
          aggregations separately.
       Delimit them with commas.

       Rules are of two kinds: for netmask aggregation
          and port aggregation:

       a) Netmask aggregation example: 192.0.0.0/8=16
       Which mean to strip addresses matching subnet 192.0.0.0/8 to /16.

       b) Port aggregation example: 80-89=80
       Which mean to replace ports from 80 to 89 with 80.

       Full example:
          aggregation=192.0.0.0/8=16,10.0.0.0/8=16,80-89=80,3128=80

       Aggregation rules are enabled by default, if you feel you don't need them
       you may add --disable-aggregation to ./configure script.

   snmp-rules=string...
     - Few SNMP-index conversion rules similar to fprobe-ulog.

       Quoting man fprobe-ulog:

         "Comma separated list of interface name to SNMP-index conversion
         rules.  Each rule consists of interface base name and SNMP-index
         base separated by colon (e.g. ppp:200). Final SNMP-index is  sum
         of corresponding SNMP-index base and interface number.
         In the above example SNMP-index of interface ppp11 is 211.

         If interface  name  did not fit to any of conversion rules then
         SNMP-index will be taken from kernel."

       This implementation isn't optimized for performance (no rule caching
       or hashing), but should be fast if rules list are short.

       Rules are parsed in order from first to last until first match.

       snmp-rules are compilation disabled by default, to enable you will need
       to add --enable-snmp-rules option to ./configure script.

   scan-min=1
     - Minimal interval between flow export scans. Sometimes could be useful
       to reduce load on exporting CPU by increasing this interval. Value is
       in kernel jiffies units (which is x/HZ seconds).

   promisc=1
     - Enables promisc hack. See README.promisc Solution 1 for details.

   exportcpu=number
     - Lock exporter to single CPU. This may be useful to fine control CPU
       load. Common use case: with smp_affinity and RSS you spread packet
       processing to all CPUs except one, and lock it to the exporter. While
       exporter CPU load generally is not high, for someone it may be not
       desirable to combine it with packet processing on very highly loaded
       routers.

       This option could be changed at runtime with:

         # echo number > /sys/module/ipt_NETFLOW/parameters/exportcpu

   engine_id=number
     - Observation Domain ID (on IPFIX, Source Id on NetFlow v9, or Engine Id
       on NetFlow v5) value to be exported. This may help your collector to
       distinguish between multiple exporters. On Netflow v9 and IPFIX this
       value is 32-bit; on NetFlow v5 only 8 low bits are significant.
       Default value is 0.

       This option could be changed at runtime with:

         # echo number > /sys/module/ipt_NETFLOW/parameters/engine_id


====================
= HOW TO READ STAT =
====================

  Statistics is your friend to fine tune and understand netflow module
  performance.

  To see stat in human readable form:
    # cat /proc/net/stat/ipt_netflow

  How to interpret the data:

> ipt_NETFLOW version v1.8-122-gfae9d59-dirty, srcversion 6141961152BE0DFA6A21EF4; aggr mac vlan

  This line helps to identify actual source that your module is built from.
  Please always supply it in all bug reports.

  v1.8-122: 1.8 is release, 122 is commit number after release;
  -gfae9d59: fae9d59 is short git commit id;
  -dirty: if present, meaning that git detected that sources are changed since
      last git commit, you may wish to do `git diff' to view changes;
  srcversion 6141961152BE0DFA6A21EF4: binary version of module, you can
      compare this with data from `modinfo ./ipt_NETFLOW.ko' to identify
      actual binary loaded;
  aggr mac vlan: tags to identify compile time options that are enabled.

> Protocol version 10 (ipfix), refresh-rate 20, timeout-rate 30, (templates 2, active 2). Timeouts: active 5, inactive 15. Maxflows 2000000

  Protocol version currently in use. Refresh-rate and timeout-rate
      for v9 and IPFIX. Total templates generated and currently active.
  Timeout: active X: how much seconds to wait before exporting active flow.
    - same as sysctl net.netflow.active_timeout variable.
  inactive X: how much seconds to wait before exporting inactive flow.
    - same as sysctl net.netflow.inactive_timeout variable.
  Maxflows 2000000: maxflows limit.
    - all flows above maxflows limit must be dropped.
    - you can control maxflows limit by sysctl net.netflow.maxflows variable.

> Promisc hack is disabled (observed 0 packets, discarded 0).

  observed n: To see that promisc hack is really working.

> Natevents disabled, count start 0, stop 0.

    - Natevents mode disabled or enabled, and how much start or stop events
      are reported.

> Flows: active 5187 (peak 83905 reached 0d0h1m ago), mem 283K, worker delay 100/1000 (37 ms, 0 us, 4:0 0 [3]).

  active X: currently active flows in memory cache.
    - for optimum CPU performance it is recommended to set hash table size to
      at least twice of average of this value, or higher.
  peak X reached Y ago: peak value of active flows.
  mem XK: how much kilobytes of memory currently taken by active flows.
    - one active flow taking 56 bytes of memory.
    - there is system limit on cache size too.
  worker delay X/HZ: how frequently exporter scan flows table per second.
  Rest is boring debug info.

> Hash: size 8192 (mem 32K), metric 1.00, [1.00, 1.00, 1.00]. InHash: 1420 pkt, 364 K, InPDU 28, 6716.

  Hash: size X: current hash size/limit.
    - you can control this by sysctl net.netflow.hashsize variable.
    - increasing this value can significantly reduce CPU load.
    - default value is not optimal for performance.
    - optimal value is twice of average of active flows.
  mem XK: how much memory occupied by hash table.
    - hash table is fixed size by nature, taking 4 bytes per entry.
  metric X, [X, X, X]: how optimal is your hash table being used.
    - lesser value mean more optimal hash table use, min is 1.0.
    - last three numbers in squares is moving average (EWMA) of hash table
      access divided by match rate (searches / matches) for 4sec, and 1, 5, and
      15 minutes. Sort of hash table load average. First value is instantaneous.
      You can try to increase hashsize if averages more than 1 (increase
      certainly if >= 2).
  InHash: X pkt, X K: how much traffic accounted for flows in the hash table.
  InPDU X, X: how much traffic in flows preparing to be exported.

> Rate: 202448 bits/sec, 83 packets/sec; 1 min: 668463 bps, 930 pps; 5 min: 329039 bps, 483 pps

  - Module throughput values for 1 second, 1 minute, and 5 minutes.

> cpu#  pps; <search found new [metric], trunc frag alloc maxflows>, traffic: <pkt, bytes>, drop: <pkt, bytes>
> cpu0  123; 980540  10473 180600 [1.03],    0    0    0    0, traffic: 188765, 14 MB, drop: 27863, 1142 K

  cpu#: this is Total and per CPU statistics for:
  pps: packets per second on this CPU. It's useful to debug load imbalance.
  <search found new, trunc frag alloc maxflows>: internal stat for:
  search found new: hash table searched, found, and not found counters.
  [metric]: one minute (ewma) average hash metric per cpu.
  trunc: how much truncated packets are ignored
    - for example if packets don't have valid IP header.
    - it's also accounted in drop packets counter, but not in drop bytes.
  frag: how much fragmented packets have seen.
    - kernel defragments INPUT/OUTPUT chains for us if nf_defrag_ipv[46]
      module is loaded.
    - these packets are not ignored but not reassembled either, so:
    - if there is no enough data in fragment (ex. tcp ports) it is considered
      to be zero.
  alloc: how much cache memory allocations are failed.
    - packets ignored and accounted in traffic drop stat.
    - probably increase system memory if this ever happen.
  maxflows: how much packets ignored on maxflows (maximum active flows reached).
    - packets ignored and accounted in traffic drop stat.
    - you can control maxflows limit by sysctl net.netflow.maxflows variable.

  traffic: <pkt, bytes>: how much traffic is accounted.
  pkt, bytes: sum of packets/megabytes accounted by module.
    - flows that failed to be exported (on socket error) is accounted here too.

  drop: <pkt, bytes>: how much of traffic is not accounted.
  pkt, bytes: sum of packets/kilobytes that are dropped by metering process.
    - reasons these drops are accounted here:
      truncated/fragmented packets,
      packet is for new flow but failed to allocate memory for it,
      packet is for new flow but maxflows is already reached.
    Traffic lost due to socket errors is not accounted here. Look below
      about export and socket errors.

> Export: Rate 0 bytes/s; Total 2 pkts, 0 MB, 18 flows; Errors 0 pkts; Traffic lost 0 pkts, 0 Kbytes, 0 flows.

  Rate X bytes/s: traffic rate generated by exporter itself.
  Total X pkts, X MB: total amount of traffic generated by exporter.
  X flows: how much data flows are exported.
  Errors X pkts: how much packets not sent due to socket errors.
  Traffic lost 0 pkts, 0 Kbytes, 0 flows: how much metered traffic is lost
    due to socket errors.
  Note that `cberr' errors are not accounted here due to their asynchronous
    nature. Read below about `cberr' errors.

> sock0: 10.0.0.2:2055 unconnected (1 attempts).

  If socket is unconnected (for example if module is loaded before interfaces
  are up) it shows how many connection attempts have failed. It will try to
  connect until success.

> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, connect 0, cberr 0, other 0

  sockX: per destination stats for:
  X.X.X.X:Y: destination ip address and port.
    - controlled by sysctl net.netflow.destination variable.
  sndbuf X: how much data socket can hold in buffers.
    - controlled by sysctl net.netflow.sndbuf variable.
    - if you have packet drops due to sndbuf reached (error -11) increase this
      value.
  filled X: how much data in socket buffers right now.
  peak X: peak value of how much data in socket buffers was.
    - you will be interested to keep it below sndbuf value.
  err: how much packets are dropped due to errors.
    - all flows from them will be accounted in drop stat.
  sndbuf reached X: how much packets dropped due to sndbuf being too small
      (error -11).
  connect X: how much connection attempts was failed.
  cberr X: how much connection refused ICMP errors we got from export target.
    - probably you have not launched collector software on destination,
    - or specified wrong destination address.
    - flows lost in this fashion is not possible to account in drop stat.
    - these are ICMP errors, and would look like this in tcpdump:
      05:04:09.281247 IP alice.19440 > bob.2055: UDP, length 120
      05:04:09.281405 IP bob > alice: ICMP bob udp port 2055 unreachable, length 156
  other X: dropped due to other possible errors.

> aggr0: ...
  aggrX: aggregation rulesets.
    - controlled by sysctl net.netflow.aggregation variable.


==========================
= NetFlow considerations =
==========================

  List of all IPFIX Elements http://www.iana.org/assignments/ipfix/ipfix.xhtml

  Flow Keys are Elements that distinguish flows. Quoting RFC: "If a Flow
  Record for a specific Flow Key value already exists, the Flow Record is
  updated; otherwise, a new Flow Record is created."

  In this implementation the following Elements are treated as Flow Keys:

     IPv4 source address:        sourceIPv4Address(8),
     IPv6 source address:        sourceIPv6Address(27),
     IPv4 destination address:   destinationIPv4Address(12),
     IPv6 destination address:   destinationIPv6Address(28),
     TCP/UDP source port:        sourceTransportPort(7),
     TCP/UDP destination port:   destinationTransportPort(11),
     input interface:            ingressInterface(10),
     IP protocol:                protocolIdentifier(4),
     IP TOS:                     ipClassOfService(5),
     and address family (IP or IPv6).

  Additional Flow Keys if VLAN exporting is enabled:

     First (outer) dot1q VLAN tag: dot1qVlanId(243) and
                                 dot1qPriority(244) for IPFIX,
                                 or vlanId(243) for NetFlow v9.
     Second (customer) dot1q VLAN tag: dot1qCustomerVlanId(245)
                                 and dot1qCustomerPriority(246).

  Additional Flow Keys if MAC address exporting is enabled:

     Destination MAC address:    destinationMacAddress(80),
     Source MAC address:         sourceMacAddress(56).

  Additional Flow Keys if MPLS-aware NetFlow is enabled:

     The captured MPLS stack is treated as a flow key in full (including TTL
     values), that is, the Elements from mplsTopLabelStackSection(70) to
     mplsLabelStackSection10(79), and, if present, mplsTopLabelTTL(200).


  Other Elements are not Flow Keys. Note that outer interface, which is
  egressInterface(14), is not regarded as Flow Key. Quoting RFC 7012: "For
  Information Elements ... for which the value may change from packet to packet
  within a single Flow, the exported value of an Information Element is by
  default determined by the first packet observed for the corresponding Flow".

  Note that NetFlow and IPFIX modes of operation may have slightly different
  Elements being used and different statistics sent via Options Templates.


=========
= VOILA =
=========
ipt-netflow-2.2/README.promisc0000644000000000000000000000675513213006644014672 0ustar  rootrootHello,

If you wish to use the ipt-netflow module to account traffic mirrored on a switch, you may follow
 one of these examples:


   Solution 1: Promisc-hack module option. [2014]
   Solution 2: General kernel patch. [2008]
   Solution 3: Alternative w/o kernel patch, using bridges. [2010]


    **************
    * Solution 1 *
    **************

   No kernel patching is needed anymore! (As in easy.)

   Compile module with `./configure --enable-promisc' option. This will enable
  `promisc=' module parameter and sysctl parameter `net.netflow.promisc'.
  Set any of these to `1' to enable promisc hack, you will see dmesg message
  that it's enabled, set to `0' to disable (default).

   This option turned on will pass promisc traffic into `PREROUTING' chain
  of `raw' table (same as with promisc patches). Briefly it's like this:

   # cd ipt-netflow/
   # ./configure --enable-promisc
   # make all install
   # iptables -A PREROUTING -t raw -i eth2 -j NETFLOW
   # sysctl net.netflow.promisc=1
   # ifconfig eth2 promisc
   # grep Promisc /proc/net/stat/ipt_netflow

   Now you should be able to see promisc observed packets count increasing.

   Note that enabling the module's parameter promisc=1 will not enable promiscuous
  mode on network cards; these are completely different things. This option will
  let iptables see promisc traffic. That traffic will not be routed anywhere
  and is discarded just after passing the PREROUTING chain.

   Do not enable this option if your kernel is already patched with the promisc
  patch, as this may cause double accounting. Just keep it disabled or remove
  the patch. Promisc patching is completely custom and non-standard, so if you
  did not apply it - you are most probably safe to use the promisc option. But, if
  in doubt - check whether the module sees traffic in raw/PREROUTING; if it
  doesn't - you don't have the patch applied and it is certainly safe to use
  the option.



    **************
    * Solution 2 *
    **************

1. Patch your kernel with `raw_promisc.patch' to enable raw table to see promisc traffic.

 # cd /usr/src/linux
 # patch -p1 < ~/ipt_netflow/raw_promisc.patch

 Then recompile and reinstall patched kernel.

2. For example, you are mirroring traffic on your Cisco switch to the 47th vlan:

 # interface FastEthernet0/32
 # description this port with vlan 47
 # switchport trunk encapsulation dot1q
 # switchport mode trunk
 # !
 # interface FastEthernet0/33
 # port monitor FastEthernet0/32
 # !

3. Enable promisc mode on interface to actually see the packets:

 # /sbin/ifconfig eth1 promisc

4. Configure vlan on your linux box:

 # /sbin/vconfig add eth1 47
 # /sbin/ifconfig eth1.47 up

5. Compile module:

 # make clean all install

6. Load ipt_netflow module:

 # /sbin/modprobe ipt_NETFLOW hashsize=160000 destination=127.0.0.1:9800

7. Direct all packets from 47th vlan to ipt_netflow module:

 # /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW

Voila.

ps. For Debian Squeeze instructions look at raw_promisc_debian_squeeze6.patch



    **************
    * Solution 3 *
    **************

By Anonymous.

> raw promisc hack is not needed
> there is a more elegant way to capture port mirrored traffic:
>
> 1. create a bridge of eth0 and dummy0
> 2. put eth0 to promisc
> 3. add a "-i br0 -j NETFLOW" rule to FORWARD (possibly also -j DROP after that)
>
> ...for some reason it works when ipt_netflow is attached to a bridge, but
> requires the promisc hack when attached to a real promisc interface.

Sometimes you may need to run:

 # brctl setageing br0 0

for this scheme to work.

ipt-netflow-2.2/murmur3.h0000644000000000000000000000171213213006644014106 0ustar  rootroot/* MurmurHash3, based on https://code.google.com/p/smhasher of Austin Appleby. */

/*
 * Rotate the 32-bit value x left by r bit positions.
 *
 * Both shift counts are reduced modulo 32, so r == 0 (or any multiple of
 * 32) returns x unchanged instead of evaluating x >> 32, which is undefined
 * behaviour in C for a 32-bit operand. For r in 1..31 the result is
 * identical to the plain (x << r) | (x >> (32 - r)) form.
 */
static __always_inline uint32_t rotl32(const uint32_t x, const int8_t r)
{
        return (x << (r & 31)) | (x >> ((32 - r) & 31));
}

/*
 * Final mixing step that murmur3() applies to its accumulated hash.
 *
 * Alternates "xor with own right-shifted copy" and "multiply by an odd
 * constant" (shifts 16, 13, 16), all in 32-bit unsigned arithmetic, and
 * returns the mixed value. An input of 0 yields 0.
 */
static __always_inline uint32_t fmix32(register uint32_t h)
{
        h = (h ^ (h >> 16)) * 0x85ebca6b;
        h = (h ^ (h >> 13)) * 0xc2b2ae35;
        return h ^ (h >> 16);
}

/*
 * Compute a 32-bit hash of the len bytes starting at key, seeded with seed.
 *
 * The input is consumed as len / 4 whole 32-bit words, read through a
 * uint32_t pointer exactly as they are stored in memory, followed by the
 * remaining len & 3 bytes, which are combined lowest-address-first into one
 * partial word. The running hash is finally xored with len and passed
 * through fmix32().
 *
 * NOTE(review): key is dereferenced as uint32_t, so callers presumably pass
 * suitably aligned data - confirm against the call sites.
 */
static inline uint32_t murmur3(const void *key, const uint32_t len, const uint32_t seed)
{
        const uint32_t c1 = 0xcc9e2d51;
        const uint32_t c2 = 0x1b873593;
        const uint32_t *word = (const uint32_t *)key;
        const uint8_t *rest;
        const uint32_t rest_len = len & 3;
        uint32_t words_left = len / 4;
        uint32_t hash = seed;
        uint32_t partial = 0;

        /* Body: mix in every complete 32-bit word. */
        while (words_left != 0) {
                hash ^= rotl32(*word++ * c1, 15) * c2;
                hash = rotl32(hash, 13) * 5 + 0xe6546b64;
                words_left--;
        }

        /* Tail: fold the 1..3 leftover bytes into a single partial word. */
        rest = (const uint8_t *)word;
        if (rest_len != 0) {
                if (rest_len == 3)
                        partial ^= rest[2] << 16;
                if (rest_len >= 2)
                        partial ^= rest[1] << 8;
                partial ^= rest[0];
                hash ^= rotl32(partial * c1, 15) * c2;
        }

        /* Finalization: fold in the length, then mix. */
        return fmix32(hash ^ len);
}

ipt-netflow-2.2/CREDITS0000644000000000000000000000572013213006644013346 0ustar  rootrootLicense is GPL v2, is the same as of Linux kernel:

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.


Sign-off rule is that of the Linux kernel:

  Developer's Certificate of Origin 1.1

  By making a contribution to this project, I certify that:

  (a) The contribution was created in whole or in part by me and I
      have the right to submit it under the open source license
      indicated in the file; or

  (b) The contribution is based upon previous work that, to the best
      of my knowledge, is covered under an appropriate open source
      license and I have the right under that license to submit that
      work with modifications, whether created in whole or in part
      by me, under the same open source license (unless I am
      permitted to submit under a different license), as indicated
      in the file; or

  (c) The contribution was provided directly to me by some other
      person who certified (a), (b) or (c) and I have not modified
      it.

  (d) I understand and agree that this project and the contribution
      are public and that a record of the contribution (including all
      personal information I submit with it, including my sign-off) is
      maintained indefinitely and may be redistributed consistent with
      this project or the open source license(s) involved.


Principal author and project maintainer:

   ABC <abc@telekom.ru> [2008-2015]


Compatibility layer is using code from Linux Kernel and should be
attributed to respective Linux developers.

MurmurHash3 is based on smhasher (2012) of Austin Appleby.


Patch authors and submitters:

   Ilya Evseev [2010]
   spizer [2010]
   Eric W. Biederman [2010]
   Giedrius Liubavičius [2010]
   Igor Alov [2010]
   Alexey Osipov [2011]
   Pavel Boldin [2012]
   Alexander Demenshin [2013]
   uropek [2013]
   shaman [2013]
   Jeremy Drake [2013]
   Matthew Martin [2016] DKMS fixes
   alex-eri [2016, 2017] OpenWRT compatibility


Reference or prototype code authors:

   Valentin V. Yankin [2011] SNMP-index


Project supporters:

   Summa Telecom [2014]
   Starlink [2014]
   Anonymous


Extensive testing and other help:

   Alexander (shulik) [2013]
   Igor Diakonov @ Summa Telecom [2014]
   Yuriy Dolgoruk @ Summa Telecom [2014]
   Andrew Savin @ Starlink [2014]
   Alexander Zakharov @ WAW Technologies [2015]
   Ivanov Eduard [2015]
   Maciej Zdeb [2015]


(Send your names, emails, or nicks to add to the list.)


Generated by GNU Enscript 1.6.6.