am 6a92f66b: am 74aa4595: am 18921713: Remove unused #include.

* commit '6a92f66b2f33884ef78139c14b54862ba09299f1':
diff --git a/Android.mk b/Android.mk
index 988d925..5f2fe63 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,7 +1,7 @@
 LOCAL_PATH:= $(call my-dir)
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES:=clatd.c dump.c checksum.c translate.c icmp.c ipv4.c ipv6.c config.c dns64.c logging.c getaddr.c netlink_callbacks.c netlink_msg.c setif.c mtu.c
+LOCAL_SRC_FILES:=clatd.c dump.c checksum.c translate.c icmp.c ipv4.c ipv6.c config.c dns64.c logging.c getaddr.c netlink_callbacks.c netlink_msg.c setif.c mtu.c tun.c ring.c
 
 LOCAL_CFLAGS := -Wall -Werror -Wunused-parameter
 LOCAL_C_INCLUDES := external/libnl/include bionic/libc/dns/include
@@ -30,8 +30,18 @@
 
 LOCAL_MODULE := clatd_test
 LOCAL_CFLAGS := -Wall -Werror -Wunused-parameter
-LOCAL_SRC_FILES := clatd_test.cpp checksum.c translate.c icmp.c ipv4.c ipv6.c logging.c config.c
+LOCAL_SRC_FILES := clatd_test.cpp checksum.c translate.c icmp.c ipv4.c ipv6.c logging.c config.c tun.c
 LOCAL_MODULE_TAGS := eng tests
 LOCAL_SHARED_LIBRARIES := liblog
 
 include $(BUILD_NATIVE_TEST)
+
+# Microbenchmark.
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := clatd_microbenchmark
+LOCAL_CFLAGS := -Wall -Werror -Wunused-parameter
+LOCAL_SRC_FILES := clatd_microbenchmark.c checksum.c tun.c
+LOCAL_MODULE_TAGS := eng tests
+
+include $(BUILD_NATIVE_TEST)
diff --git a/checksum.c b/checksum.c
index 3dd1e00..23a7c02 100644
--- a/checksum.c
+++ b/checksum.c
@@ -22,7 +22,6 @@
 #include <netinet/tcp.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
-#include <linux/icmp.h>
 
 #include "checksum.h"
 
diff --git a/clatd.c b/clatd.c
index 3f0af0b..faeb679 100644
--- a/clatd.c
+++ b/clatd.c
@@ -50,6 +50,8 @@
 #include "mtu.h"
 #include "getaddr.h"
 #include "dump.h"
+#include "tun.h"
+#include "ring.h"
 
 #define DEVICEPREFIX "v4-"
 
@@ -65,44 +67,6 @@
   running = 0;
 }
 
-/* function: tun_open
- * tries to open the tunnel device
- */
-int tun_open() {
-  int fd;
-
-  fd = open("/dev/tun", O_RDWR);
-  if(fd < 0) {
-    fd = open("/dev/net/tun", O_RDWR);
-  }
-
-  return fd;
-}
-
-/* function: tun_alloc
- * creates a tun interface and names it
- * dev - the name for the new tun device
- */
-int tun_alloc(char *dev, int fd) {
-  struct ifreq ifr;
-  int err;
-
-  memset(&ifr, 0, sizeof(ifr));
-
-  ifr.ifr_flags = IFF_TUN;
-  if( *dev ) {
-    strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-    ifr.ifr_name[IFNAMSIZ-1] = '\0';
-  }
-
-  if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
-    close(fd);
-    return err;
-  }
-  strcpy(dev, ifr.ifr_name);
-  return 0;
-}
-
 /* function: configure_packet_socket
  * Binds the packet socket and attaches the receive filter to it.
  * sock - the socket to configure
@@ -234,7 +198,7 @@
  * mark - the socket mark to use for the sending raw socket
  */
 void open_sockets(struct tun_data *tunnel, uint32_t mark) {
-  int rawsock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+  int rawsock = socket(AF_INET6, SOCK_RAW | SOCK_NONBLOCK, IPPROTO_RAW);
   if (rawsock < 0) {
     logmsg(ANDROID_LOG_FATAL, "raw socket failed: %s", strerror(errno));
     exit(1);
@@ -250,13 +214,10 @@
 
   tunnel->write_fd6 = rawsock;
 
-  int packetsock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
-  if (packetsock < 0) {
-    logmsg(ANDROID_LOG_FATAL, "packet socket failed: %s", strerror(errno));
+  tunnel->read_fd6 = ring_create(tunnel);
+  if (tunnel->read_fd6 < 0) {
     exit(1);
   }
-
-  tunnel->read_fd6 = packetsock;
 }
 
 /* function: update_clat_ipv6_address
@@ -353,23 +314,31 @@
     exit(1);
   }
 
+  error = set_nonblocking(tunnel->fd4);
+  if (error < 0) {
+    logmsg(ANDROID_LOG_FATAL, "set_nonblocking failed: %s", strerror(errno));
+    exit(1);
+  }
+
   configure_tun_ip(tunnel);
 }
 
 /* function: read_packet
- * reads a packet from the tunnel fd and passes it down the stack
- * active_fd - tun file descriptor marked ready for reading
- * tunnel    - tun device data
+ * reads a packet from the tunnel fd and translates it
+ * read_fd  - file descriptor to read original packet from
+ * write_fd - file descriptor to write translated packet to
+ * to_ipv6  - whether the packet is to be translated to ipv6 or ipv4
  */
-void read_packet(int active_fd, const struct tun_data *tunnel) {
+void read_packet(int read_fd, int write_fd, int to_ipv6) {
   ssize_t readlen;
   uint8_t buf[PACKETLEN], *packet;
-  int fd;
 
-  readlen = read(active_fd, buf, PACKETLEN);
+  readlen = read(read_fd, buf, PACKETLEN);
 
   if(readlen < 0) {
-    logmsg(ANDROID_LOG_WARN,"read_packet/read error: %s", strerror(errno));
+    if (errno != EAGAIN) {
+      logmsg(ANDROID_LOG_WARN,"read_packet/read error: %s", strerror(errno));
+    }
     return;
   } else if(readlen == 0) {
     logmsg(ANDROID_LOG_WARN,"read_packet/tun interface removed");
@@ -377,41 +346,32 @@
     return;
   }
 
-  if (active_fd == tunnel->fd4) {
-    ssize_t header_size = sizeof(struct tun_pi);
-
-    if (readlen < header_size) {
-      logmsg(ANDROID_LOG_WARN,"read_packet/short read: got %ld bytes", readlen);
-      return;
-    }
-
-    struct tun_pi *tun_header = (struct tun_pi *) buf;
-    uint16_t proto = ntohs(tun_header->proto);
-    if (proto != ETH_P_IP) {
-      logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
-      return;
-    }
-
-    if(tun_header->flags != 0) {
-      logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, tun_header->flags);
-    }
-
-    fd = tunnel->write_fd6;
-    packet = buf + header_size;
-    readlen -= header_size;
-  } else {
-    fd = tunnel->fd4;
-    packet = buf;
+  struct tun_pi *tun_header = (struct tun_pi *) buf;
+  if (readlen < (ssize_t) sizeof(*tun_header)) {
+    logmsg(ANDROID_LOG_WARN,"read_packet/short read: got %ld bytes", readlen);
+    return;
   }
 
-  translate_packet(fd, (fd == tunnel->write_fd6), packet, readlen);
+  uint16_t proto = ntohs(tun_header->proto);
+  if (proto != ETH_P_IP) {
+    logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
+    return;
+  }
+
+  if(tun_header->flags != 0) {
+    logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, tun_header->flags);
+  }
+
+  packet = (uint8_t *) (tun_header + 1);
+  readlen -= sizeof(*tun_header);
+  translate_packet(write_fd, to_ipv6, packet, readlen);
 }
 
 /* function: event_loop
  * reads packets from the tun network interface and passes them down the stack
  * tunnel - tun device data
  */
-void event_loop(const struct tun_data *tunnel) {
+void event_loop(struct tun_data *tunnel) {
   time_t last_interface_poll;
   struct pollfd wait_fd[] = {
     { tunnel->read_fd6, POLLIN, 0 },
@@ -427,16 +387,16 @@
         logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s",strerror(errno));
       }
     } else {
-      size_t i;
-      for(i = 0; i < ARRAY_SIZE(wait_fd); i++) {
-        // Call read_packet if the socket has data to be read, but also if an
-        // error is waiting. If we don't call read() after getting POLLERR, a
-        // subsequent poll() will return immediately with POLLERR again,
-        // causing this code to spin in a loop. Calling read() will clear the
-        // socket error flag instead.
-        if(wait_fd[i].revents != 0) {
-          read_packet(wait_fd[i].fd,tunnel);
-        }
+      // Call read_packet if the socket has data to be read, but also if an
+      // error is waiting. If we don't call read() after getting POLLERR, a
+      // subsequent poll() will return immediately with POLLERR again,
+      // causing this code to spin in a loop. Calling read() will clear the
+      // socket error flag instead.
+      if (wait_fd[0].revents) {
+        ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */);
+      }
+      if (wait_fd[1].revents) {
+        read_packet(tunnel->fd4, tunnel->write_fd6, 1 /* to_ipv6 */);
       }
     }
 
diff --git a/clatd.h b/clatd.h
index e7cef43..f421f46 100644
--- a/clatd.h
+++ b/clatd.h
@@ -18,8 +18,7 @@
 #ifndef __CLATD_H__
 #define __CLATD_H__
 
-#include <linux/if.h>
-#include <linux/if_tun.h>
+#include <sys/uio.h>
 
 #define MAXMTU 1500
 #define PACKETLEN (MAXMTU+sizeof(struct tun_pi))
@@ -33,9 +32,15 @@
 // how frequently (in seconds) to poll for an address change while there is no traffic
 #define NO_TRAFFIC_INTERFACE_POLL_FREQUENCY 90
 
-struct tun_data {
-  char device4[IFNAMSIZ];
-  int read_fd6, write_fd6, fd4;
-};
+// A clat_packet is an array of iovec structures representing a packet that we are translating.
+// The CLAT_POS_XXX constants represent the array indices within the clat_packet that contain
+// specific parts of the packet. The packet_* functions operate on all the packet segments past a
+// given position.
+typedef enum {
+    CLAT_POS_TUNHDR, CLAT_POS_IPHDR, CLAT_POS_FRAGHDR, CLAT_POS_TRANSPORTHDR,
+    CLAT_POS_ICMPERR_IPHDR, CLAT_POS_ICMPERR_FRAGHDR, CLAT_POS_ICMPERR_TRANSPORTHDR,
+    CLAT_POS_PAYLOAD, CLAT_POS_MAX
+} clat_packet_index;
+typedef struct iovec clat_packet[CLAT_POS_MAX];
 
 #endif /* __CLATD_H__ */
diff --git a/clatd_microbenchmark.c b/clatd_microbenchmark.c
new file mode 100644
index 0000000..fed3100
--- /dev/null
+++ b/clatd_microbenchmark.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * clatd_microbenchmark.c - micro-benchmark for clatd tun send path
+ *
+ * Run with:
+ *
+ * adb push {$ANDROID_PRODUCT_OUT,}/data/nativetest/clatd_microbenchmark/clatd_microbenchmark
+ * adb shell /data/nativetest/clatd_microbenchmark/clatd_microbenchmark
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
+#include "checksum.h"
+#include "tun.h"
+
+#define DEVICENAME "clat4"
+
+#define PORT 51339
+#define PAYLOADSIZE (1280 - sizeof(struct iphdr) - sizeof(struct udphdr))
+#define NUMPACKETS 1000000
+#define SEC_TO_NANOSEC (1000 * 1000 * 1000)
+
+void init_sockaddr_in(struct sockaddr_in *sin, const char *addr) {
+    sin->sin_family = AF_INET;
+    sin->sin_port = 0;
+    sin->sin_addr.s_addr = inet_addr(addr);
+}
+
+void die(const char *str) {
+    perror(str);
+    exit(1);
+}
+
+int setup_tun() {
+    int fd = tun_open();
+    if (fd == -1) die("tun_open");
+
+    char dev[IFNAMSIZ] = DEVICENAME;
+    int ret = tun_alloc(dev, fd);
+    if (ret == -1) die("tun_alloc");
+    struct ifreq ifr = {
+        .ifr_name = DEVICENAME,
+    };
+
+    int s = socket(AF_INET, SOCK_DGRAM, 0);
+    init_sockaddr_in((struct sockaddr_in *) &ifr.ifr_addr, "192.0.0.4");
+    if (ioctl(s, SIOCSIFADDR, &ifr) < 0) die("SIOCSIFADDR");
+    init_sockaddr_in((struct sockaddr_in *) &ifr.ifr_addr, "255.255.255.248");
+    if (ioctl(s, SIOCSIFNETMASK, &ifr) < 0) die("SIOCSIFNETMASK");
+    if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) die("SIOCGIFFLAGS");
+    ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+    if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) die("SIOCSIFFLAGS");
+    return fd;
+}
+
+int send_packet(int fd, uint8_t payload[], int len, uint32_t payload_checksum) {
+    struct tun_pi tun = { 0, htons(ETH_P_IP) };
+    struct udphdr udp = {
+        .source = htons(1234),
+        .dest = htons(PORT),
+        .len = htons(len + sizeof(udp)),
+        .check = 0,
+    };
+    struct iphdr ip = {
+        .version = 4,
+        .ihl = 5,
+        .tot_len = htons(len + sizeof(ip) + sizeof(udp)),
+        .frag_off = htons(IP_DF),
+        .ttl = 55,
+        .protocol = IPPROTO_UDP,
+        .saddr = htonl(0xc0000006),  // 192.0.0.6
+        .daddr = htonl(0xc0000004),  // 192.0.0.4
+    };
+    clat_packet out = {
+        { &tun, sizeof(tun) },  // tun header
+        { &ip, sizeof(ip) },    // IP header
+        { NULL, 0 },            // Fragment header
+        { &udp, sizeof(udp) },  // Transport header
+        { NULL, 0 },            // ICMP error IP header
+        { NULL, 0 },            // ICMP error fragment header
+        { NULL, 0 },            // ICMP error transport header
+        { payload, len },       // Payload
+    };
+
+    ip.check = ip_checksum(&ip, sizeof(ip));
+
+    uint32_t sum;
+    sum = ipv4_pseudo_header_checksum(&ip, ntohs(udp.len));
+    sum = ip_checksum_add(sum, &udp, sizeof(udp));
+    sum += payload_checksum;
+    udp.check = ip_checksum_finish(sum);
+
+    return send_tun(fd, out, sizeof(out) / sizeof(out[0]));
+}
+
+double timedelta(const struct timespec tv1, const struct timespec tv2) {
+    struct timespec end = tv2;
+    if (end.tv_nsec < tv1.tv_nsec) {
+        end.tv_sec -= 1;
+        end.tv_nsec += SEC_TO_NANOSEC;
+    }
+    double seconds = (end.tv_sec - tv1.tv_sec);
+    seconds += (((double) (end.tv_nsec - tv1.tv_nsec)) / SEC_TO_NANOSEC);
+    return seconds;
+}
+
+void benchmark(const char *name, int fd, int s, int num, int do_read,
+               uint8_t payload[], int len, uint32_t payload_sum) {
+    int i;
+    char buf[4096];
+    struct timespec tv1, tv2;
+    int write_err = 0, read_err = 0;
+    clock_gettime(CLOCK_MONOTONIC, &tv1);
+    for (i = 0; i < num; i++) {
+        if (send_packet(fd, payload, len, payload_sum) == -1) write_err++;
+        if (do_read && recvfrom(s, buf, sizeof(buf), 0, NULL, NULL) == -1) {
+            read_err++;
+            if (errno == ETIMEDOUT) {
+                printf("Timed out after %d packets!\n", i);
+                break;
+            }
+        }
+    }
+    clock_gettime(CLOCK_MONOTONIC, &tv2);
+    double seconds = timedelta(tv1, tv2);
+    int pps = (int) (i / seconds);
+    double mbps = (i * PAYLOADSIZE / 1000000 * 8 / seconds);
+    printf("%s: %d packets in %.2fs (%d pps, %.2f Mbps), ", name, i, seconds, pps, mbps);
+    printf("read err %d (%.2f%%), write err %d (%.2f%%)\n",
+           read_err, (float) read_err / i * 100,
+           write_err, (float) write_err / i * 100);
+}
+
+int open_socket() {
+    int sock = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+
+    int on = 1;
+    if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) die("SO_REUSEADDR");
+
+    struct timeval tv = { 1, 0 };
+    if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) die("SO_RCVTIMEO");
+
+    struct sockaddr_in addr = {
+        .sin_family = AF_INET,
+        .sin_port = ntohs(PORT),
+        .sin_addr = { INADDR_ANY }
+    };
+    if (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) == -1) die ("bind");
+
+   return sock;
+}
+
+int main() {
+    int fd = setup_tun();
+    int sock = open_socket();
+
+    int i;
+    uint8_t payload[PAYLOADSIZE];
+    for (i = 0; i < (int) sizeof(payload); i++) {
+        payload[i] = (uint8_t) i;
+    }
+    uint32_t payload_sum = ip_checksum_add(0, payload, sizeof(payload));
+
+    // Check things are working.
+    char buf[4096];
+    if (send_packet(fd, payload, sizeof(payload), payload_sum) == -1) die("send_packet");
+    if (recvfrom(sock, buf, sizeof(buf), 0, NULL, NULL) == -1) die("recvfrom");
+
+    benchmark("Blocking", fd, sock, NUMPACKETS, 1, payload, sizeof(payload), payload_sum);
+    close(fd);
+
+    fd = setup_tun();
+    set_nonblocking(fd);
+    benchmark("No read", fd, sock, NUMPACKETS, 0, payload, sizeof(payload), payload_sum);
+    close(fd);
+
+    fd = setup_tun();
+    set_nonblocking(fd);
+    benchmark("Nonblocking", fd, sock, NUMPACKETS, 1, payload, sizeof(payload), payload_sum);
+    close(fd);
+
+    return 0;
+}
diff --git a/clatd_test.cpp b/clatd_test.cpp
index fe52c21..7e218f0 100644
--- a/clatd_test.cpp
+++ b/clatd_test.cpp
@@ -432,7 +432,6 @@
   if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, fds)) {
     abort();
   }
-  struct tun_pi tun_header = { 0, 0 };
 
   char foo[512];
   snprintf(foo, sizeof(foo), "%s: Invalid original packet", msg);
@@ -443,13 +442,11 @@
   int version = ip_version(original);
   switch (version) {
     case 4:
-      tun_header.proto = htons(ETH_P_IP);
       expected_proto = htons(ETH_P_IPV6);
       read_fd = fds[1];
       write_fd = fds[0];
       break;
     case 6:
-      tun_header.proto = htons(ETH_P_IPV6);
       expected_proto = htons(ETH_P_IP);
       read_fd = fds[0];
       write_fd = fds[1];
@@ -856,7 +853,7 @@
     { 0x1215, 0x5560, 0x15560 + 20, 0x1200 },
     { 0xd0c7, 0x3ad0, 0x2644b, 0xa74a },
   };
-  unsigned i, failed = 0;
+  unsigned i = 0;
 
   for (i = 0; i < ARRAYSIZE(DATA); i++) {
     struct checksum_data *data = DATA + i;
@@ -889,7 +886,6 @@
 }
 
 TEST_F(ClatdTest, Fragmentation) {
-  int len, i;
   check_fragment_translation(kIPv4Fragments, kIPv4FragLengths,
                              kIPv6Fragments, kIPv6FragLengths,
                              ARRAYSIZE(kIPv4Fragments), "IPv4->IPv6 fragment translation");
diff --git a/config.h b/config.h
index b6d9ae1..e31a81d 100644
--- a/config.h
+++ b/config.h
@@ -19,6 +19,7 @@
 #define __CONFIG_H__
 
 #include <netinet/in.h>
+#include <linux/if.h>
 
 #define DEFAULT_IPV4_LOCAL_SUBNET "192.0.0.4"
 #define DEFAULT_IPV4_LOCAL_PREFIXLEN "29"
diff --git a/ring.c b/ring.c
new file mode 100644
index 0000000..5e99fd5
--- /dev/null
+++ b/ring.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * ring.c - packet ring buffer functions
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+
+#include "logging.h"
+#include "ring.h"
+#include "translate.h"
+#include "tun.h"
+
+int ring_create(struct tun_data *tunnel) {
+  int packetsock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
+  if (packetsock < 0) {
+    logmsg(ANDROID_LOG_FATAL, "packet socket failed: %s", strerror(errno));
+    return -1;
+  }
+
+  int ver = TPACKET_V2;
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_VERSION, (void *) &ver, sizeof(ver))) {
+    logmsg(ANDROID_LOG_FATAL, "setsockopt(PACKET_VERSION, %d) failed: %s", ver, strerror(errno));
+    return -1;
+  }
+
+  int on = 1;
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_LOSS, (void *) &on, sizeof(on))) {
+    logmsg(ANDROID_LOG_WARN, "PACKET_LOSS failed: %s", strerror(errno));
+  }
+
+  struct packet_ring *ring = &tunnel->ring;
+  ring->numblocks = TP_NUM_BLOCKS;
+
+  int total_frames = TP_FRAMES * ring->numblocks;
+
+  struct tpacket_req req = {
+      .tp_frame_size = TP_FRAME_SIZE,  // Frame size.
+      .tp_block_size = TP_BLOCK_SIZE,  // Block size in bytes.
+      .tp_block_nr = ring->numblocks,  // Number of blocks.
+      .tp_frame_nr = total_frames,     // Total frames.
+  };
+
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
+    logmsg(ANDROID_LOG_FATAL, "PACKET_RX_RING failed: %s", strerror(errno));
+    return -1;
+  }
+
+  size_t buflen = TP_BLOCK_SIZE * ring->numblocks;
+  ring->base = mmap(NULL, buflen, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_LOCKED|MAP_POPULATE,
+                    packetsock, 0);
+  if (ring->base == MAP_FAILED) {
+    logmsg(ANDROID_LOG_FATAL, "mmap %lu failed: %s", buflen, strerror(errno));
+    return -1;
+  }
+
+  ring->block = 0;
+  ring->slot = 0;
+  ring->numslots = TP_BLOCK_SIZE / TP_FRAME_SIZE;
+  ring->next = (struct tpacket2_hdr *) ring->base;
+
+  logmsg(ANDROID_LOG_INFO, "Using ring buffer with %d frames (%d bytes) at %p",
+         total_frames, buflen, ring->base);
+
+  return packetsock;
+}
+
+/* function: ring_advance
+ * advances to the next position in the packet ring
+ * ring - packet ring buffer
+ */
+static struct tpacket2_hdr* ring_advance(struct packet_ring *ring) {
+  uint8_t *next = (uint8_t *) ring->next;
+
+  ring->slot++;
+  next += TP_FRAME_SIZE;
+
+  if (ring->slot == ring->numslots) {
+    ring->slot = 0;
+    ring->block++;
+
+    if (ring->block < ring->numblocks) {
+      next += TP_FRAME_GAP;
+    } else {
+      ring->block = 0;
+      next = (uint8_t *) ring->base;
+    }
+  }
+
+  ring->next = (struct tpacket2_hdr *) next;
+  return ring->next;
+}
+
+/* function: ring_read
+ * reads a packet from the ring buffer and translates it
+ * ring     - packet ring buffer to read the original packet from
+ * write_fd - file descriptor to write translated packet to
+ * to_ipv6  - whether the packet is to be translated to ipv6 or ipv4
+ */
+void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6) {
+  struct tpacket2_hdr *tp = ring->next;
+  if (tp->tp_status & TP_STATUS_USER) {
+    uint8_t *packet = ((uint8_t *) tp) + tp->tp_net;
+    translate_packet(write_fd, to_ipv6, packet, tp->tp_len);
+    tp->tp_status = TP_STATUS_KERNEL;
+    tp = ring_advance(ring);
+  }
+}
diff --git a/ring.h b/ring.h
new file mode 100644
index 0000000..b9b8c11
--- /dev/null
+++ b/ring.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * ring.h - packet ring buffer functions
+ */
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/if.h>
+#include <linux/if_packet.h>
+
+#include "clatd.h"
+
+struct tun_data;
+
+// Frame size. Must be a multiple of TPACKET_ALIGNMENT (=16)
+// Why the 16? http://lxr.free-electrons.com/source/net/packet/af_packet.c?v=3.4#L1764
+#define TP_FRAME_SIZE (TPACKET_ALIGN(MAXMTU) + TPACKET_ALIGN(TPACKET2_HDRLEN) + 16)
+
+// Block size. Must be a multiple of the page size, and a power of two for efficient memory use.
+#define TP_BLOCK_SIZE 65536
+
+// In order to save memory, the frame size does not evenly divide the block size. Therefore, the
+// mmapped region will have gaps corresponding to the empty space at the end of each block.
+#define TP_FRAMES (TP_BLOCK_SIZE / TP_FRAME_SIZE)
+#define TP_FRAME_GAP (TP_BLOCK_SIZE % TP_FRAME_SIZE)
+
+// TODO: Make this configurable. This requires some refactoring because the packet socket is
+// opened before we drop privileges, but the configuration file is read after. A value of 16
+// results in 656 frames (1048576 bytes).
+#define TP_NUM_BLOCKS 16
+
+struct packet_ring {
+  uint8_t *base;
+  struct tpacket2_hdr *next;
+  int slot, numslots;
+  int block, numblocks;
+};
+
+int ring_create(struct tun_data *tunnel);
+void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6);
+
+#endif
diff --git a/setif.c b/setif.c
index 359ed24..07f5bac 100644
--- a/setif.c
+++ b/setif.c
@@ -159,7 +159,7 @@
  * ifname    - name of interface to add the address to
  */
 int add_anycast_address(int sock, struct in6_addr *addr, const char *ifname) {
-  int ifindex, s, ret;
+  int ifindex;
 
   ifindex = if_nametoindex(ifname);
   if (!ifindex) {
diff --git a/translate.c b/translate.c
index 487468b..ddc9bac 100644
--- a/translate.c
+++ b/translate.c
@@ -16,7 +16,6 @@
  * translate.c - CLAT functions / partial implementation of rfc6145
  */
 #include <string.h>
-#include <sys/uio.h>
 
 #include "icmp.h"
 #include "translate.h"
@@ -25,6 +24,7 @@
 #include "config.h"
 #include "logging.h"
 #include "debug.h"
+#include "tun.h"
 
 /* function: packet_checksum
  * calculates the checksum over all the packet components starting from pos
@@ -465,10 +465,6 @@
   return CLAT_POS_PAYLOAD + 1;
 }
 
-void send_tun(int fd, clat_packet out, int iov_len) {
-  writev(fd, out, iov_len);
-}
-
 // Weak symbol so we can override it in the unit test.
 void send_rawv6(int fd, clat_packet out, int iov_len) __attribute__((weak));
 
diff --git a/translate.h b/translate.h
index 46e178b..aa8b736 100644
--- a/translate.h
+++ b/translate.h
@@ -32,17 +32,6 @@
 
 #define MAX_TCP_HDR (15 * 4)   // Data offset field is 4 bits and counts in 32-bit words.
 
-// A clat_packet is an array of iovec structures representing a packet that we are translating.
-// The CLAT_POS_XXX constants represent the array indices within the clat_packet that contain
-// specific parts of the packet. The packet_* functions operate on all the packet segments past a
-// given position.
-typedef enum {
-    CLAT_POS_TUNHDR, CLAT_POS_IPHDR, CLAT_POS_FRAGHDR, CLAT_POS_TRANSPORTHDR,
-    CLAT_POS_ICMPERR_IPHDR, CLAT_POS_ICMPERR_FRAGHDR, CLAT_POS_ICMPERR_TRANSPORTHDR,
-    CLAT_POS_PAYLOAD, CLAT_POS_MAX
-} clat_packet_index;
-typedef struct iovec clat_packet[CLAT_POS_MAX];
-
 // Calculates the checksum over all the packet components starting from pos.
 uint16_t packet_checksum(uint32_t checksum, clat_packet packet, clat_packet_index pos);
 
diff --git a/tun.c b/tun.c
new file mode 100644
index 0000000..49f0ea7
--- /dev/null
+++ b/tun.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * tun.c - tun device functions
+ */
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+
+#include "clatd.h"
+
+/* function: tun_open
+ * tries to open the tunnel device
+ */
+int tun_open() {
+  int fd;
+
+  fd = open("/dev/tun", O_RDWR);
+  if(fd < 0) {
+    fd = open("/dev/net/tun", O_RDWR);
+  }
+
+  return fd;
+}
+
+/* function: tun_alloc
+ * creates a tun interface and names it
+ * dev - the name for the new tun device
+ */
+int tun_alloc(char *dev, int fd) {
+  struct ifreq ifr;
+  int err;
+
+  memset(&ifr, 0, sizeof(ifr));
+
+  ifr.ifr_flags = IFF_TUN;
+  if( *dev ) {
+    strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+    ifr.ifr_name[IFNAMSIZ-1] = '\0';
+  }
+
+  if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
+    close(fd);
+    return err;
+  }
+  strcpy(dev, ifr.ifr_name);
+  return 0;
+}
+
+/* function: set_nonblocking
+ * sets a filedescriptor to non-blocking mode
+ * fd - the filedescriptor
+ * returns: 0 on success, -1 on failure
+ */
+int set_nonblocking(int fd) {
+  int flags = fcntl(fd, F_GETFL);
+  if (flags == -1) {
+    return flags;
+  }
+  return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+}
+
+/* function: send_tun
+ * sends a clat_packet to a tun interface
+ * fd      - the tun filedescriptor
+ * out     - the packet to send
+ * iov_len - the number of entries in the clat_packet
+ * returns: number of bytes written on success, -1 on failure
+ */
+int send_tun(int fd, clat_packet out, int iov_len) {
+  return writev(fd, out, iov_len);
+}
diff --git a/tun.h b/tun.h
new file mode 100644
index 0000000..bcdd10e
--- /dev/null
+++ b/tun.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * tun.h - tun device functions
+ */
+#ifndef __TUN_H__
+#define __TUN_H__
+
+#include <linux/if.h>
+
+#include "clatd.h"
+#include "ring.h"
+
+struct tun_data {
+  char device4[IFNAMSIZ];
+  int read_fd6, write_fd6, fd4;
+  struct packet_ring ring;
+};
+
+int tun_open();
+int tun_alloc(char *dev, int fd);
+int send_tun(int fd, clat_packet out, int iov_len);
+int set_nonblocking(int fd);
+
+#endif