Support translating fragmented packets.

Bug: 11542311
Change-Id: I14a20b9ac669cdb5927f6ac26147bb0109099497
diff --git a/clatd.c b/clatd.c
index 3ab2371..a89c588 100644
--- a/clatd.c
+++ b/clatd.c
@@ -51,6 +51,9 @@
 #define DEVICENAME6 "clat"
 #define DEVICENAME4 "clat4"
 
+/* 40 bytes IPv6 header - 20 bytes IPv4 header + 8 bytes fragment header */
+#define MTU_DELTA 28
+
 int forwarding_fd = -1;
 volatile sig_atomic_t running = 1;
 
@@ -271,8 +274,9 @@
     Global_Clatd_Config.mtu = 1280;
   }
 
-  if(Global_Clatd_Config.ipv4mtu <= 0 || (Global_Clatd_Config.ipv4mtu > Global_Clatd_Config.mtu - 20)) {
-    Global_Clatd_Config.ipv4mtu = Global_Clatd_Config.mtu-20;
+  if(Global_Clatd_Config.ipv4mtu <= 0 ||
+     Global_Clatd_Config.ipv4mtu > Global_Clatd_Config.mtu - MTU_DELTA) {
+    Global_Clatd_Config.ipv4mtu = Global_Clatd_Config.mtu - MTU_DELTA;
     logmsg(ANDROID_LOG_WARN,"ipv4mtu now set to = %d",Global_Clatd_Config.ipv4mtu);
   }
 
diff --git a/dump.c b/dump.c
index 94e4796..ba5fa3e 100644
--- a/dump.c
+++ b/dump.c
@@ -220,6 +220,7 @@
   char output[PACKETLEN*3+2];
   size_t i;
 
+  output[0] = '\0';
   for(i = 0; i < len && i < PACKETLEN; i++) {
     snprintf(output + i*3, 4, " %02x", (uint8_t)data[i]);
   }
diff --git a/ipv4.c b/ipv4.c
index e2636b4..84dccd5 100644
--- a/ipv4.c
+++ b/ipv4.c
@@ -57,7 +57,8 @@
 int ipv4_packet(clat_packet out, int pos, const char *packet, size_t len) {
   const struct iphdr *header = (struct iphdr *) packet;
   struct ip6_hdr *ip6_targ = (struct ip6_hdr *) out[pos].iov_base;
-  uint16_t frag_flags;
+  struct ip6_frag *frag_hdr;
+  size_t frag_hdr_len;
   uint8_t nxthdr;
   const char *next_header;
   size_t len_left;
@@ -69,12 +70,6 @@
     return 0;
   }
 
-  frag_flags = ntohs(header->frag_off);
-  if(frag_flags & IP_MF) { // this could theoretically be supported, but isn't
-    logmsg_dbg(ANDROID_LOG_ERROR, "ip_packet/more fragments set, dropping");
-    return 0;
-  }
-
   if(header->ihl < 5) {
     logmsg_dbg(ANDROID_LOG_ERROR, "ip_packet/ip header length set to less than 5: %x", header->ihl);
     return 0;
@@ -111,20 +106,32 @@
   fill_ip6_header(ip6_targ, 0, nxthdr, header);
   out[pos].iov_len = sizeof(struct ip6_hdr);
 
-  // Calculate the pseudo-header checksum.
+  /* Calculate the pseudo-header checksum.
+   * Technically, the length that is used in the pseudo-header checksum is the transport layer
+   * length, which is not the same as len_left in the case of fragmented packets. But since
+   * translation does not change the transport layer length, the checksum is unaffected.
+   */
   old_sum = ipv4_pseudo_header_checksum(header, len_left);
   new_sum = ipv6_pseudo_header_checksum(ip6_targ, len_left, nxthdr);
 
-  if (nxthdr == IPPROTO_ICMPV6) {
-    iov_len = icmp_packet(out, pos + 1, (const struct icmphdr *) next_header, new_sum, len_left);
+  // If the IPv4 packet is fragmented, add a Fragment header.
+  frag_hdr = (struct ip6_frag *) out[pos + 1].iov_base;
+  frag_hdr_len = maybe_fill_frag_header(frag_hdr, ip6_targ, header);
+  out[pos + 1].iov_len = frag_hdr_len;
+
+  if (frag_hdr_len && frag_hdr->ip6f_offlg & IP6F_OFF_MASK) {
+    // Non-first fragment. Copy the rest of the packet as is.
+    iov_len = generic_packet(out, pos + 2, next_header, len_left);
+  } else if (nxthdr == IPPROTO_ICMPV6) {
+    iov_len = icmp_packet(out, pos + 2, (const struct icmphdr *) next_header, new_sum, len_left);
   } else if (nxthdr == IPPROTO_TCP) {
-    iov_len = tcp_packet(out, pos + 1, (const struct tcphdr *) next_header, old_sum, new_sum,
+    iov_len = tcp_packet(out, pos + 2, (const struct tcphdr *) next_header, old_sum, new_sum,
                          len_left);
   } else if (nxthdr == IPPROTO_UDP) {
-    iov_len = udp_packet(out, pos + 1, (const struct udphdr *) next_header, old_sum, new_sum,
+    iov_len = udp_packet(out, pos + 2, (const struct udphdr *) next_header, old_sum, new_sum,
                          len_left);
   } else if (nxthdr == IPPROTO_GRE) {
-    iov_len = generic_packet(out, pos + 1, next_header, len_left);
+    iov_len = generic_packet(out, pos + 2, next_header, len_left);
   } else {
 #if CLAT_DEBUG
     logmsg_dbg(ANDROID_LOG_ERROR, "ip_packet/unknown protocol: %x",header->protocol);
diff --git a/ipv6.c b/ipv6.c
index d188e47..2e83be1 100644
--- a/ipv6.c
+++ b/ipv6.c
@@ -77,6 +77,7 @@
 int ipv6_packet(clat_packet out, int pos, const char *packet, size_t len) {
   const struct ip6_hdr *ip6 = (struct ip6_hdr *) packet;
   struct iphdr *ip_targ = (struct iphdr *) out[pos].iov_base;
+  struct ip6_frag *frag_hdr = NULL;
   uint8_t protocol;
   const char *next_header;
   size_t len_left;
@@ -112,10 +113,6 @@
   len_left = len - sizeof(struct ip6_hdr);
 
   protocol = ip6->ip6_nxt;
-  if (protocol == IPPROTO_ICMPV6) {
-    // ICMP and ICMPv6 have different protocol numbers.
-    protocol = IPPROTO_ICMP;
-  }
 
   /* Fill in the IPv4 header. We need to do this before we translate the packet because TCP and
    * UDP include parts of the IP header in the checksum. Set the length to zero because we don't
@@ -124,21 +121,48 @@
   fill_ip_header(ip_targ, 0, protocol, ip6);
   out[pos].iov_len = sizeof(struct iphdr);
 
-  // Calculate the pseudo-header checksum.
+  // If there's a Fragment header, parse it and decide what the next header is.
+  // Do this before calculating the pseudo-header checksum because it updates the next header value.
+  if (protocol == IPPROTO_FRAGMENT) {
+    frag_hdr = (struct ip6_frag *) next_header;
+    if (len_left < sizeof(*frag_hdr)) {
+      logmsg_dbg(ANDROID_LOG_ERROR, "ipv6_packet/too short for fragment header: %d", len);
+      return 0;
+    }
+
+    next_header += sizeof(*frag_hdr);
+    len_left -= sizeof(*frag_hdr);
+
+    protocol = parse_frag_header(frag_hdr, ip_targ);
+  }
+
+  // ICMP and ICMPv6 have different protocol numbers.
+  if (protocol == IPPROTO_ICMPV6) {
+    protocol = IPPROTO_ICMP;
+    ip_targ->protocol = IPPROTO_ICMP;
+  }
+
+  /* Calculate the pseudo-header checksum.
+   * Technically, the length that is used in the pseudo-header checksum is the transport layer
+   * length, which is not the same as len_left in the case of fragmented packets. But since
+   * translation does not change the transport layer length, the checksum is unaffected.
+   */
   old_sum = ipv6_pseudo_header_checksum(ip6, len_left, protocol);
   new_sum = ipv4_pseudo_header_checksum(ip_targ, len_left);
 
-  // does not support IPv6 extension headers, this will drop any packet with them
-  if (protocol == IPPROTO_ICMP) {
-    iov_len = icmp6_packet(out, pos + 1, (const struct icmp6_hdr *) next_header, len_left);
-  } else if (ip6->ip6_nxt == IPPROTO_TCP) {
-    iov_len = tcp_packet(out, pos + 1, (const struct tcphdr *) next_header, old_sum, new_sum,
+  // Does not support IPv6 extension headers except Fragment.
+  if (frag_hdr && (frag_hdr->ip6f_offlg & IP6F_OFF_MASK)) {
+    iov_len = generic_packet(out, pos + 2, next_header, len_left);
+  } else if (protocol == IPPROTO_ICMP) {
+    iov_len = icmp6_packet(out, pos + 2, (const struct icmp6_hdr *) next_header, len_left);
+  } else if (protocol == IPPROTO_TCP) {
+    iov_len = tcp_packet(out, pos + 2, (const struct tcphdr *) next_header, old_sum, new_sum,
                          len_left);
-  } else if (ip6->ip6_nxt == IPPROTO_UDP) {
-    iov_len = udp_packet(out, pos + 1, (const struct udphdr *) next_header, old_sum, new_sum,
+  } else if (protocol == IPPROTO_UDP) {
+    iov_len = udp_packet(out, pos + 2, (const struct udphdr *) next_header, old_sum, new_sum,
                          len_left);
-  } else if (ip6->ip6_nxt == IPPROTO_GRE) {
-    iov_len = generic_packet(out, pos + 1, next_header, len_left);
+  } else if (protocol == IPPROTO_GRE) {
+    iov_len = generic_packet(out, pos + 2, next_header, len_left);
   } else {
 #if CLAT_DEBUG
     logmsg(ANDROID_LOG_ERROR, "ipv6_packet/unknown next header type: %x", ip6->ip6_nxt);
diff --git a/translate.c b/translate.c
index f7f09cb..7585092 100644
--- a/translate.c
+++ b/translate.c
@@ -167,6 +167,53 @@
   ip6->ip6_dst = ipv4_addr_to_ipv6_addr(old_header->daddr);
 }
 
+/* function: maybe_fill_frag_header
+ * if the ipv4 packet is a fragment (non-zero offset or MF set), fills in an ipv6 fragment header
+ * built from the ipv4 header and changes the next header of ip6_targ to IPPROTO_FRAGMENT
+ * frag_hdr    - target (ipv6) fragmentation header; not written if the packet is not a fragment
+ * ip6_targ    - target (ipv6) header; its current next header value is moved into frag_hdr
+ * old_header  - (ipv4) source packet header
+ * returns: the length of the fragmentation header if present, or zero if not present
+ */
+size_t maybe_fill_frag_header(struct ip6_frag *frag_hdr, struct ip6_hdr *ip6_targ,
+                              const struct iphdr *old_header) {
+  uint16_t frag_flags = ntohs(old_header->frag_off);  // Flags and offset, host byte order.
+  uint16_t frag_off = frag_flags & IP_OFFMASK;
+  if (frag_off == 0 && (frag_flags & IP_MF) == 0) {
+    // Not a fragment. Leave both output headers untouched.
+    return 0;
+  }
+
+  frag_hdr->ip6f_nxt = ip6_targ->ip6_nxt;  // The fragment header now names the payload protocol.
+  frag_hdr->ip6f_reserved = 0;
+  // In IPv4, the offset is the bottom 13 bits; in IPv6 it's the top 13 bits.
+  frag_hdr->ip6f_offlg = htons(frag_off << 3);
+  if (frag_flags & IP_MF) {
+    frag_hdr->ip6f_offlg |= IP6F_MORE_FRAG;  // NOTE(review): assumes a network-order constant.
+  }
+  frag_hdr->ip6f_ident = htonl(ntohs(old_header->id));  // Widen the 16-bit ID to 32 bits.
+  ip6_targ->ip6_nxt = IPPROTO_FRAGMENT;
+
+  return sizeof(*frag_hdr);
+}
+
+/* function: parse_frag_header
+ * copies the fragmentation fields of an ipv6 fragment header into an ipv4 header
+ * sets the ipv4 fragment offset, MF flag, identification and protocol from the fragment header
+ * frag_hdr    - (ipv6) fragmentation header
+ * ip_targ     - target (ipv4) header
+ * returns: the next header value
+ */
+uint8_t parse_frag_header(const struct ip6_frag *frag_hdr, struct iphdr *ip_targ) {
+  uint16_t frag_off = (ntohs(frag_hdr->ip6f_offlg & IP6F_OFF_MASK) >> 3);
+  if (frag_hdr->ip6f_offlg & IP6F_MORE_FRAG) {
+    frag_off |= IP_MF;
+  }
+  ip_targ->frag_off = htons(frag_off);  // Replaces the whole field: offset plus MF only.
+  ip_targ->id = htons(ntohl(frag_hdr->ip6f_ident) & 0xffff);  // Keep the low 16 bits.
+  ip_targ->protocol = frag_hdr->ip6f_nxt;  // Copied as is; no ICMPv6 to ICMP mapping here.
+  return frag_hdr->ip6f_nxt;
+}
 
 /* function: icmp_to_icmp6
  * translate ipv4 icmp to ipv6 icmp
@@ -393,9 +440,6 @@
  * payload      - tcp payload
  * payload_size - size of payload
  * returns: the highest position in the output clat_packet that's filled in
- *
- * TODO: mss rewrite
- * TODO: hosts without pmtu discovery - non DF packets will rely on fragmentation (unimplemented)
  */
 int tcp_translate(clat_packet out, int pos, const struct tcphdr *tcp, size_t header_size,
                   uint32_t old_sum, uint32_t new_sum, const char *payload, size_t payload_size) {
@@ -435,16 +479,20 @@
   // Allocate buffers for all packet headers.
   struct tun_pi tun_targ;
   char iphdr[sizeof(struct ip6_hdr)];
+  char fraghdr[sizeof(struct ip6_frag)];
   char transporthdr[MAX_TCP_HDR];
   char icmp_iphdr[sizeof(struct ip6_hdr)];
+  char icmp_fraghdr[sizeof(struct ip6_frag)];
   char icmp_transporthdr[MAX_TCP_HDR];
 
   // iovec of the packets we'll send. This gets passed down to the translation functions.
   clat_packet out = {
     { &tun_targ, sizeof(tun_targ) },  // Tunnel header.
     { iphdr, 0 },                     // IP header.
+    { fraghdr, 0 },                   // Fragment header.
     { transporthdr, 0 },              // Transport layer header.
     { icmp_iphdr, 0 },                // ICMP error inner IP header.
+    { icmp_fraghdr, 0 },              // ICMP error fragmentation header.
     { icmp_transporthdr, 0 },         // ICMP error transport layer header.
     { NULL, 0 },                      // Payload. No buffer, it's a pointer to the original payload.
   };
diff --git a/translate.h b/translate.h
index 3378254..5efa817 100644
--- a/translate.h
+++ b/translate.h
@@ -36,9 +36,11 @@
 // The CLAT_POS_XXX constants represent the array indices within the clat_packet that contain
 // specific parts of the packet. The packet_* functions operate on all the packet segments past a
 // given position.
-enum clat_packet_index { CLAT_POS_TUNHDR, CLAT_POS_IPHDR, CLAT_POS_TRANSPORTHDR,
-                         CLAT_POS_ICMPERR_IPHDR, CLAT_POS_ICMPERR_TRANSPORTHDR,
-                         CLAT_POS_PAYLOAD, CLAT_POS_MAX };
+enum clat_packet_index {  // Keep in step with the positional clat_packet initializer.
+    CLAT_POS_TUNHDR, CLAT_POS_IPHDR, CLAT_POS_FRAGHDR, CLAT_POS_TRANSPORTHDR,
+    CLAT_POS_ICMPERR_IPHDR, CLAT_POS_ICMPERR_FRAGHDR, CLAT_POS_ICMPERR_TRANSPORTHDR,
+    CLAT_POS_PAYLOAD, CLAT_POS_MAX  // CLAT_POS_MAX is the clat_packet array length.
+};
 typedef struct iovec clat_packet[CLAT_POS_MAX];
 
 // Calculates the checksum over all the packet components starting from pos.
@@ -65,6 +67,11 @@
 int ipv4_packet(clat_packet out, int pos, const char *packet, size_t len);
 int ipv6_packet(clat_packet out, int pos, const char *packet, size_t len);
 
+// Convert between IPv4 fragmentation fields and the IPv6 Fragment header.
+size_t maybe_fill_frag_header(struct ip6_frag *frag_hdr, struct ip6_hdr *ip6_targ,
+                              const struct iphdr *old_header);
+uint8_t parse_frag_header(const struct ip6_frag *frag_hdr, struct iphdr *ip_targ);
+
 // Translate ICMP packets.
 int icmp_to_icmp6(clat_packet out, int pos, const struct icmphdr *icmp, uint32_t checksum,
                   const char *payload, size_t payload_size);