| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 1 |  | 
|  | 2 | import java.io.*; | 
|  | 3 | import java.util.*; | 
|  | 4 |  | 
| Elliott Hughes | 328a484 | 2012-10-19 13:03:52 -0700 | [diff] [blame] | 5 | // usage: java ZoneCompactor <setup file> <data directory> <zone.tab file> <output directory> <tzdata version> | 
| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 6 | // | 
| Elliott Hughes | 5b1497a | 2012-10-19 14:47:37 -0700 | [diff] [blame] | 7 | // Compile a set of tzfile-formatted files into a single file containing an index. | 
| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 8 | // | 
|  | 9 | // The compilation is controlled by a setup file, which is provided as a | 
|  | 10 | // command-line argument.  The setup file has the form: | 
|  | 11 | // | 
|  | 12 | // Link <toName> <fromName> | 
|  | 13 | // ... | 
|  | 14 | // <zone filename> | 
|  | 15 | // ... | 
|  | 16 | // | 
| Elliott Hughes | 5b1497a | 2012-10-19 14:47:37 -0700 | [diff] [blame] | 17 | // Note that the links must be declared prior to the zone names. | 
|  | 18 | // A zone name is a filename relative to the source directory such as | 
| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 19 | // 'GMT', 'Africa/Dakar', or 'America/Argentina/Jujuy'. | 
|  | 20 | // | 
|  | 21 | // Use the 'zic' command-line tool to convert from flat files | 
| Elliott Hughes | 5b1497a | 2012-10-19 14:47:37 -0700 | [diff] [blame] | 22 | // (such as 'africa' or 'northamerica') to a directory | 
|  | 23 | // hierarchy suitable for this tool (containing files such as 'data/Africa/Abidjan'). | 
| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 24 | // | 
| Elliott Hughes | d40e63e | 2011-02-17 16:20:07 -0800 | [diff] [blame] | 25 |  | 
/**
 * Compacts a directory of compiled zone files into a single "tzdata" file.
 *
 * The output file has the following layout (all ints are big-endian, as written by
 * {@link RandomAccessFile#writeInt}):
 *
 * <pre>
 *   byte[12] tzdata_version  -- e.g. 'tzdata2012f\0'
 *   int index_offset         -- file offset of the index
 *   int data_offset          -- file offset of the concatenated zone data
 *   int zonetab_offset       -- file offset of the copied zone.tab
 *   index                    -- one entry per zone name, sorted by name:
 *                                 byte[MAXNAME] name ('\0'-padded), int offset, int length, int 0
 *   data                     -- the zone files, concatenated in setup-file order
 *   zonetab                  -- the zone.tab lines that do not start with '#'
 * </pre>
 *
 * Index offsets are relative to the start of the data section. A link shares the offset and
 * length of the zone it points to, so its data is stored only once.
 */
public class ZoneCompactor {
  // Maximum number of characters in a zone name, including '\0' terminator.
  private static final int MAXNAME = 40;

  // Size in bytes of the tzdata_version header field, including '\0' terminator.
  private static final int VERSION_LENGTH = 12;

  // Zone name synonyms, keyed by alias; the value is the zone whose data the alias shares.
  private Map<String,String> links = new HashMap<String,String>();

  // File offsets by zone name.
  private Map<String,Integer> offsets = new HashMap<String,Integer>();

  // File lengths by zone name.
  private Map<String,Integer> lengths = new HashMap<String,Integer>();

  /**
   * Concatenates the contents of 'inFile' onto 'out' and flushes 'out'.
   * The input stream is always closed; 'out' is left open for the caller.
   */
  private static void copyFile(File inFile, OutputStream out) throws Exception {
    InputStream in = new FileInputStream(inFile);
    try {
      byte[] buf = new byte[8192];
      int nbytes;
      while ((nbytes = in.read(buf)) != -1) {
        out.write(buf, 0, nbytes);
      }
      out.flush();
    } finally {
      in.close();
    }
  }

  /**
   * Reads the setup file, gathers the zone data it names and writes
   * '<outputDirectory>/tzdata'.
   *
   * @param setupFile file listing 'Link <toName> <fromName>' lines followed by zone filenames
   * @param dataDirectory directory the zone filenames are relative to
   * @param zoneTabFile zone.tab file whose non-comment lines are appended to the output
   * @param outputDirectory directory in which the 'tzdata' file is created or truncated
   * @param version tzdata version string; must fit in 12 bytes including the '\0' terminator
   * @throws RuntimeException if a name is too long or non-ASCII, a Link line is malformed,
   *         or a link refers to a zone that was never read
   * @throws Exception on any I/O failure
   */
  public ZoneCompactor(String setupFile, String dataDirectory, String zoneTabFile, String outputDirectory, String version) throws Exception {
    // Validate the version up front so a bad argument fails before any file is touched.
    byte[] versionBytes = toAscii(new byte[VERSION_LENGTH], version);

    byte[] allData = readZoneData(setupFile, dataDirectory);
    resolveLinks();
    writeTzdata(new File(outputDirectory, "tzdata"), versionBytes, allData, zoneTabFile);
  }

  /**
   * Parses the setup file, recording links and concatenating the data of every zone that is
   * not itself a link. Returns the concatenated zone data.
   */
  private byte[] readZoneData(String setupFile, String dataDirectory) throws Exception {
    ByteArrayOutputStream allData = new ByteArrayOutputStream();
    BufferedReader reader = new BufferedReader(new FileReader(setupFile));
    try {
      String s;
      while ((s = reader.readLine()) != null) {
        s = s.trim();
        if (s.length() == 0) {
          // A blank line names no zone; treating it as one would resolve to the data directory.
          continue;
        }

        StringTokenizer st = new StringTokenizer(s);
        // Compare the whole first token so a zone name that merely begins with "Link" is
        // still treated as a zone.
        if (st.nextToken().equals("Link")) {
          if (st.countTokens() < 2) {
            throw new RuntimeException("malformed Link line: " + s);
          }
          String to = st.nextToken();
          String from = st.nextToken();
          links.put(from, to);
        } else if (!links.containsKey(s)) {
          // Derive offset and length from the bytes actually appended, so the index always
          // agrees with the data section.
          int offset = allData.size();
          copyFile(new File(dataDirectory, s), allData);
          offsets.put(s, offset);
          lengths.put(s, allData.size() - offset);
        }
      }
    } finally {
      reader.close();
    }
    return allData.toByteArray();
  }

  /** Gives every link the offset and length of the zone it points to. */
  private void resolveLinks() {
    for (Map.Entry<String,String> link : links.entrySet()) {
      String from = link.getKey();
      String to = link.getValue();

      Integer offset = offsets.get(to);
      Integer length = lengths.get(to);
      if (offset == null || length == null) {
        // Fail here with the names involved, not later with an unboxing NullPointerException.
        throw new RuntimeException("link " + from + " refers to unknown zone: " + to);
      }
      offsets.put(from, offset);
      lengths.put(from, length);
    }
  }

  /** Creates/truncates 'outFile' and writes the header, index, zone data and zone.tab. */
  private void writeTzdata(File outFile, byte[] versionBytes, byte[] allData, String zoneTabFile) throws Exception {
    RandomAccessFile f = new RandomAccessFile(outFile, "rw");
    try {
      f.setLength(0);

      // Write the header.

      // byte[12] tzdata_version -- 'tzdata2012f\0'
      // int index_offset -- so we can slip in extra header fields in a backwards-compatible way
      // int data_offset
      // int zonetab_offset

      // tzdata_version
      f.write(versionBytes);

      // Write dummy values for the three offsets, and remember where we need to seek back to
      // later when we have the real values.
      int index_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);
      int data_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);
      int zonetab_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);

      int index_offset = (int) f.getFilePointer();

      // Write the index, sorted by zone name.
      List<String> sortedOlsonIds = new ArrayList<String>(offsets.keySet());
      Collections.sort(sortedOlsonIds);
      for (String zoneName : sortedOlsonIds) {
        if (zoneName.length() >= MAXNAME) {
          throw new RuntimeException("zone filename too long: " + zoneName.length());
        }

        f.write(toAscii(new byte[MAXNAME], zoneName));
        f.writeInt(offsets.get(zoneName));
        f.writeInt(lengths.get(zoneName));
        f.writeInt(0); // Used to be raw GMT offset. No longer used.
      }

      int data_offset = (int) f.getFilePointer();

      // Write the data.
      f.write(allData);

      int zonetab_offset = (int) f.getFilePointer();

      // Copy the zone.tab, dropping lines that start with '#'.
      BufferedReader reader = new BufferedReader(new FileReader(zoneTabFile));
      try {
        String s;
        while ((s = reader.readLine()) != null) {
          if (!s.startsWith("#")) {
            f.writeBytes(s);
            f.write('\n');
          }
        }
      } finally {
        reader.close();
      }

      // Go back and fix up the offsets in the header.
      f.seek(index_offset_offset);
      f.writeInt(index_offset);
      f.seek(data_offset_offset);
      f.writeInt(data_offset);
      f.seek(zonetab_offset_offset);
      f.writeInt(zonetab_offset);
    } finally {
      f.close();
    }
  }

  /**
   * Copies 'src' into the start of 'dst' one byte per character and returns 'dst'.
   * Bytes of 'dst' beyond the copied characters are left untouched.
   *
   * @throws RuntimeException if 'src' contains a character above '~', or is too long to leave
   *         at least one byte of 'dst' free for the '\0' terminator
   */
  private static byte[] toAscii(byte[] dst, String src) {
    if (src.length() >= dst.length) {
      throw new RuntimeException("string too long for " + dst.length + "-byte field: " + src);
    }
    for (int i = 0; i < src.length(); ++i) {
      if (src.charAt(i) > '~') {
        throw new RuntimeException("non-ASCII string: " + src);
      }
      dst[i] = (byte) src.charAt(i);
    }
    return dst;
  }

  /** Command-line entry point; see the usage message for the expected arguments. */
  public static void main(String[] args) throws Exception {
    if (args.length != 5) {
      System.err.println("usage: java ZoneCompactor <setup file> <data directory> <zone.tab file> <output directory> <tzdata version>");
      // A usage error is a failure; report it to the calling script with a non-zero status.
      System.exit(1);
    }
    new ZoneCompactor(args[0], args[1], args[2], args[3], args[4]);
  }
}