Blame - opengl/libs/ETC1/etc1.cpp - android_frameworks_native

blob: 19d428a394e18370ed8c6b5c0d9619d8ecd6361f [file] [log] [blame]

Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	1	// Copyright 2009 Google Inc.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#include <ETC1/etc1.h>
				16
				17	#include <string.h>
				18
				19	/* From http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
				20
				21	The number of bits that represent a 4x4 texel block is 64 bits if
				22	<internalformat> is given by ETC1_RGB8_OES.
				23
				24	The data for a block is a number of bytes,
				25
				26	{q0, q1, q2, q3, q4, q5, q6, q7}
				27
				28	where byte q0 is located at the lowest memory address and q7 at
				29	the highest. The 64 bits specifying the block is then represented
				30	by the following 64 bit integer:
				31
				32	int64bit = 256(256(256(256(256(256(256*q0+q1)+q2)+q3)+q4)+q5)+q6)+q7;
				33
				34	ETC1_RGB8_OES:
				35
				36	a) bit layout in bits 63 through 32 if diffbit = 0
				37
				38	63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
				39	-----------------------------------------------
				40	\| base col1 \| base col2 \| base col1 \| base col2 \|
				41	\| R1 (4bits)\| R2 (4bits)\| G1 (4bits)\| G2 (4bits)\|
				42	-----------------------------------------------
				43
				44	47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
				45	---------------------------------------------------
				46	\| base col1 \| base col2 \| table \| table \|diff\|flip\|
				47	\| B1 (4bits)\| B2 (4bits)\| cw 1 \| cw 2 \|bit \|bit \|
				48	---------------------------------------------------
				49
				50
				51	b) bit layout in bits 63 through 32 if diffbit = 1
				52
				53	63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
				54	-----------------------------------------------
				55	\| base col1 \| dcol 2 \| base col1 \| dcol 2 \|
				56	\| R1' (5 bits) \| dR2 \| G1' (5 bits) \| dG2 \|
				57	-----------------------------------------------
				58
				59	47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
				60	---------------------------------------------------
				61	\| base col 1 \| dcol 2 \| table \| table \|diff\|flip\|
				62	\| B1' (5 bits) \| dB2 \| cw 1 \| cw 2 \|bit \|bit \|
				63	---------------------------------------------------
				64
				65
				66	c) bit layout in bits 31 through 0 (in both cases)
				67
				68	31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
				69	-----------------------------------------------
				70	\| most significant pixel index bits \|
				71	\| p\| o\| n\| m\| l\| k\| j\| i\| h\| g\| f\| e\| d\| c\| b\| a\|
				72	-----------------------------------------------
				73
				74	15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
				75	--------------------------------------------------
				76	\| least significant pixel index bits \|
				77	\| p\| o\| n\| m\| l\| k\| j\| i\| h\| g\| f\| e\| d\| c \| b \| a \|
				78	--------------------------------------------------
				79
				80
				81	Add table 3.17.2: Intensity modifier sets for ETC1 compressed textures:
				82
				83	table codeword modifier table
				84	------------------ ----------------------
				85	0 -8 -2 2 8
				86	1 -17 -5 5 17
				87	2 -29 -9 9 29
				88	3 -42 -13 13 42
				89	4 -60 -18 18 60
				90	5 -80 -24 24 80
				91	6 -106 -33 33 106
				92	7 -183 -47 47 183
				93
				94
				95	Add table 3.17.3 Mapping from pixel index values to modifier values for
				96	ETC1 compressed textures:
				97
				98	pixel index value
				99	---------------
				100	msb lsb resulting modifier value
				101	----- ----- -------------------------
				102	1 1 -b (large negative value)
				103	1 0 -a (small negative value)
				104	0 0 a (small positive value)
				105	0 1 b (large positive value)
				106
				107
				108	*/
				109
				110	static const int kModifierTable[] = {
				111	/* 0 */2, 8, -2, -8,
				112	/* 1 */5, 17, -5, -17,
				113	/* 2 */9, 29, -9, -29,
				114	/* 3 */13, 42, -13, -42,
				115	/* 4 */18, 60, -18, -60,
				116	/* 5 */24, 80, -24, -80,
				117	/* 6 */33, 106, -33, -106,
				118	/* 7 */47, 183, -47, -183 };
				119
				120	static const int kLookup[8] = { 0, 1, 2, 3, -4, -3, -2, -1 };
				121
				122	static inline etc1_byte clamp(int x) {
				123	return (etc1_byte) (x >= 0 ? (x < 255 ? x : 255) : 0);
				124	}
				125
				126	static
				127	inline int convert4To8(int b) {
				128	int c = b & 0xf;
				129	return (c << 4) \| c;
				130	}
				131
				132	static
				133	inline int convert5To8(int b) {
				134	int c = b & 0x1f;
				135	return (c << 3) \| (c >> 2);
				136	}
				137
				138	static
				139	inline int convert6To8(int b) {
				140	int c = b & 0x3f;
				141	return (c << 2) \| (c >> 4);
				142	}
				143
				144	static
				145	inline int divideBy255(int d) {
				146	return (d + 128 + (d >> 8)) >> 8;
				147	}
				148
				149	static
				150	inline int convert8To4(int b) {
				151	int c = b & 0xff;
Jack Palevich	2b93f0b	2011-12-09 14:06:07 -0800	[diff] [blame]	152	return divideBy255(c * 15);
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	153	}
				154
				155	static
				156	inline int convert8To5(int b) {
				157	int c = b & 0xff;
Jack Palevich	2b93f0b	2011-12-09 14:06:07 -0800	[diff] [blame]	158	return divideBy255(c * 31);
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	159	}
				160
				161	static
				162	inline int convertDiff(int base, int diff) {
				163	return convert5To8((0x1f & base) + kLookup[0x7 & diff]);
				164	}
				165
				166	static
				167	void decode_subblock(etc1_byte* pOut, int r, int g, int b, const int* table,
				168	etc1_uint32 low, bool second, bool flipped) {
				169	int baseX = 0;
				170	int baseY = 0;
				171	if (second) {
				172	if (flipped) {
				173	baseY = 2;
				174	} else {
				175	baseX = 2;
				176	}
				177	}
				178	for (int i = 0; i < 8; i++) {
				179	int x, y;
				180	if (flipped) {
				181	x = baseX + (i >> 1);
				182	y = baseY + (i & 1);
				183	} else {
				184	x = baseX + (i >> 2);
				185	y = baseY + (i & 3);
				186	}
				187	int k = y + (x * 4);
				188	int offset = ((low >> k) & 1) \| ((low >> (k + 15)) & 2);
				189	int delta = table[offset];
				190	etc1_byte* q = pOut + 3 * (x + 4 * y);
				191	*q++ = clamp(r + delta);
				192	*q++ = clamp(g + delta);
				193	*q++ = clamp(b + delta);
				194	}
				195	}
				196
				197	// Input is an ETC1 compressed version of the data.
				198	// Output is a 4 x 4 square of 3-byte pixels in form R, G, B
				199
				200	void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut) {
				201	etc1_uint32 high = (pIn[0] << 24) \| (pIn[1] << 16) \| (pIn[2] << 8) \| pIn[3];
				202	etc1_uint32 low = (pIn[4] << 24) \| (pIn[5] << 16) \| (pIn[6] << 8) \| pIn[7];
				203	int r1, r2, g1, g2, b1, b2;
				204	if (high & 2) {
				205	// differential
				206	int rBase = high >> 27;
				207	int gBase = high >> 19;
				208	int bBase = high >> 11;
				209	r1 = convert5To8(rBase);
				210	r2 = convertDiff(rBase, high >> 24);
				211	g1 = convert5To8(gBase);
				212	g2 = convertDiff(gBase, high >> 16);
				213	b1 = convert5To8(bBase);
				214	b2 = convertDiff(bBase, high >> 8);
				215	} else {
				216	// not differential
				217	r1 = convert4To8(high >> 28);
				218	r2 = convert4To8(high >> 24);
				219	g1 = convert4To8(high >> 20);
				220	g2 = convert4To8(high >> 16);
				221	b1 = convert4To8(high >> 12);
				222	b2 = convert4To8(high >> 8);
				223	}
				224	int tableIndexA = 7 & (high >> 5);
				225	int tableIndexB = 7 & (high >> 2);
				226	const int* tableA = kModifierTable + tableIndexA * 4;
				227	const int* tableB = kModifierTable + tableIndexB * 4;
				228	bool flipped = (high & 1) != 0;
				229	decode_subblock(pOut, r1, g1, b1, tableA, low, false, flipped);
				230	decode_subblock(pOut, r2, g2, b2, tableB, low, true, flipped);
				231	}
				232
				233	typedef struct {
				234	etc1_uint32 high;
				235	etc1_uint32 low;
				236	etc1_uint32 score; // Lower is more accurate
				237	} etc_compressed;
				238
				239	static
				240	inline void take_best(etc_compressed* a, const etc_compressed* b) {
				241	if (a->score > b->score) {
				242	a = b;
				243	}
				244	}
				245
				246	static
				247	void etc_average_colors_subblock(const etc1_byte* pIn, etc1_uint32 inMask,
				248	etc1_byte* pColors, bool flipped, bool second) {
				249	int r = 0;
				250	int g = 0;
				251	int b = 0;
				252
				253	if (flipped) {
				254	int by = 0;
				255	if (second) {
				256	by = 2;
				257	}
				258	for (int y = 0; y < 2; y++) {
				259	int yy = by + y;
				260	for (int x = 0; x < 4; x++) {
				261	int i = x + 4 * yy;
				262	if (inMask & (1 << i)) {
				263	const etc1_byte* p = pIn + i * 3;
				264	r += *(p++);
				265	g += *(p++);
				266	b += *(p++);
				267	}
				268	}
				269	}
				270	} else {
				271	int bx = 0;
				272	if (second) {
				273	bx = 2;
				274	}
				275	for (int y = 0; y < 4; y++) {
				276	for (int x = 0; x < 2; x++) {
				277	int xx = bx + x;
				278	int i = xx + 4 * y;
				279	if (inMask & (1 << i)) {
				280	const etc1_byte* p = pIn + i * 3;
				281	r += *(p++);
				282	g += *(p++);
				283	b += *(p++);
				284	}
				285	}
				286	}
				287	}
				288	pColors[0] = (etc1_byte)((r + 4) >> 3);
				289	pColors[1] = (etc1_byte)((g + 4) >> 3);
				290	pColors[2] = (etc1_byte)((b + 4) >> 3);
				291	}
				292
				293	static
				294	inline int square(int x) {
				295	return x * x;
				296	}
				297
				298	static etc1_uint32 chooseModifier(const etc1_byte* pBaseColors,
				299	const etc1_byte* pIn, etc1_uint32 *pLow, int bitIndex,
				300	const int* pModifierTable) {
				301	etc1_uint32 bestScore = ~0;
				302	int bestIndex = 0;
				303	int pixelR = pIn[0];
				304	int pixelG = pIn[1];
				305	int pixelB = pIn[2];
				306	int r = pBaseColors[0];
				307	int g = pBaseColors[1];
				308	int b = pBaseColors[2];
				309	for (int i = 0; i < 4; i++) {
				310	int modifier = pModifierTable[i];
				311	int decodedG = clamp(g + modifier);
				312	etc1_uint32 score = (etc1_uint32) (6 * square(decodedG - pixelG));
				313	if (score >= bestScore) {
				314	continue;
				315	}
				316	int decodedR = clamp(r + modifier);
				317	score += (etc1_uint32) (3 * square(decodedR - pixelR));
				318	if (score >= bestScore) {
				319	continue;
				320	}
				321	int decodedB = clamp(b + modifier);
				322	score += (etc1_uint32) square(decodedB - pixelB);
				323	if (score < bestScore) {
				324	bestScore = score;
				325	bestIndex = i;
				326	}
				327	}
				328	etc1_uint32 lowMask = (((bestIndex >> 1) << 16) \| (bestIndex & 1))
				329	<< bitIndex;
				330	*pLow \|= lowMask;
				331	return bestScore;
				332	}
				333
				334	static
				335	void etc_encode_subblock_helper(const etc1_byte* pIn, etc1_uint32 inMask,
				336	etc_compressed* pCompressed, bool flipped, bool second,
				337	const etc1_byte* pBaseColors, const int* pModifierTable) {
				338	int score = pCompressed->score;
				339	if (flipped) {
				340	int by = 0;
				341	if (second) {
				342	by = 2;
				343	}
				344	for (int y = 0; y < 2; y++) {
				345	int yy = by + y;
				346	for (int x = 0; x < 4; x++) {
				347	int i = x + 4 * yy;
				348	if (inMask & (1 << i)) {
				349	score += chooseModifier(pBaseColors, pIn + i * 3,
				350	&pCompressed->low, yy + x * 4, pModifierTable);
				351	}
				352	}
				353	}
				354	} else {
				355	int bx = 0;
				356	if (second) {
				357	bx = 2;
				358	}
				359	for (int y = 0; y < 4; y++) {
				360	for (int x = 0; x < 2; x++) {
				361	int xx = bx + x;
				362	int i = xx + 4 * y;
				363	if (inMask & (1 << i)) {
				364	score += chooseModifier(pBaseColors, pIn + i * 3,
				365	&pCompressed->low, y + xx * 4, pModifierTable);
				366	}
				367	}
				368	}
				369	}
				370	pCompressed->score = score;
				371	}
				372
				373	static bool inRange4bitSigned(int color) {
				374	return color >= -4 && color <= 3;
				375	}
				376
				377	static void etc_encodeBaseColors(etc1_byte* pBaseColors,
				378	const etc1_byte* pColors, etc_compressed* pCompressed) {
				379	int r1, g1, b1, r2, g2, b2; // 8 bit base colors for sub-blocks
				380	bool differential;
Stephen Hines	6792321	2019-05-08 17:24:33 -0700	[diff] [blame]	381	int r51 = convert8To5(pColors[0]);
				382	int g51 = convert8To5(pColors[1]);
				383	int b51 = convert8To5(pColors[2]);
				384	int r52 = convert8To5(pColors[3]);
				385	int g52 = convert8To5(pColors[4]);
				386	int b52 = convert8To5(pColors[5]);
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	387
Stephen Hines	6792321	2019-05-08 17:24:33 -0700	[diff] [blame]	388	r1 = convert5To8(r51);
				389	g1 = convert5To8(g51);
				390	b1 = convert5To8(b51);
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	391
Stephen Hines	6792321	2019-05-08 17:24:33 -0700	[diff] [blame]	392	int dr = r52 - r51;
				393	int dg = g52 - g51;
				394	int db = b52 - b51;
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	395
Stephen Hines	6792321	2019-05-08 17:24:33 -0700	[diff] [blame]	396	differential = inRange4bitSigned(dr) && inRange4bitSigned(dg)
				397	&& inRange4bitSigned(db);
				398	if (differential) {
				399	r2 = convert5To8(r51 + dr);
				400	g2 = convert5To8(g51 + dg);
				401	b2 = convert5To8(b51 + db);
				402	pCompressed->high \|= (r51 << 27) \| ((7 & dr) << 24) \| (g51 << 19)
				403	\| ((7 & dg) << 16) \| (b51 << 11) \| ((7 & db) << 8) \| 2;
				404	} else {
Jack Palevich	01cc538	2009-12-28 19:31:43 +0800	[diff] [blame]	405	int r41 = convert8To4(pColors[0]);
				406	int g41 = convert8To4(pColors[1]);
				407	int b41 = convert8To4(pColors[2]);
				408	int r42 = convert8To4(pColors[3]);
				409	int g42 = convert8To4(pColors[4]);
				410	int b42 = convert8To4(pColors[5]);
				411	r1 = convert4To8(r41);
				412	g1 = convert4To8(g41);
				413	b1 = convert4To8(b41);
				414	r2 = convert4To8(r42);
				415	g2 = convert4To8(g42);
				416	b2 = convert4To8(b42);
				417	pCompressed->high \|= (r41 << 28) \| (r42 << 24) \| (g41 << 20) \| (g42
				418	<< 16) \| (b41 << 12) \| (b42 << 8);
				419	}
				420	pBaseColors[0] = r1;
				421	pBaseColors[1] = g1;
				422	pBaseColors[2] = b1;
				423	pBaseColors[3] = r2;
				424	pBaseColors[4] = g2;
				425	pBaseColors[5] = b2;
				426	}
				427
				428	static
				429	void etc_encode_block_helper(const etc1_byte* pIn, etc1_uint32 inMask,
				430	const etc1_byte* pColors, etc_compressed* pCompressed, bool flipped) {
				431	pCompressed->score = ~0;
				432	pCompressed->high = (flipped ? 1 : 0);
				433	pCompressed->low = 0;
				434
				435	etc1_byte pBaseColors[6];
				436
				437	etc_encodeBaseColors(pBaseColors, pColors, pCompressed);
				438
				439	int originalHigh = pCompressed->high;
				440
				441	const int* pModifierTable = kModifierTable;
				442	for (int i = 0; i < 8; i++, pModifierTable += 4) {
				443	etc_compressed temp;
				444	temp.score = 0;
				445	temp.high = originalHigh \| (i << 5);
				446	temp.low = 0;
				447	etc_encode_subblock_helper(pIn, inMask, &temp, flipped, false,
				448	pBaseColors, pModifierTable);
				449	take_best(pCompressed, &temp);
				450	}
				451	pModifierTable = kModifierTable;
				452	etc_compressed firstHalf = *pCompressed;
				453	for (int i = 0; i < 8; i++, pModifierTable += 4) {
				454	etc_compressed temp;
				455	temp.score = firstHalf.score;
				456	temp.high = firstHalf.high \| (i << 2);
				457	temp.low = firstHalf.low;
				458	etc_encode_subblock_helper(pIn, inMask, &temp, flipped, true,
				459	pBaseColors + 3, pModifierTable);
				460	if (i == 0) {
				461	*pCompressed = temp;
				462	} else {
				463	take_best(pCompressed, &temp);
				464	}
				465	}
				466	}
				467
				468	static void writeBigEndian(etc1_byte* pOut, etc1_uint32 d) {
				469	pOut[0] = (etc1_byte)(d >> 24);
				470	pOut[1] = (etc1_byte)(d >> 16);
				471	pOut[2] = (etc1_byte)(d >> 8);
				472	pOut[3] = (etc1_byte) d;
				473	}
				474
				475	// Input is a 4 x 4 square of 3-byte pixels in form R, G, B
				476	// inmask is a 16-bit mask where bit (1 << (x + y * 4)) tells whether the corresponding (x,y)
				477	// pixel is valid or not. Invalid pixel color values are ignored when compressing.
				478	// Output is an ETC1 compressed version of the data.
				479
				480	void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 inMask,
				481	etc1_byte* pOut) {
				482	etc1_byte colors[6];
				483	etc1_byte flippedColors[6];
				484	etc_average_colors_subblock(pIn, inMask, colors, false, false);
				485	etc_average_colors_subblock(pIn, inMask, colors + 3, false, true);
				486	etc_average_colors_subblock(pIn, inMask, flippedColors, true, false);
				487	etc_average_colors_subblock(pIn, inMask, flippedColors + 3, true, true);
				488
				489	etc_compressed a, b;
				490	etc_encode_block_helper(pIn, inMask, colors, &a, false);
				491	etc_encode_block_helper(pIn, inMask, flippedColors, &b, true);
				492	take_best(&a, &b);
				493	writeBigEndian(pOut, a.high);
				494	writeBigEndian(pOut + 4, a.low);
				495	}
				496
				497	// Return the size of the encoded image data (does not include size of PKM header).
				498
				499	etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height) {
				500	return (((width + 3) & ~3) * ((height + 3) & ~3)) >> 1;
				501	}
				502
				503	// Encode an entire image.
				504	// pIn - pointer to the image data. Formatted such that the Red component of
				505	// pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset;
				506	// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
				507
				508	int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
				509	etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut) {
				510	if (pixelSize < 2 \|\| pixelSize > 3) {
				511	return -1;
				512	}
				513	static const unsigned short kYMask[] = { 0x0, 0xf, 0xff, 0xfff, 0xffff };
				514	static const unsigned short kXMask[] = { 0x0, 0x1111, 0x3333, 0x7777,
				515	0xffff };
				516	etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
				517	etc1_byte encoded[ETC1_ENCODED_BLOCK_SIZE];
				518
				519	etc1_uint32 encodedWidth = (width + 3) & ~3;
				520	etc1_uint32 encodedHeight = (height + 3) & ~3;
				521
				522	for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
				523	etc1_uint32 yEnd = height - y;
				524	if (yEnd > 4) {
				525	yEnd = 4;
				526	}
				527	int ymask = kYMask[yEnd];
				528	for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
				529	etc1_uint32 xEnd = width - x;
				530	if (xEnd > 4) {
				531	xEnd = 4;
				532	}
				533	int mask = ymask & kXMask[xEnd];
				534	for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
				535	etc1_byte* q = block + (cy * 4) * 3;
				536	const etc1_byte* p = pIn + pixelSize * x + stride * (y + cy);
				537	if (pixelSize == 3) {
				538	memcpy(q, p, xEnd * 3);
				539	} else {
				540	for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
				541	int pixel = (p[1] << 8) \| p[0];
				542	*q++ = convert5To8(pixel >> 11);
				543	*q++ = convert6To8(pixel >> 5);
				544	*q++ = convert5To8(pixel);
				545	p += pixelSize;
				546	}
				547	}
				548	}
				549	etc1_encode_block(block, mask, encoded);
				550	memcpy(pOut, encoded, sizeof(encoded));
				551	pOut += sizeof(encoded);
				552	}
				553	}
				554	return 0;
				555	}
				556
				557	// Decode an entire image.
				558	// pIn - pointer to encoded data.
				559	// pOut - pointer to the image data. Will be written such that the Red component of
				560	// pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset. Must be
				561	// large enough to store entire image.
				562
				563
				564	int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
				565	etc1_uint32 width, etc1_uint32 height,
				566	etc1_uint32 pixelSize, etc1_uint32 stride) {
				567	if (pixelSize < 2 \|\| pixelSize > 3) {
				568	return -1;
				569	}
				570	etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
				571
				572	etc1_uint32 encodedWidth = (width + 3) & ~3;
				573	etc1_uint32 encodedHeight = (height + 3) & ~3;
				574
				575	for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
				576	etc1_uint32 yEnd = height - y;
				577	if (yEnd > 4) {
				578	yEnd = 4;
				579	}
				580	for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
				581	etc1_uint32 xEnd = width - x;
				582	if (xEnd > 4) {
				583	xEnd = 4;
				584	}
				585	etc1_decode_block(pIn, block);
				586	pIn += ETC1_ENCODED_BLOCK_SIZE;
				587	for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
				588	const etc1_byte* q = block + (cy * 4) * 3;
				589	etc1_byte* p = pOut + pixelSize * x + stride * (y + cy);
				590	if (pixelSize == 3) {
				591	memcpy(p, q, xEnd * 3);
				592	} else {
				593	for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
				594	etc1_byte r = *q++;
				595	etc1_byte g = *q++;
				596	etc1_byte b = *q++;
				597	etc1_uint32 pixel = ((r >> 3) << 11) \| ((g >> 2) << 5) \| (b >> 3);
				598	*p++ = (etc1_byte) pixel;
				599	*p++ = (etc1_byte) (pixel >> 8);
				600	}
				601	}
				602	}
				603	}
				604	}
				605	return 0;
				606	}
				607
				608	static const char kMagic[] = { 'P', 'K', 'M', ' ', '1', '0' };
				609
				610	static const etc1_uint32 ETC1_PKM_FORMAT_OFFSET = 6;
				611	static const etc1_uint32 ETC1_PKM_ENCODED_WIDTH_OFFSET = 8;
				612	static const etc1_uint32 ETC1_PKM_ENCODED_HEIGHT_OFFSET = 10;
				613	static const etc1_uint32 ETC1_PKM_WIDTH_OFFSET = 12;
				614	static const etc1_uint32 ETC1_PKM_HEIGHT_OFFSET = 14;
				615
				616	static const etc1_uint32 ETC1_RGB_NO_MIPMAPS = 0;
				617
				618	static void writeBEUint16(etc1_byte* pOut, etc1_uint32 data) {
				619	pOut[0] = (etc1_byte) (data >> 8);
				620	pOut[1] = (etc1_byte) data;
				621	}
				622
				623	static etc1_uint32 readBEUint16(const etc1_byte* pIn) {
				624	return (pIn[0] << 8) \| pIn[1];
				625	}
				626
				627	// Format a PKM header
				628
				629	void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height) {
				630	memcpy(pHeader, kMagic, sizeof(kMagic));
				631	etc1_uint32 encodedWidth = (width + 3) & ~3;
				632	etc1_uint32 encodedHeight = (height + 3) & ~3;
				633	writeBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET, ETC1_RGB_NO_MIPMAPS);
				634	writeBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET, encodedWidth);
				635	writeBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET, encodedHeight);
				636	writeBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET, width);
				637	writeBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET, height);
				638	}
				639
				640	// Check if a PKM header is correctly formatted.
				641
				642	etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader) {
				643	if (memcmp(pHeader, kMagic, sizeof(kMagic))) {
				644	return false;
				645	}
				646	etc1_uint32 format = readBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET);
				647	etc1_uint32 encodedWidth = readBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET);
				648	etc1_uint32 encodedHeight = readBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET);
				649	etc1_uint32 width = readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
				650	etc1_uint32 height = readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
				651	return format == ETC1_RGB_NO_MIPMAPS &&
				652	encodedWidth >= width && encodedWidth - width < 4 &&
				653	encodedHeight >= height && encodedHeight - height < 4;
				654	}
				655
				656	// Read the image width from a PKM header
				657
				658	etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader) {
				659	return readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
				660	}
				661
				662	// Read the image height from a PKM header
				663
				664	etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader){
				665	return readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
				666	}