summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Johnathan Roatch, 2020-11-21 23:48:12 -0500
committer: Johnathan Roatch, 2020-11-21 23:48:12 -0500
commit08fd8986ecef5a2481b5d485e83ad386c307ce55 (patch)
treefd5d437b0b1d7ea7e23f24fb3ddc352fa1b1709a
downloadjrrtilevq-08fd8986ecef5a2481b5d485e83ad386c307ce55.tar.gz
jrrtilevq-08fd8986ecef5a2481b5d485e83ad386c307ce55.zip
Initial commit
-rw-r--r-- jrrtilevq-cli.c 222
-rw-r--r-- jrrtilevq.h 758
-rw-r--r-- lodepng.c 6410
-rw-r--r-- lodepng.h 1945
-rw-r--r-- makefile 20
-rw-r--r-- xxhash.h 5451
6 files changed, 14806 insertions, 0 deletions
diff --git a/jrrtilevq-cli.c b/jrrtilevq-cli.c
new file mode 100644
index 0000000..3f46cc6
--- /dev/null
+++ b/jrrtilevq-cli.c
@@ -0,0 +1,222 @@
+/* Standard headers that do not require the C runtime */
+#include <stddef.h>
+#include <limits.h>
+#include <float.h>
+#include <stdarg.h>
+#include <stdint.h> // C99
+#include <stdbool.h> // C99
+//#include <iso646.h> // don't use this
+//#include <stdalign.h> // C11
+//#include <stdnoreturn.h> // C11
+
+const char *PROGRAM_NAME = "jrrtilevq";
+// Help text printed for -h and when no input file is given.
+// The -n description says "at most": tiles are merged until the number of
+// unique tiles is no larger than the requested count.
+const char *USAGE_TEXT =
+    "jrrtilevq - combines similar tiles in PNG file.\n"
+    "\n"
+    "Usage:\n"
+    " jrrtilevq -t 8x8 -n 256 INPUT.png [OUTPUT.png]\n"
+    "\n"
+    "Options:\n"
+    " -h show this help message and exit\n"
+    " -t NxN The width and height of tiles (default 8x8)\n"
+    " -n N reduce image to at most this many tiles (default 256)\n"
+    " -f Match tiles by flipping horizontally and vertically\n"
+;
+
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#define JRRTILEVQ_HASH_FUNCTION XXH3_64bits
+#define JRRTILEVQ_IMPLEMENTATION
+#include "jrrtilevq.h"
+
+#include "lodepng.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Reads a whole stream that cannot be measured with fseek/ftell (stdin,
+// named pipes) by growing a heap buffer while data keeps arriving.
+//
+// data_ptr: receives a malloc'd buffer that the caller must free().
+// f:        stream to read until end of file.
+//
+// Returns the number of bytes read. Returns 0 and does *not* set data_ptr
+// on error or for a zero sized file.
+size_t read_whole_file_without_fseek(uint8_t **data_ptr, FILE *f)
+{
+    // A slower exponential growth than a simple "capacity *= 2" is wanted,
+    // so the capacities follow the fibonacci sequence (growth rate of about 1.6).
+    // 4KiB (common memory page size) times the 6th fibonacci number is 32KiB.
+    size_t capacity = 32768;
+    size_t previous_capacity = 20480;
+    size_t length = 0;
+    uint8_t *data;
+    uint8_t *resized;
+
+    if (!data_ptr || !f)
+        return 0;
+
+    data = malloc(capacity);
+    if (!data)
+        return 0;
+
+    for (;;) {
+        length += fread(data + length, sizeof(uint8_t), capacity - length, f);
+        if (ferror(f))
+            goto free_and_return_empty;
+        if (feof(f))
+            break;
+
+        if (length == capacity) {
+            // refuse to wrap around instead of allocating a tiny buffer
+            if (previous_capacity > SIZE_MAX - capacity)
+                goto free_and_return_empty;
+            size_t grown = capacity + previous_capacity;
+            resized = realloc(data, grown);
+            if (!resized)
+                goto free_and_return_empty;
+            data = resized;
+            previous_capacity = capacity;
+            capacity = grown;
+        }
+    }
+    if (!length)
+        goto free_and_return_empty;
+
+    // Shrink the buffer to fit the final size. A failed shrink leaves the
+    // original (larger) buffer intact, so the data is still usable.
+    resized = realloc(data, length);
+    if (resized)
+        data = resized;
+
+    *data_ptr = data;
+    return length;
+free_and_return_empty:
+    free(data);
+    return 0;
+}
+
+// Reads the whole of f into a freshly malloc'd buffer stored in *data_ptr
+// (the caller frees it) and returns the number of bytes read.
+// Returns 0 and does *not* set data_ptr on error or for a zero sized file.
+// Streams that cannot be seeked or measured are read incrementally instead.
+size_t read_whole_file(uint8_t **data_ptr, FILE *f)
+{
+    if (!data_ptr || !f)
+        return 0;
+
+    if (fseek(f, 0, SEEK_END) != 0) {
+        // If fseek fails the file cursor is still at the beginning
+        // so no need for rewind(f)
+        return read_whole_file_without_fseek(data_ptr, f);
+    }
+
+    // ftell reports errors as -1L, so keep the result signed until checked
+    long end_position = ftell(f);
+    rewind(f);
+    if (end_position < 0)
+        return read_whole_file_without_fseek(data_ptr, f);
+    if (end_position == 0)
+        return 0;
+
+    // fseek and ftell work, so now do it the simple way
+    size_t length = (size_t)end_position;
+    uint8_t *data = malloc(length);
+    if (!data)
+        return 0;
+    size_t read_length = fread(data, sizeof(uint8_t), length, f);
+    if (read_length != length) {
+        free(data);
+        return 0;
+    }
+
+    *data_ptr = data;
+    return length;
+}
+
+
+// Command line entry point: parses the options, decodes the input PNG,
+// pads it to whole tiles, merges similar tiles and writes the result.
+// Returns 0 on success (or after printing the usage text), 1 on any error.
+int main (int argc, char *argv[])
+{
+    char *input_filename = NULL;
+    char *output_filename = NULL;
+    char buf[FILENAME_MAX];
+    uint8_t *image_buf = NULL;
+    unsigned image_width = 0;
+    unsigned image_height = 0;
+
+    unsigned transforms = 1;
+    unsigned max_number_of_tiles = 256;
+    unsigned tile_width = 8;
+    unsigned tile_height = 8;
+
+    int argn;
+    unsigned long parsed_number;
+    char *strtol_ptr;
+    char *strtol_ptr_result;
+    for (argn = 1; argn < argc; ++argn) {
+        if (argv[argn][0] != '-') {
+            break; //assume the rest of arguments are the input and output filenames
+        }
+        switch (argv[argn][1]) {
+        case 'h':
+            fputs(USAGE_TEXT, stdout);
+            return 0;
+        case 'f':
+            transforms = 0x0f;
+            break;
+        case 'n':
+            // the value is either glued to the flag ("-n256") or the next argument
+            strtol_ptr = argv[argn]+2;
+            if ((*strtol_ptr == '\0') && (argn+1 < argc)) {
+                ++argn;
+                strtol_ptr = argv[argn];
+            }
+            parsed_number = strtoul(strtol_ptr, &strtol_ptr_result, 0);
+            // zero tiles can never be reached, and the value must fit an unsigned
+            if ((strtol_ptr == strtol_ptr_result) || (parsed_number == 0) || (parsed_number > UINT_MAX)) {
+                fprintf(stderr, "Failed to parse the -n argument\n");
+                return 1;
+            }
+            max_number_of_tiles = (unsigned)parsed_number;
+            break;
+        case 't':
+            strtol_ptr = argv[argn]+2;
+            if ((*strtol_ptr == '\0') && (argn+1 < argc)) {
+                ++argn;
+                strtol_ptr = argv[argn];
+            }
+            parsed_number = strtoul(strtol_ptr, &strtol_ptr_result, 0);
+            if ((strtol_ptr == strtol_ptr_result) || (parsed_number == 0) || (parsed_number > UINT_MAX)) {
+                fprintf(stderr, "Failed to parse the width of the -t argument\n");
+                return 1;
+            }
+            tile_width = (unsigned)parsed_number;
+            // a separator character must follow the width; without this check
+            // "-t 8" would step over the string terminator
+            if (*strtol_ptr_result == '\0') {
+                fprintf(stderr, "Failed to parse the height of the -t argument\n");
+                return 1;
+            }
+            strtol_ptr = strtol_ptr_result+1;
+            parsed_number = strtoul(strtol_ptr, &strtol_ptr_result, 0);
+            if ((strtol_ptr == strtol_ptr_result) || (parsed_number == 0) || (parsed_number > UINT_MAX)) {
+                fprintf(stderr, "Failed to parse the height of the -t argument\n");
+                return 1;
+            }
+            tile_height = (unsigned)parsed_number;
+            break;
+        default:
+            fprintf(stderr, "Unreconized argument, sorry\n");
+            return 1;
+        }
+    }
+
+    int remaining_args = argc - argn;
+    if (remaining_args <= 0) {
+        fputs(USAGE_TEXT, stdout);
+        return 0;
+    }
+    input_filename = argv[argn];
+    if (remaining_args == 1) {
+        // derive "NAME.out.png" from "NAME.png"; names without a .png
+        // extension simply get ".out.png" appended
+        size_t stem_size = strlen(input_filename);
+        if ((stem_size >= 4)
+            && (input_filename[stem_size-4] == '.')
+            && ((input_filename[stem_size-3] | 0x20) == 'p')
+            && ((input_filename[stem_size-2] | 0x20) == 'n')
+            && ((input_filename[stem_size-1] | 0x20) == 'g')) {
+            stem_size -= 4;
+        }
+        int written = -1;
+        if (stem_size < sizeof(buf))
+            written = snprintf(buf, sizeof(buf), "%.*s.out.png", (int)stem_size, input_filename);
+        if ((written < 0) || ((size_t)written >= sizeof(buf))) {
+            fprintf(stderr, "%s: file name is too long\n", input_filename);
+            return 1;
+        }
+        output_filename = buf;
+    } else {
+        output_filename = argv[argn+1];
+    }
+
+    int status = 1;
+    unsigned lodepng_error = lodepng_decode32_file(&image_buf, &image_width, &image_height, input_filename);
+    if (lodepng_error) {
+        fprintf(stderr, "%s: LodePNG error %u: %s\n", input_filename, lodepng_error, lodepng_error_text(lodepng_error));
+        goto cleanup;
+    }
+
+    if (jrrtilevq_pad_image(&image_buf, &image_width, &image_height, tile_width, tile_height, 0, 0)) {
+        fprintf(stderr, "%s: failed to pad the image to whole tiles\n", input_filename);
+        goto cleanup;
+    }
+
+    // jrrtilevq fails when the tile map cannot be built: out of memory, or
+    // the image has more colors than the 255 entry palette can hold
+    if (jrrtilevq(image_buf, image_width, image_height, tile_width, tile_height, max_number_of_tiles, transforms)) {
+        fprintf(stderr, "%s: failed to reduce tiles (out of memory, or more than 255 colors)\n", input_filename);
+        goto cleanup;
+    }
+
+    lodepng_error = lodepng_encode32_file(output_filename, image_buf, image_width, image_height);
+    if (lodepng_error) {
+        fprintf(stderr, "%s: LodePNG error %u: %s\n", output_filename, lodepng_error, lodepng_error_text(lodepng_error));
+        goto cleanup;
+    }
+    status = 0;
+
+cleanup:
+    free(image_buf);
+    return status;
+}
diff --git a/jrrtilevq.h b/jrrtilevq.h
new file mode 100644
index 0000000..1d65a7f
--- /dev/null
+++ b/jrrtilevq.h
@@ -0,0 +1,758 @@
+#ifndef INCLUDE_JRRTILEVQ_H
+#define INCLUDE_JRRTILEVQ_H
+
+#include <stddef.h>
+#include <limits.h>
+#include <float.h>
+#include <stdarg.h>
+#include <stdint.h> // C99
+#include <stdbool.h> // C99
+//#include <iso646.h> // don't use this
+//#include <stdalign.h> // C11
+//#include <stdnoreturn.h> // C11
+
+struct jrrtilevq_tilemap {
+ unsigned int map_w; // number of tile columns in the map
+ unsigned int map_h; // number of tile rows in the map
+ unsigned int tile_w; // tile width in pixels
+ unsigned int tile_h; // tile height in pixels
+ unsigned int n_tiles; // number of distinct tiles currently stored in the hash table
+ unsigned int tiles_cap; // number of slots in the tile hash table
+ void *mem; // one allocation: palette[256], map, map_attr, tiles, counts (in that order)
+};
+
+// Reduces a 32-bit-per-pixel image in place until it uses no more than
+// target_n_tiles distinct tiles. Returns 0 on success, 1 when the tile map
+// cannot be built. (The definition names the last parameter allowed_transforms.)
+int jrrtilevq(uint8_t* image, unsigned int width, unsigned int height, unsigned int tile_w, unsigned int tile_h, unsigned int target_n_tiles, unsigned int allowed_flips);
+
+// Builds a palettized tile map of the image into *tilemap. Returns 0 on
+// success and 1 on failure; release the result with jrrtilevq_free().
+int jrrtilevq_make_tilemap(struct jrrtilevq_tilemap* tilemap, const uint8_t* image, unsigned int width, unsigned int height, unsigned int tile_w, unsigned int tile_h, unsigned int allowed_transforms);
+
+// Draws the tile map back into a 32-bit-per-pixel image buffer. Returns 1
+// when image is NULL or the buffer is smaller than the map, 0 otherwise.
+int jrrtilevq_render_image(uint8_t* image, unsigned int width, unsigned int height, struct jrrtilevq_tilemap tilemap);
+
+// Frees the tile map's memory block and zeroes the structure.
+void jrrtilevq_free(struct jrrtilevq_tilemap* tilemap);
+
+// Replaces *image with a zero-padded copy whose size is a whole number of
+// tiles, updating *width and *height. Returns 0 on success, 1 on failure.
+int jrrtilevq_pad_image(uint8_t** image, unsigned int *width, unsigned int *height, unsigned int tile_w, unsigned int tile_h, int offset_x, int offset_y);
+
+// Rewrites tile in place using one of the allowed transforms and returns
+// the index (0..7) of the transform that was applied.
+int jrrtilevq_normalize_flip(uint8_t *tile, unsigned int w, unsigned int h, unsigned int allowed_transforms);
+
+#ifdef JRRTILEVQ_IMPLEMENTATION
+
+
+#ifndef JRRTILEVQ_HASH_FUNCTION
+// Fallback 64-bit FNV-1a hash over len bytes of data.
+// Reference: http://www.isthe.com/chongo/tech/comp/fnv/index.html
+static uint64_t jrrtilevq_fnv1a(const void* data, size_t len)
+{
+    const uint8_t *cursor = data;
+    uint64_t digest = 0xcbf29ce484222325; // FNV offset basis
+    for (size_t remaining = len; remaining != 0; --remaining) {
+        // xor the next byte in, then multiply by the FNV prime
+        digest = (digest ^ *cursor++) * 0x00000100000001B3;
+    }
+    return digest;
+}
+#define JRRTILEVQ_HASH_FUNCTION jrrtilevq_fnv1a
+#endif
+
+#include <stdlib.h> // malloc, free
+#include <string.h> // memcpy, memcmp
+
+// Replaces *image (32 bits per pixel, allocated with malloc) with a copy
+// padded with zeroed pixels so that its size is a whole number of tiles.
+//
+// offset_x/offset_y shift the tile grid: the image is moved right/down by
+// the offset reduced modulo the tile size (always in 0..tile-1, also for
+// negative offsets), then the right and bottom edges are rounded up.
+//
+// On success the old buffer is freed and *image, *width and *height are
+// updated; when no padding is needed nothing changes. Returns 0 on
+// success and 1 on invalid arguments, size overflow or allocation failure.
+int jrrtilevq_pad_image(uint8_t** image, unsigned int *width, unsigned int *height, unsigned int tile_w, unsigned int tile_h, int offset_x, int offset_y)
+{
+    if (!image || !(*image) || !width || !height || !tile_w || !tile_h)
+        return 1;
+
+    // do the modulo in a signed type wide enough for both operands;
+    // mixing int and unsigned here would turn negative offsets into huge values
+    long long rem_x = (long long)offset_x % (long long)tile_w;
+    long long rem_y = (long long)offset_y % (long long)tile_h;
+    unsigned long long left_pad = (unsigned long long)((rem_x < 0) ? rem_x + (long long)tile_w : rem_x);
+    unsigned long long top_pad = (unsigned long long)((rem_y < 0) ? rem_y + (long long)tile_h : rem_y);
+
+    unsigned long long old_width = *width;
+    unsigned long long old_height = *height;
+    unsigned long long new_width = ((old_width + left_pad + tile_w - 1) / tile_w) * tile_w;
+    unsigned long long new_height = ((old_height + top_pad + tile_h - 1) / tile_h) * tile_h;
+
+    if ((old_width == new_width) && (old_height == new_height))
+        return 0;
+
+    // the result has to fit the unsigned outputs and a size_t byte count
+    if ((new_width > UINT_MAX) || (new_height > UINT_MAX))
+        return 1;
+    if (new_height && (new_width > (SIZE_MAX / sizeof(uint32_t)) / new_height))
+        return 1;
+
+    uint8_t* new_image = calloc((size_t)(new_width * new_height), sizeof(uint32_t));
+    if (!new_image)
+        return 1;
+
+    size_t src_stride = (size_t)old_width * sizeof(uint32_t);
+    size_t dst_stride = (size_t)new_width * sizeof(uint32_t);
+    const uint8_t* src = *image;
+    uint8_t* dst = new_image + ((size_t)top_pad * dst_stride) + ((size_t)left_pad * sizeof(uint32_t));
+    for (unsigned long long row = 0; row < old_height; ++row) {
+        memcpy(dst, src, src_stride);
+        src += src_stride;
+        dst += dst_stride;
+    }
+
+    free(*image);
+    *image = new_image;
+    *width = (unsigned int)new_width;
+    *height = (unsigned int)new_height;
+    return 0;
+}
+
+// Byte size of the single memory block behind a tile map, whose sections
+// are laid out as: palette, map, map_attr, tiles, counts.
+static int jrrtilevq_mem_size(struct jrrtilevq_tilemap tilemap)
+{
+    // The palette is limited to 255 colors plus the don't care color,
+    // because the algorithm is not well suited for true-color images.
+    int palette_bytes = sizeof(uint32_t) * 256;
+    // one 64-bit tile hash and one attribute byte per map cell
+    int map_bytes = tilemap.map_w * tilemap.map_h * sizeof(uint64_t);
+    int attr_bytes = tilemap.map_w * tilemap.map_h * sizeof(uint8_t);
+    // one palette index per pixel for every hash table slot
+    int tile_bytes = tilemap.tile_w * tilemap.tile_h * tilemap.tiles_cap;
+    // one usage counter per hash table slot
+    int count_bytes = tilemap.tiles_cap * sizeof(int);
+
+    return palette_bytes + map_bytes + attr_bytes + tile_bytes + count_bytes;
+}
+
+// Releases the memory block owned by the tile map and clears every field,
+// leaving the structure zeroed. A NULL tilemap is ignored.
+void jrrtilevq_free(struct jrrtilevq_tilemap* tilemap)
+{
+    if (tilemap) {
+        free(tilemap->mem);
+        memset(tilemap, 0x00, sizeof(*tilemap));
+    }
+}
+
+// Adds n uses of the tile in tile_buf to the hash table. A tile that is not
+// stored yet is copied into the first free slot of its probe sequence and
+// counted in n_tiles. Returns the tile's hash (0 when tilemap is NULL).
+static uint64_t jrrtilevq_hashtable_add(struct jrrtilevq_tilemap* tilemap, const uint8_t* tile_buf, int n)
+{
+    if (!tilemap)
+        return 0;
+
+    int tile_size = tilemap->tile_w * tilemap->tile_h;
+    uint64_t tile_hash = JRRTILEVQ_HASH_FUNCTION(tile_buf, tile_size);
+
+    // walk the memory block: palette, map, map_attr, tiles, counts
+    uint64_t* map = (uint64_t*)((uint32_t*)(tilemap->mem) + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (tilemap->map_w * tilemap->map_h));
+    uint8_t* tiles = (uint8_t*)(map_attr + (tilemap->map_w * tilemap->map_h));
+    unsigned int* counts = (unsigned int*)(tiles + (tilemap->tiles_cap * tile_size));
+
+    // linear probing, starting at the slot selected by the hash
+    int slot = tile_hash % tilemap->tiles_cap;
+    for (;;) {
+        uint8_t* stored = tiles + (slot * tile_size);
+        if (!counts[slot]) {
+            // free slot: the tile is new
+            memcpy(stored, tile_buf, tile_size);
+            tilemap->n_tiles += 1;
+            break;
+        }
+        if (memcmp(stored, tile_buf, tile_size) == 0)
+            break; // already stored here
+        slot = (slot + 1) % tilemap->tiles_cap;
+    }
+    counts[slot] += n;
+
+    return tile_hash;
+}
+
+// Removes the tile in tile_buf from the hash table and returns the use
+// count it had; returns 0 when tilemap is NULL or the tile is not stored.
+// The following entries of the probe cluster are shifted back where needed
+// so that later lookups by linear probing still find them.
+static unsigned int jrrtilevq_hashtable_remove(struct jrrtilevq_tilemap* tilemap, const uint8_t* tile_buf)
+{
+ if (!tilemap)
+ return 0;
+
+ int tile_size = tilemap->tile_w * tilemap->tile_h;
+
+ // sections of the memory block: palette, map, map_attr, tiles, counts
+ uint32_t* palette = (uint32_t*)(tilemap->mem);
+ uint64_t* map = (uint64_t*)(palette + 256);
+ uint8_t* map_attr = (uint8_t*)(map + (tilemap->map_w * tilemap->map_h));
+ uint8_t* tiles = (uint8_t*)(map_attr + (tilemap->map_w * tilemap->map_h));
+ unsigned int* counts = (unsigned int*)(tiles + (tilemap->tiles_cap * tile_size));
+
+ // probe linearly from the tile's home slot until it or an empty slot is found
+ uint64_t tile_hash = JRRTILEVQ_HASH_FUNCTION(tile_buf, tile_size);
+ unsigned int tile_slot = tile_hash % tilemap->tiles_cap;
+ while (counts[tile_slot] && memcmp(tiles + (tile_slot * tile_size), tile_buf, tile_size)) {
+ tile_slot = (tile_slot + 1) % tilemap->tiles_cap;
+ }
+
+ unsigned int amount_removed = counts[tile_slot];
+ if (!amount_removed)
+ return 0;
+
+ unsigned int removed_slot = tile_slot;
+ unsigned int new_slot;
+
+ // a zero count marks the slot as empty
+ counts[removed_slot] = 0;
+ while (1) {
+ tile_slot = (tile_slot + 1) % tilemap->tiles_cap;
+ if (!counts[tile_slot])
+ break; // end of the cluster
+ tile_hash = JRRTILEVQ_HASH_FUNCTION(tiles + (tile_slot * tile_size), tile_size);
+ new_slot = tile_hash % tilemap->tiles_cap; // home slot of this entry
+ // keep the entry in place when its home slot lies cyclically in
+ // (removed_slot, tile_slot]: its probe path does not cross the hole
+ if ( (removed_slot <= tile_slot)
+ ? ((removed_slot < new_slot) && (new_slot <= tile_slot))
+ : ((removed_slot < new_slot) || (new_slot <= tile_slot))
+ ) {
+ continue;
+ }
+ // otherwise move the entry into the hole; its old slot becomes the new hole
+ memcpy(tiles + (removed_slot * tile_size), tiles + (tile_slot * tile_size), tile_size);
+ counts[removed_slot] = counts[tile_slot];
+ removed_slot = tile_slot;
+ counts[removed_slot] = 0;
+ }
+
+ --(tilemap->n_tiles);
+
+ return amount_removed;
+}
+
+// Rebuilds the tile hash table with new_tiles_cap slots.
+//
+// The tile and count sections change size, so a new memory block is
+// allocated: palette, map and map_attr are copied over unchanged and every
+// stored tile is re-inserted with its use count. On allocation failure (or
+// a non-positive capacity) the tile map is left untouched, which callers
+// can detect by tiles_cap not having changed.
+static void jrrtilevq_resize_hashtable(struct jrrtilevq_tilemap* tilemap, int new_tiles_cap)
+{
+    if (!tilemap || (new_tiles_cap <= 0))
+        return;
+
+    int tile_size = tilemap->tile_w * tilemap->tile_h;
+
+    struct jrrtilevq_tilemap new_tilemap = {0};
+    new_tilemap.map_w = tilemap->map_w;
+    new_tilemap.map_h = tilemap->map_h;
+    new_tilemap.tile_w = tilemap->tile_w;
+    new_tilemap.tile_h = tilemap->tile_h;
+    new_tilemap.n_tiles = 0;
+    new_tilemap.tiles_cap = new_tiles_cap;
+
+    size_t new_mem_size = jrrtilevq_mem_size(new_tilemap);
+    new_tilemap.mem = calloc(new_mem_size, sizeof(uint8_t));
+    if (!new_tilemap.mem)
+        return;
+
+    // sections of the old memory block: palette, map, map_attr, tiles, counts
+    uint32_t* palette = (uint32_t*)(tilemap->mem);
+    uint64_t* map = (uint64_t*)(palette + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (tilemap->map_w * tilemap->map_h));
+    uint8_t* tiles = (uint8_t*)(map_attr + (tilemap->map_w * tilemap->map_h));
+    unsigned int* counts = (unsigned int*)(tiles + (tilemap->tiles_cap * tile_size));
+
+    // The first three sections do not depend on the table capacity, so they
+    // sit at the same offsets in both blocks. map_attr has to be copied as
+    // well, otherwise the per-cell transforms recorded so far are lost.
+    size_t cells = (size_t)tilemap->map_w * tilemap->map_h;
+    size_t fixed_size = (256 * sizeof(uint32_t)) + (cells * sizeof(uint64_t)) + (cells * sizeof(uint8_t));
+    memcpy(new_tilemap.mem, tilemap->mem, fixed_size);
+
+    for (unsigned int slot = 0; slot < tilemap->tiles_cap; ++slot) {
+        if (counts[slot]) {
+            jrrtilevq_hashtable_add(&new_tilemap, tiles + (slot * tile_size), counts[slot]);
+        }
+    }
+
+    free(tilemap->mem);
+    tilemap->mem = new_tilemap.mem;
+    tilemap->n_tiles = new_tilemap.n_tiles;
+    tilemap->tiles_cap = new_tilemap.tiles_cap;
+}
+
+
+// Points every map cell that refers to tile a or tile b (matched by hash)
+// at the merged tile m, and folds the transform that maps the old tile onto
+// the merged one (t_a or t_b) into the cell's attribute byte.
+static void jrrtilevq_map_search_and_replace(struct jrrtilevq_tilemap* tilemap, uint64_t tile_m_hash, uint64_t tile_a_hash, uint64_t tile_b_hash, unsigned int t_a, unsigned int t_b)
+{
+    if (!tilemap)
+        return;
+
+    const unsigned int columns = tilemap->map_w;
+    const unsigned int rows = tilemap->map_h;
+
+    // the map follows the 256 entry palette, map_attr follows the map
+    uint64_t* map = (uint64_t*)((uint32_t*)(tilemap->mem) + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (tilemap->map_w * tilemap->map_h));
+
+    for (unsigned int row = 0; row < rows; ++row) {
+        for (unsigned int column = 0; column < columns; ++column) {
+            const unsigned int cell = row * columns + column;
+            unsigned int extra;
+
+            // tile a wins when both hashes happen to be equal
+            if (map[cell] == tile_a_hash)
+                extra = t_a;
+            else if (map[cell] == tile_b_hash)
+                extra = t_b;
+            else
+                continue;
+
+            map[cell] = tile_m_hash;
+
+            unsigned int attr = map_attr[cell];
+            if (attr & 4) {
+                // if the source was transposed, then applying horizontal
+                // flips behaves like vertical flips and vice versa
+                extra = ((extra & 1) << 1) | ((extra & 2) >> 1) | (extra & 4);
+            }
+            map_attr[cell] = attr ^ extra;
+        }
+    }
+}
+
+
+// Maps pixel (x, y) of a w by h tile through transform t and returns the
+// row-major index y * w + x of the result.
+// Bits of t: 1 mirrors x, 2 mirrors y, 4 swaps x and y. The swap is applied
+// before the mirrors when in_transform is true and after them otherwise.
+static unsigned int jrrtilevq_tile_coords(unsigned int x, unsigned int y, unsigned int w, unsigned int h, unsigned int t, bool in_transform)
+{
+    const bool transpose = (t & 4) != 0;
+
+    if (transpose && in_transform) {
+        const unsigned int old_x = x;
+        x = y;
+        y = old_x;
+    }
+
+    x = (t & 1) ? (w - x - 1) : x;
+    y = (t & 2) ? (h - y - 1) : y;
+
+    if (transpose && !in_transform) {
+        const unsigned int old_x = x;
+        x = y;
+        y = old_x;
+    }
+
+    return y * w + x;
+}
+
+// modifies tile, and returns the type of flip used.
+//
+// allowed_transforms is a bit mask: bit t set means transform t (0..7, see
+// jrrtilevq_tile_coords) may be used; the identity (bit 0) is always a
+// candidate. The tile is scanned in row-major order and at every pixel the
+// candidates that do not produce the smallest value there are dropped, so
+// the surviving transform yields the smallest tile when read in that order.
+// The tile is then rewritten with the lowest numbered survivor, whose
+// number is returned. Returns 0 without touching the tile when tile is
+// NULL, a dimension is 0, or only the identity is allowed.
+int jrrtilevq_normalize_flip(uint8_t *tile, unsigned int w, unsigned int h, unsigned int allowed_transforms)
+{
+ if (!tile || !w || !h)
+ return 0;
+ unsigned int x, y, i, i2, t, p, min_index;
+ unsigned int canidate_transforms = allowed_transforms | 1;
+ if (allowed_transforms == 1)
+ return 0;
+ // work from a copy, because tile itself is overwritten at the end
+ uint8_t tile_copy[w*h];
+ memcpy(tile_copy, tile, w*h);
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ // first pass: smallest value any remaining candidate shows at (x, y);
+ // 256 is above every uint8_t value. p counts the remaining candidates.
+ min_index = 256;
+ p = 0;
+ for (t = 0; t < 8; ++t) {
+ if (canidate_transforms & (1<<t)) {
+ i = jrrtilevq_tile_coords(x, y, w, h, t, 1);
+ if (tile_copy[i] < min_index)
+ min_index = tile_copy[i];
+ ++p;
+ }
+ }
+ // a single candidate left: nothing more to decide
+ if (p <= 1)
+ goto found_transform;
+ // second pass: drop the candidates that show a larger value here
+ for (t = 0; t < 8; ++t) {
+ if (canidate_transforms & (1<<t)) {
+ i = jrrtilevq_tile_coords(x, y, w, h, t, 1);
+ if (min_index < tile_copy[i])
+ canidate_transforms &= ~(1<<t);
+ }
+ }
+ }
+ }
+found_transform:
+ // pick the lowest numbered transform that is still a candidate
+ for (t = 0; t < 8; ++t) {
+ if (canidate_transforms & (1<<t))
+ break;
+ }
+ // write the transformed pixels back into the caller's tile
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ i = y * w + x;
+ i2 = jrrtilevq_tile_coords(x, y, w, h, t, 1);
+ tile[i] = tile_copy[i2];
+ }
+ }
+ return t;
+}
+
+// Estimates the cost of merging tile_a and tile_b: every pair of allowed
+// transforms is tried and the cheapest pairing is reported.
+//
+// A pixel position costs nothing when both tiles agree there or when either
+// holds index 0. Otherwise it costs counts_a if tile a's pixel is not 0xff,
+// plus counts_b if tile b's pixel is not 0xff.
+//
+// The transforms of the cheapest pairing are stored in *t_a and *t_b (the
+// first pairing found wins ties) and its cost is returned.
+static unsigned int jrrtilevq_minimum_changed_pixels(const uint8_t *tile_a, const uint8_t *tile_b, unsigned int w, unsigned int h, unsigned int counts_a, unsigned int counts_b, unsigned int* t_a, unsigned int* t_b, unsigned int allowed_transforms)
+{
+    const unsigned int usable = allowed_transforms | 1; // identity is always usable
+    unsigned int best_cost = UINT_MAX;
+    unsigned int best_a = 0;
+    unsigned int best_b = 0;
+
+    for (unsigned int trans_a = 0; trans_a < 8; ++trans_a) {
+        if (!(1<<trans_a & usable))
+            continue;
+        for (unsigned int trans_b = 0; trans_b < 8; ++trans_b) {
+            if (!(1<<trans_b & usable))
+                continue;
+
+            unsigned int cost = 0;
+            for (unsigned int y = 0; y < h; ++y) {
+                for (unsigned int x = 0; x < w; ++x) {
+                    const uint8_t pixel_a = tile_a[jrrtilevq_tile_coords(x, y, w, h, trans_a, 1)];
+                    const uint8_t pixel_b = tile_b[jrrtilevq_tile_coords(x, y, w, h, trans_b, 1)];
+                    if ((pixel_a != pixel_b) && pixel_a && pixel_b) {
+                        if (pixel_a != 0xff)
+                            cost += counts_a;
+                        if (pixel_b != 0xff)
+                            cost += counts_b;
+                    }
+                }
+            }
+
+            if (cost < best_cost) {
+                best_cost = cost;
+                best_a = trans_a;
+                best_b = trans_b;
+            }
+        }
+    }
+
+    *t_a = best_a;
+    *t_b = best_b;
+    return best_cost;
+}
+
+
+// Turns 64 random bits into a double in the range 0.0 <= x < 1.0.
+// The top 52 bits become the mantissa of a double with exponent 0x3ff,
+// giving a value in [1.0, 2.0), from which 1.0 is subtracted.
+// Idea stolen from https://github.com/mattiasgustavsson/libs/blob/main/rnd.h
+static double float_from_u64(uint64_t value)
+{
+    const uint64_t bits = ((uint64_t)0x3ff << 52) | (value >> 12);
+    double one_to_two;
+    memcpy(&one_to_two, &bits, sizeof(double));
+    return one_to_two - 1.0;
+}
+
+// Builds merged_tile from tile_a seen through transform t_a and tile_b seen
+// through transform t_b.
+// Per pixel: equal values are kept; if one side is index 0 the other side
+// is taken; otherwise tile a's pixel is chosen with probability bias and
+// tile b's pixel the rest of the time. The choice comes from a xorshift64
+// generator seeded with the two tile hashes, so it is repeatable.
+static void jrrtilevq_make_diff_tile(uint8_t* merged_tile, const uint8_t* tile_a, const uint8_t* tile_b, unsigned int w, unsigned int h, unsigned int t_a, unsigned int t_b, double bias)
+{
+    uint64_t state = JRRTILEVQ_HASH_FUNCTION(tile_a, w*h);
+    state ^= JRRTILEVQ_HASH_FUNCTION(tile_b, w*h);
+    state |= 1; // xorshift must not start from zero
+
+    for (unsigned int y = 0; y < h; ++y) {
+        for (unsigned int x = 0; x < w; ++x) {
+            const uint8_t pixel_a = tile_a[jrrtilevq_tile_coords(x, y, w, h, t_a, 1)];
+            const uint8_t pixel_b = tile_b[jrrtilevq_tile_coords(x, y, w, h, t_b, 1)];
+            unsigned int merged;
+
+            if (pixel_a == pixel_b) {
+                merged = pixel_a;
+            } else {
+                const unsigned int drawn = (float_from_u64(state) < bias) ? pixel_a : pixel_b;
+                // the generator advances for every differing pixel,
+                // whether or not the drawn value ends up being used
+                state ^= state << 13;
+                state ^= state >> 7;
+                state ^= state << 17;
+
+                if (!pixel_a)
+                    merged = pixel_b;
+                else if (!pixel_b)
+                    merged = pixel_a;
+                else
+                    merged = drawn;
+            }
+            merged_tile[y * w + x] = merged;
+        }
+    }
+}
+
+
+// Converts a 32-bit-per-pixel image into a palettized tile map.
+//
+// tilemap:  receives the result on success; release it with jrrtilevq_free().
+// image:    width * height pixels of 4 bytes each, rows stored one after
+//           another.
+// tile_w/h: tile size in pixels; must not be 0. Only whole tiles are used
+//           (width / tile_w columns, height / tile_h rows).
+// allowed_transforms: bit mask handed to jrrtilevq_normalize_flip().
+//
+// Pixels whose top byte (as a uint32_t) is below 0x80 become palette index
+// 0, the don't care color; all other pixels get their own palette entry.
+// Returns 0 on success. Returns 1, leaving *tilemap untouched, on invalid
+// arguments, allocation failure, or when the palette would need more than
+// 255 entries.
+int jrrtilevq_make_tilemap(struct jrrtilevq_tilemap* tilemap, const uint8_t* image, unsigned int width, unsigned int height, unsigned int tile_w, unsigned int tile_h, unsigned int allowed_transforms)
+{
+    // a zero tile size would divide by zero and create a zero-length array
+    if (!tilemap || !image || !tile_w || !tile_h)
+        return 1;
+
+    struct jrrtilevq_tilemap tm = {0};
+
+    uint8_t tile_buf[tile_w * tile_h];
+
+    unsigned int map_w = width / tile_w;
+    unsigned int map_h = height / tile_h;
+    unsigned int tiles_cap = 1024;
+
+    tm.map_w = map_w;
+    tm.map_h = map_h;
+    tm.tile_w = tile_w;
+    tm.tile_h = tile_h;
+    tm.tiles_cap = tiles_cap;
+    size_t mem_size = jrrtilevq_mem_size(tm);
+
+    tm.mem = calloc(mem_size, sizeof(uint8_t));
+    if (!tm.mem)
+        return 1;
+
+    uint32_t* palette = (uint32_t*)(tm.mem);
+    uint64_t* map = (uint64_t*)(palette + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (map_w * map_h));
+
+    unsigned int ty, tx, y, x, pal_index, pal_used, transform;
+    uint32_t pixel;
+    uint64_t tile_hash;
+    pal_used = 1; // reserve don't care color
+    palette[0] = 0xffff00ff;
+    palette[255] = 0xff0000ff;
+    for (ty = 0; ty < map_h; ++ty) {
+        for (tx = 0; tx < map_w; ++tx) {
+            for (y = 0; y < tile_h; ++y) {
+                for (x = 0; x < tile_w; ++x) {
+                    // rows are width pixels apart, which also holds when
+                    // width is not a whole number of tiles
+                    size_t row = ((size_t)ty * tile_h) + y;
+                    size_t column = ((size_t)tx * tile_w) + x;
+                    memcpy(&pixel, image + (((row * width) + column) * sizeof(pixel)), sizeof(pixel));
+                    pal_index = 0;
+                    if ((pixel & 0xff000000) >= 0x80000000) {
+                        for (pal_index = 0; pal_index < pal_used; ++pal_index) {
+                            if (pixel == palette[pal_index]) {
+                                break;
+                            }
+                        }
+                        if (pal_index == pal_used) {
+                            if (pal_used >= 255) {
+                                // abort if there's more than 255 indexes.
+                                // the 255'th index is for internal marking
+                                free(tm.mem);
+                                return 1;
+                            }
+                            palette[pal_index] = pixel;
+                            ++pal_used;
+                        }
+                    }
+                    tile_buf[y * tile_w + x] = pal_index;
+                }
+            }
+            transform = jrrtilevq_normalize_flip(tile_buf, tile_w, tile_h, allowed_transforms);
+            tile_hash = jrrtilevq_hashtable_add(&tm, tile_buf, 1);
+            map[ty * map_w + tx] = tile_hash;
+            map_attr[ty * map_w + tx] = transform;
+            if ((tm.n_tiles * 5) > (tm.tiles_cap * 4)) {
+                // resize the hash table when >80% full
+                unsigned int old_cap = tm.tiles_cap;
+                jrrtilevq_resize_hashtable(&tm, tm.tiles_cap * 2);
+                if (tm.tiles_cap == old_cap) {
+                    // the resize failed; carrying on could fill the table
+                    // completely, after which probing would never end
+                    free(tm.mem);
+                    return 1;
+                }
+                palette = (uint32_t*)(tm.mem);
+                map = (uint64_t*)(palette + 256);
+                map_attr = (uint8_t*)(map + (map_w * map_h));
+            }
+        }
+    }
+
+    memcpy(tilemap, &tm, sizeof(tm));
+    return 0;
+}
+
+// Draws the tile map into image, a buffer of width * height 32-bit pixels.
+// The buffer is cleared first. Each map cell is resolved to a stored tile
+// by probing the hash table for a slot whose contents hash to the cell's
+// value; cells that resolve to no tile are filled with 0xffff00ff.
+// Returns 1 when image is NULL or the buffer is smaller than the map in
+// either direction, 0 otherwise.
+int jrrtilevq_render_image(uint8_t* image, unsigned int width, unsigned int height, struct jrrtilevq_tilemap tilemap)
+{
+    if (!image)
+        return 1;
+    if (width < (tilemap.map_w * tilemap.tile_w))
+        return 1;
+    if (height < (tilemap.map_h * tilemap.tile_h))
+        return 1;
+
+    memset(image, 0x00, width * height * sizeof(uint32_t));
+
+    const int tile_size = tilemap.tile_w * tilemap.tile_h;
+    const int map_w = tilemap.map_w;
+    const int map_h = tilemap.map_h;
+    const int tile_w = tilemap.tile_w;
+    const int tile_h = tilemap.tile_h;
+    const int tiles_cap = tilemap.tiles_cap;
+
+    // sections of the memory block: palette, map, map_attr, tiles, counts
+    uint32_t* palette = (uint32_t*)(tilemap.mem);
+    uint64_t* map = (uint64_t*)(palette + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (tilemap.map_w * tilemap.map_h));
+    uint8_t* tiles = (uint8_t*)(map_attr + (tilemap.map_w * tilemap.map_h));
+    unsigned int* counts = (unsigned int*)(tiles + (tilemap.tiles_cap * tile_size));
+
+    uint32_t* pixels = (uint32_t*)(image);
+
+    for (int ty = 0; ty < map_h; ++ty) {
+        for (int tx = 0; tx < map_w; ++tx) {
+            const uint64_t wanted = map[ty * map_w + tx];
+
+            // probe until a slot hashes to the wanted value or is empty
+            int slot = wanted % tiles_cap;
+            while (counts[slot] && (JRRTILEVQ_HASH_FUNCTION(tiles + (slot * tile_size), tile_size) != wanted)) {
+                slot = (slot + 1) % tiles_cap;
+            }
+
+            // index of the cell's top left pixel in the output
+            const unsigned int origin = (ty * (tile_h * width)) + (tx * tile_w);
+            const bool found = counts[slot] != 0;
+            const int transform = map_attr[ty * map_w + tx];
+
+            for (int y = 0; y < tile_h; ++y) {
+                for (int x = 0; x < tile_w; ++x) {
+                    uint32_t color = 0xffff00ff;
+                    if (found) {
+                        int source = jrrtilevq_tile_coords(x, y, tile_w, tile_h, transform, 0);
+                        color = palette[tiles[(slot * tile_size) + source]];
+                    }
+                    pixels[origin + (y * width) + x] = color;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*#include <stdio.h>
+void debug_render_and_wait(struct jrrtilevq_tilemap tilemap)
+{
+ static int frame_no = 0;
+ char filename_buffer[256];
+
+ printf("map_w: %d, map_h: %d, tile_w: %d, tile_h: %d\n", tilemap.map_w, tilemap.map_h, tilemap.tile_w, tilemap.tile_h);
+
+ unsigned int image_width = tilemap.map_w * tilemap.tile_w;
+ unsigned int image_height = tilemap.map_h * tilemap.tile_h;
+ uint8_t* image = malloc(image_width * image_height * sizeof(uint32_t));
+ jrrtilevq_render_image(image, image_width, image_height, tilemap);
+ snprintf(filename_buffer, 256, "__debug_frames/%04d.png", frame_no);
+
+ lodepng_encode32_file(filename_buffer, image, image_width, image_height);
+ free(image);
+ printf("cap: %d, n: %d\n", tilemap.tiles_cap, tilemap.n_tiles);
+ ++frame_no;
+ getchar();
+}*/
+
+
+// Reduces the number of distinct tiles in a 32-bit-per-pixel image, in place.
+//
+// image:          width * height pixels; overwritten with the result.
+// tile_w/tile_h:  tile size in pixels; must not be 0.
+// target_n_tiles: merging stops once no more than this many distinct tiles
+//                 remain. 0 is treated as 1, since the last tile has
+//                 nothing left to merge with.
+// allowed_transforms: bit mask of tile transforms (see
+//                 jrrtilevq_tile_coords) that may be used to match tiles;
+//                 the identity is always allowed and the transposing
+//                 transforms are disabled for non-square tiles.
+//
+// Pairs of tiles are merged whenever their merge cost is within a
+// threshold; when a full pass finds no such pair the threshold is raised to
+// the smallest cost seen in that pass.
+// Returns 0 on success and 1 on invalid arguments, when the tile map cannot
+// be built, or when rendering fails.
+int jrrtilevq(uint8_t* image, unsigned int width, unsigned int height, unsigned int tile_w, unsigned int tile_h, unsigned int target_n_tiles, unsigned int allowed_transforms)
+{
+    struct jrrtilevq_tilemap tilemap;
+
+    // checked here because the tile buffers below are sized from tile_w * tile_h
+    if (!image || !tile_w || !tile_h)
+        return 1;
+    // with a target of 0 the loop below could never finish
+    if (target_n_tiles == 0)
+        target_n_tiles = 1;
+
+    if (tile_w != tile_h)
+        allowed_transforms &= 0x0f; // disable matrix translation if not square.
+    allowed_transforms |= 1; // always allow identity of course.
+
+    if (jrrtilevq_make_tilemap(&tilemap, image, width, height, tile_w, tile_h, allowed_transforms))
+        return 1;
+
+    int tile_size = tile_w * tile_h;
+    uint8_t tile_a_buf[tile_size];
+    uint8_t tile_b_buf[tile_size];
+    uint8_t tile_m_buf[tile_size];
+
+    // The memory block and the table capacity do not change while merging
+    // (the table is never resized here), so these pointers stay valid.
+    uint32_t* palette = (uint32_t*)(tilemap.mem);
+    uint64_t* map = (uint64_t*)(palette + 256);
+    uint8_t* map_attr = (uint8_t*)(map + (tilemap.map_w * tilemap.map_h));
+    uint8_t* tiles = (uint8_t*)(map_attr + (tilemap.map_w * tilemap.map_h));
+    unsigned int* counts = (unsigned int*)(tiles + (tilemap.tiles_cap * tile_size));
+
+    unsigned int d;
+    unsigned int d_threshold = 2;
+    unsigned int t_a, t_b;
+    unsigned int last_changed_slot = 0;
+    unsigned int d_min = UINT_MAX;
+
+    while (target_n_tiles < tilemap.n_tiles) {
+reset_pass:
+        d_min = UINT_MAX;
+        // every pass starts scanning at the slot that changed last
+        for (unsigned int slot_a_count = 0; slot_a_count < tilemap.tiles_cap; ++slot_a_count) {
+            unsigned int slot_a = (slot_a_count + last_changed_slot) % tilemap.tiles_cap;
+            if (!counts[slot_a])
+                continue;
+            for (unsigned int slot_b_count = slot_a_count+1; slot_b_count < tilemap.tiles_cap; ++slot_b_count) {
+                unsigned int slot_b = (slot_b_count + last_changed_slot) % tilemap.tiles_cap;
+                if (!counts[slot_b])
+                    continue;
+                // pairs in which both tiles are used more often than the
+                // threshold are not considered in this pass
+                if ((counts[slot_a] > d_threshold) && (counts[slot_b] > d_threshold))
+                    continue;
+                d = jrrtilevq_minimum_changed_pixels(tiles + (slot_a * tile_size), tiles + (slot_b * tile_size), tile_w, tile_h, counts[slot_a], counts[slot_b], &t_a, &t_b, allowed_transforms);
+                if (d < d_min)
+                    d_min = d;
+                if (d <= d_threshold) {
+                    // work on copies: removing entries moves tiles around in the table
+                    memcpy(tile_a_buf, tiles + (slot_a * tile_size), tile_size);
+                    memcpy(tile_b_buf, tiles + (slot_b * tile_size), tile_size);
+                    uint64_t tile_a_hash = JRRTILEVQ_HASH_FUNCTION(tile_a_buf, tile_size);
+                    uint64_t tile_b_hash = JRRTILEVQ_HASH_FUNCTION(tile_b_buf, tile_size);
+                    // NOTE(review): jrrtilevq_make_diff_tile keeps tile a's pixel
+                    // with probability bias, so this favors the tile with the
+                    // *smaller* count - confirm that this is intended.
+                    double bias = (double)counts[slot_b] / (double)(counts[slot_a] + counts[slot_b]);
+                    jrrtilevq_make_diff_tile(tile_m_buf, tile_a_buf, tile_b_buf, tile_w, tile_h, t_a, t_b, bias);
+                    // the merged tile is normalized as well, so fold that
+                    // transform into the ones recorded for a and b
+                    unsigned int t_n = jrrtilevq_normalize_flip(tile_m_buf, tile_w, tile_h, allowed_transforms);
+                    if (t_a & 4) {
+                        t_a ^= ((t_n & 1) << 1) | ((t_n & 2) >> 1) | (t_n & 4);
+                    } else {
+                        t_a ^= t_n;
+                    }
+                    if (t_b & 4) {
+                        t_b ^= ((t_n & 1) << 1) | ((t_n & 2) >> 1) | (t_n & 4);
+                    } else {
+                        t_b ^= t_n;
+                    }
+                    unsigned int tile_m_count = 0;
+                    tile_m_count += jrrtilevq_hashtable_remove(&tilemap, tile_a_buf);
+                    tile_m_count += jrrtilevq_hashtable_remove(&tilemap, tile_b_buf);
+                    uint64_t tile_m_hash = jrrtilevq_hashtable_add(&tilemap, tile_m_buf, tile_m_count);
+
+                    jrrtilevq_map_search_and_replace(&tilemap, tile_m_hash, tile_a_hash, tile_b_hash, t_a, t_b);
+
+                    // slot_a has been removed, so leave both loops and
+                    // start a new pass from there.
+                    if (tilemap.n_tiles <= target_n_tiles)
+                        goto done;
+                    last_changed_slot = slot_a;
+                    goto reset_pass;
+                }
+            }
+        }
+        // nothing was cheap enough: allow the cheapest pair seen next time
+        d_threshold = d_min;
+    }
+done:
+    ; // a label must be followed by a statement
+
+    int render_failed = jrrtilevq_render_image(image, width, height, tilemap);
+    jrrtilevq_free(&tilemap);
+    return render_failed ? 1 : 0;
+}
+
+#endif // JRRTILEVQ_IMPLEMENTATION
+#endif // INCLUDE_JRRTILEVQ_H
diff --git a/lodepng.c b/lodepng.c
new file mode 100644
index 0000000..ee8cf33
--- /dev/null
+++ b/lodepng.c
@@ -0,0 +1,6410 @@
+/*
+LodePNG version 20200306
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#ifdef LODEPNG_COMPILE_DISK
+#include <limits.h> /* LONG_MAX */
+#include <stdio.h> /* file handling */
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+#include <stdlib.h> /* allocations */
+#endif /* LODEPNG_COMPILE_ALLOCATORS */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+/* Version identifier of this LodePNG copy; matches the "LodePNG version" line in the file header. */
+const char* LODEPNG_VERSION_STRING = "20200306";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above
+*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // Tools for C, and common code for PNG and Zlib. // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*The malloc, realloc and free functions are defined here with "lodepng_" in front
+of the name, so that you can easily change them to others related to your
+platform if needed. Everything else in the code calls these. Pass
+-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
+#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
+define them in your own project's source files without needing to change
+lodepng source code. Don't forget to remove "static" if you copypaste them
+from here.*/
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+/* Allocation hook used by the rest of this file. When LODEPNG_MAX_ALLOC is defined,
+requests above that limit are refused (returns 0) without calling malloc. */
+static void* lodepng_malloc(size_t size) {
+  void* block;
+#ifdef LODEPNG_MAX_ALLOC
+  if(size > LODEPNG_MAX_ALLOC) return 0;
+#endif
+  block = malloc(size);
+  return block;
+}
+
+/* Reallocation hook used by the rest of this file.
+NOTE: when realloc returns NULL, it leaves the original memory untouched. The same is
+true when the request is refused here because it exceeds LODEPNG_MAX_ALLOC. */
+static void* lodepng_realloc(void* ptr, size_t new_size) {
+  void* moved;
+#ifdef LODEPNG_MAX_ALLOC
+  if(new_size > LODEPNG_MAX_ALLOC) return 0;
+#endif
+  moved = realloc(ptr, new_size);
+  return moved;
+}
+
+/* Release hook matching lodepng_malloc/lodepng_realloc; simply forwards to free(). */
+static void lodepng_free(void* ptr) {
+ free(ptr);
+}
+#else /*LODEPNG_COMPILE_ALLOCATORS*/
+/* TODO: support giving additional void* payload to the custom allocators */
+void* lodepng_malloc(size_t size);
+void* lodepng_realloc(void* ptr, size_t new_size);
+void lodepng_free(void* ptr);
+#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+
+/* LODEPNG_INLINE: asks the compiler to inline a function, for use when this measurably improves performance. */
+/* inline is not available in C90, so the macro expands to 'inline' only for C99 or later and for C++,
+and to nothing otherwise. */
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define LODEPNG_INLINE inline
+#else
+#define LODEPNG_INLINE /* not available */
+#endif
+
+/* LODEPNG_RESTRICT: restrict is not available in C90, so the compiler-specific __restrict spelling is
+used on the compilers/versions tested below (GCC >= 3.1, MSVC >= 1400, Watcom C >= 1250 when not
+compiling as C++), and nothing otherwise. */
+#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\
+ (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \
+ (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus))
+#define LODEPNG_RESTRICT __restrict
+#else
+#define LODEPNG_RESTRICT /* not available */
+#endif
+
+/* Replacements for C library functions such as memcpy and strlen, to support platforms
+where a full C library is not available. The compiler can recognize them and compile
+to something as fast. */
+
+/* Byte-wise copy of size bytes from src to dst (front to back); the regions must not overlap. */
+static void lodepng_memcpy(void* LODEPNG_RESTRICT dst,
+                           const void* LODEPNG_RESTRICT src, size_t size) {
+  char* out = (char*)dst;
+  const char* in = (const char*)src;
+  size_t left = size;
+  /* count down instead of indexing; no pointer is touched when size is 0 */
+  while(left-- != 0) *out++ = *in++;
+}
+
+/* Fills num bytes at dst with value converted to char. */
+static void lodepng_memset(void* LODEPNG_RESTRICT dst,
+                           int value, size_t num) {
+  char* out = (char*)dst;
+  const char fill = (char)value;
+  size_t left = num;
+  /* count down instead of indexing; no pointer is touched when num is 0 */
+  while(left-- != 0) *out++ = fill;
+}
+
+/* Length of the null terminated string a, not counting the terminator.
+Does not check memory out of bounds, do not use on untrusted data. */
+static size_t lodepng_strlen(const char* a) {
+  size_t count = 0;
+  /* self-reference avoids a warning about an unused function in case of disabled COMPILE... macros */
+  (void)(&lodepng_strlen);
+  while(a[count] != '\0') ++count;
+  return count;
+}
+
+/* Generic max/min/abs helpers. These are plain macros: an argument may be evaluated more than
+once, so do not pass expressions with side effects (such as i++ or a function call that mutates state). */
+#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x))
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)
+/* Adds a and b with wrap-around detection: stores the (possibly wrapped) sum in *result
+and returns nonzero if the mathematical sum did not fit in size_t. Relies only on unsigned
+arithmetic, which is well defined in C90, so the compiler cannot optimize the check away. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+  const size_t sum = a + b;
+  *result = sum;
+  /* a wrapped sum is always smaller than either operand */
+  return (sum < a) ? 1 : 0;
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Multiplies a and b with wrap-around detection: stores the (possibly wrapped) product in
+*result and returns nonzero if the mathematical product did not fit in size_t. Relies only
+on unsigned arithmetic, which is well defined in C90. */
+static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
+  const size_t product = a * b;
+  *result = product;
+  if(a == 0) return 0; /* 0 * b never overflows, and the division below needs a != 0 */
+  return product / a != b;
+}
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/* Returns nonzero if a + b > c, treating a sum that overflows size_t as larger than any c. */
+static int lodepng_gtofl(size_t a, size_t b, size_t c) {
+  size_t sum;
+  const int wrapped = lodepng_addofl(a, b, &sum);
+  return wrapped || sum > c;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+/*
+Often in case of an error a value is assigned to a variable and then it breaks
+out of a loop (to go to the cleanup phase of a function). This macro does that.
+It makes the error handling code shorter and more readable.
+
+Example: if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83);
+*/
+/*Set errorvar to code and break out of the enclosing loop or switch.
+NOTE: these macros are deliberately plain brace blocks rather than do { } while(0):
+inside a do/while wrapper the 'break' would only leave the wrapper itself.
+The arguments are pasted unparenthesized, so pass simple expressions.*/
+#define CERROR_BREAK(errorvar, code){\
+ errorvar = code;\
+ break;\
+}
+
+/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
+#define ERROR_BREAK(code) CERROR_BREAK(error, code)
+
+/*Set error var to the error code, and return it. 'code' is expanded twice, so it must not have side effects.*/
+#define CERROR_RETURN_ERROR(errorvar, code){\
+ errorvar = code;\
+ return code;\
+}
+
+/*Try the code, if it returns error, also return the error.
+The result is kept in a block-local variable named "error", which hides any outer variable of that
+name for the duration of the macro body only.*/
+#define CERROR_TRY_RETURN(call){\
+ unsigned error = call;\
+ if(error) return error;\
+}
+
+/*Set error var to the error code, and return from the void function.*/
+#define CERROR_RETURN(errorvar, code){\
+ errorvar = code;\
+ return;\
+}
+
+/*
+About uivector, ucvector and string:
+-All of them wrap dynamic arrays or text strings in a similar way.
+-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
+-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
+-They're not used in the interface, only internally in this file as static functions.
+-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
+*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+/*dynamic vector of unsigned ints; set up with uivector_init and released with uivector_cleanup*/
+typedef struct uivector {
+ unsigned* data; /*element storage, NULL while nothing is allocated*/
+ size_t size; /*size in number of unsigned ints (elements in use, not bytes)*/
+ size_t allocsize; /*allocated size in bytes*/
+} uivector;
+
+/*Frees the storage of the uivector pointed to by p and resets it to the empty state.
+Takes void* so it has a generic cleanup signature; p must point to a uivector.*/
+static void uivector_cleanup(void* p) {
+  uivector* vec = (uivector*)p;
+  vec->size = 0;
+  vec->allocsize = 0;
+  lodepng_free(vec->data);
+  vec->data = NULL;
+}
+
+/*Sets the element count of p to size, growing the allocation when needed.
+returns 1 if success, 0 if failure ==> nothing done.
+When growing, the new allocation is the requested byte size plus half of the previous
+allocation. Requests whose byte size does not fit in size_t are rejected instead of
+wrapping around to a too-small allocation.*/
+static unsigned uivector_resize(uivector* p, size_t size) {
+  size_t allocsize;
+  if(size > ((size_t)-1) / sizeof(unsigned)) return 0; /*error: size * sizeof(unsigned) would overflow*/
+  allocsize = size * sizeof(unsigned);
+  if(allocsize > p->allocsize) {
+    size_t newsize = allocsize + (p->allocsize >> 1u);
+    void* data;
+    if(newsize < allocsize) newsize = allocsize; /*growth slack overflowed: ask for exactly what is needed*/
+    data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  p->size = size;
+  return 1; /*success*/
+}
+
+/*Puts p in the empty state: no storage, zero elements.*/
+static void uivector_init(uivector* p) {
+  p->size = 0;
+  p->allocsize = 0;
+  p->data = NULL;
+}
+
+/*Appends c to the end of p.
+returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_push_back(uivector* p, unsigned c) {
+  const size_t slot = p->size; /*index the new element will occupy*/
+  if(uivector_resize(p, slot + 1)) {
+    p->data[slot] = c;
+    return 1;
+  }
+  return 0;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*dynamic vector of unsigned chars; ucvector_init wraps an existing buffer, ucvector_resize grows it*/
+typedef struct ucvector {
+ unsigned char* data; /*byte storage; passed to lodepng_realloc when the vector grows*/
+ size_t size; /*used size*/
+ size_t allocsize; /*allocated size*/
+} ucvector;
+
+/*Sets the used size of p to size bytes, growing the allocation when needed.
+returns 1 if success, 0 if failure ==> nothing done.
+When growing, the new allocation is the requested size plus half of the previous
+allocation; if that sum would wrap around size_t, exactly the requested size is used
+so the buffer can never end up smaller than size.*/
+static unsigned ucvector_resize(ucvector* p, size_t size) {
+  if(size > p->allocsize) {
+    size_t newsize = size + (p->allocsize >> 1u);
+    void* data;
+    if(newsize < size) newsize = size; /*growth slack overflowed: ask for exactly what is needed*/
+    data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned char*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  p->size = size;
+  return 1; /*success*/
+}
+
+/*Returns a ucvector that wraps buffer, with both used and allocated size set to size.*/
+static ucvector ucvector_init(unsigned char* buffer, size_t size) {
+  ucvector wrapped;
+  wrapped.size = size;
+  wrapped.allocsize = size;
+  wrapped.data = buffer;
+  return wrapped;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*free the string *out points to and leave *out set to NULL*/
+static void string_cleanup(char** out) {
+  char* text = *out;
+  *out = NULL;
+  lodepng_free(text);
+}
+
+/* dynamically allocates a new null terminated string holding a copy of the first insize
+bytes of in; returns NULL (0) when the allocation fails */
+static char* alloc_string_sized(const char* in, size_t insize) {
+  char* copy = (char*)lodepng_malloc(insize + 1);
+  if(!copy) return copy;
+  lodepng_memcpy(copy, in, insize);
+  copy[insize] = 0;
+  return copy;
+}
+
+/* dynamically allocates a new string with a copy of the null terminated input text;
+the result of alloc_string_sized is passed through unchanged */
+static char* alloc_string(const char* in) {
+  const size_t length = lodepng_strlen(in);
+  return alloc_string_sized(in, length);
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)
+/*Reads 4 bytes from buffer as one big-endian (most significant byte first) 32-bit value.*/
+static unsigned lodepng_read32bitInt(const unsigned char* buffer) {
+  unsigned value = 0;
+  size_t i;
+  for(i = 0; i != 4; ++i) value = (value << 8u) | (unsigned)buffer[i];
+  return value;
+}
+#endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
+/*Writes the low 32 bits of value to buffer in big-endian order (most significant byte first).
+buffer must have at least 4 allocated bytes available*/
+static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) {
+  int i;
+  /*fill from the last byte backwards, peeling off the lowest 8 bits each time*/
+  for(i = 3; i >= 0; --i) {
+    buffer[i] = (unsigned char)(value & 0xff);
+    value >>= 8;
+  }
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / File IO / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DISK
+
+/* Returns the size in bytes of the named file, found by seeking to its end, or a negative
+value on error. This should be pure C compatible, so no fstat. */
+static long lodepng_filesize(const char* filename) {
+  long size = -1;
+  FILE* file = fopen(filename, "rb");
+  if(!file) return -1;
+
+  if(fseek(file, 0, SEEK_END) == 0) {
+    size = ftell(file);
+    /* It may give LONG_MAX as directory size, this is invalid for us. */
+    if(size == LONG_MAX) size = -1;
+  }
+
+  fclose(file);
+  return size;
+}
+
+/* load file into buffer that already has the correct allocated size. Returns error code:
+0 on success, 78 if the file cannot be opened or fewer than size bytes could be read.*/
+static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) {
+  size_t got;
+  FILE* file = fopen(filename, "rb");
+  if(!file) return 78;
+
+  got = fread(out, 1, size, file);
+  fclose(file);
+
+  return (got == size) ? 0 : 78;
+}
+
+/*Loads the whole file into a newly allocated buffer.
+out: receives the buffer from lodepng_malloc. outsize: receives the file size in bytes.
+Returns 0 on success, 78 if the size cannot be determined or the file cannot be read,
+83 if the allocation failed. *out is assigned before the read, so it may be non-NULL
+even when an error is returned.*/
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) {
+  size_t nbytes;
+  const long filesize = lodepng_filesize(filename);
+  if(filesize < 0) return 78;
+  nbytes = (size_t)filesize;
+  *outsize = nbytes;
+
+  *out = (unsigned char*)lodepng_malloc(nbytes);
+  if(nbytes > 0 && !(*out)) return 83; /*the above malloc failed*/
+
+  return lodepng_buffer_file(*out, nbytes, filename);
+}
+
+/*write given buffer to the file, overwriting the file, it doesn't append to it.
+Returns 0 on success and 79 when the file cannot be opened for writing or when not all
+of the data could be written (short fwrite, or fclose failing to flush buffered data).
+The file is always closed before returning.*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) {
+  FILE* file;
+  size_t written;
+  file = fopen(filename, "wb" );
+  if(!file) return 79;
+  written = fwrite(buffer, 1, buffersize, file);
+  /*fclose flushes the stream, so a full disk or I/O error may only show up here*/
+  if(fclose(file) != 0) return 79;
+  if(written != buffersize) return 79;
+  return 0;
+}
+
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of common code and tools. Begin of Zlib related code. // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*Appends individual bits to the end of a ucvector (see WRITEBIT).*/
+typedef struct {
+ ucvector* data; /*output bytes; not owned, supplied through LodePNGBitWriter_init*/
+ unsigned char bp; /*ok to overflow, indicates bit pos inside byte*/
+} LodePNGBitWriter;
+
+/*Starts writer at bit position 0 with data as its output vector.*/
+static void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) {
+  writer->bp = 0;
+  writer->data = data;
+}
+
+/*Appends one bit to the writer's output: a new zero byte is added whenever the bit position
+is at a byte boundary, then the bit is OR-ed in at position (bp & 7) of the last byte.
+Only usable inside functions returning void: on allocation failure it executes 'return'.
+Both arguments are fully parenthesized in the expansion, but they are evaluated more than
+once, so they must not have side effects.
+TODO: this ignores potential out of memory errors*/
+#define WRITEBIT(writer, bit){\
+  /* append new byte */\
+  if((((writer)->bp) & 7u) == 0) {\
+    if(!ucvector_resize((writer)->data, (writer)->data->size + 1)) return;\
+    (writer)->data->data[(writer)->data->size - 1] = 0;\
+  }\
+  ((writer)->data->data[(writer)->data->size - 1]) |= ((bit) << (((writer)->bp) & 7u));\
+  ++(writer)->bp;\
+}
+
+/* Writes the nbits lowest bits of value. LSB of value is written first, and LSB of bytes is used first.
+In the single-bit case value is passed on as-is (not masked). */
+static void writeBits(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  size_t pos;
+  if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */
+    WRITEBIT(writer, value);
+    return;
+  }
+  /* TODO: increase output size only once here rather than in each WRITEBIT */
+  for(pos = 0; pos < nbits; ++pos) {
+    WRITEBIT(writer, (unsigned char)((value >> pos) & 1));
+  }
+}
+
+/* This one is to use for adding huffman symbol, the value bits are written MSB first:
+bit nbits-1 of value goes out first, bit 0 last. */
+static void writeBitsReversed(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  size_t remaining;
+  /* TODO: increase output size only once here rather than in each WRITEBIT */
+  for(remaining = nbits; remaining != 0; --remaining) {
+    WRITEBIT(writer, (unsigned char)((value >> (remaining - 1u)) & 1u));
+  }
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*Reads bits from a byte buffer, least significant bit of each byte first. An ensureBits
+function loads upcoming bits into 'buffer'; peekBits/advanceBits/readBits then consume them.*/
+typedef struct {
+ const unsigned char* data; /*input bytes; not owned*/
+ size_t size; /*size of data in bytes*/
+ size_t bitsize; /*size of data in bits, end of valid bp values, should be 8*size*/
+ size_t bp; /*current bit position in data, counted from the start*/
+ unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/
+} LodePNGBitReader;
+
+/* Prepares reader to read from the start of data. data size argument is in bytes.
+Returns 0 if ok, or error 105 if size is too large, causing overflow. */
+static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) {
+  size_t headroom; /*only the overflow flag of the addition matters, the sum itself is unused*/
+  reader->data = data;
+  reader->size = size;
+  /* first check: size in bits must fit (if size_t is 32 bit this supports up to 500MB).
+  second check: ensure incremented bp can be compared to bitsize without overflow even when it
+  would be incremented 32 too much and trying to ensure 32 more bits */
+  if(lodepng_mulofl(size, 8u, &reader->bitsize) ||
+     lodepng_addofl(reader->bitsize, 64u, &headroom)) {
+    return 105;
+  }
+  reader->buffer = 0;
+  reader->bp = 0;
+  return 0; /*ok*/
+}
+
+/*
+ensureBits functions:
+Ensures the reader can at least read nbits bits in one or more readBits calls,
+safely even if not enough bits are available.
+Returns 1 if there are enough bits available, 0 if not.
+*/
+
+/*See ensureBits documentation above. This one ensures exactly 1 bit */
+/*static unsigned ensureBits1(LodePNGBitReader* reader) {
+ if(reader->bp >= reader->bitsize) return 0;
+ reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u);
+ return 1;
+}*/
+
+/*See ensureBits documentation above. This one ensures up to 9 bits.
+Fast path: two whole bytes are available. Slow path: missing bytes are read as zero and
+the return value tells whether nbits bits really exist.*/
+static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned char* bytes = reader->data;
+  const size_t first = reader->bp >> 3u; /*byte holding the current bit*/
+  const size_t total = reader->size;
+  const unsigned shift = (unsigned)(reader->bp & 7u); /*bits of that byte already consumed*/
+  unsigned window = 0;
+  if(first + 1u < total) {
+    window = (unsigned)bytes[first + 0] | ((unsigned)bytes[first + 1] << 8u);
+    reader->buffer = window >> shift;
+    return 1;
+  }
+  if(first + 0u < total) window |= bytes[first + 0];
+  reader->buffer = window >> shift;
+  return reader->bp + nbits <= reader->bitsize;
+}
+
+/*See ensureBits documentation above. This one ensures up to 17 bits.
+Fast path: three whole bytes are available. Slow path: missing bytes are read as zero and
+the return value tells whether nbits bits really exist.*/
+static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned char* bytes = reader->data;
+  const size_t first = reader->bp >> 3u; /*byte holding the current bit*/
+  const size_t total = reader->size;
+  const unsigned shift = (unsigned)(reader->bp & 7u); /*bits of that byte already consumed*/
+  unsigned window = 0;
+  if(first + 2u < total) {
+    window = (unsigned)bytes[first + 0] | ((unsigned)bytes[first + 1] << 8u) |
+             ((unsigned)bytes[first + 2] << 16u);
+    reader->buffer = window >> shift;
+    return 1;
+  }
+  if(first + 0u < total) window |= bytes[first + 0];
+  if(first + 1u < total) window |= ((unsigned)bytes[first + 1] << 8u);
+  reader->buffer = window >> shift;
+  return reader->bp + nbits <= reader->bitsize;
+}
+
+/*See ensureBits documentation above. This one ensures up to 25 bits.
+Fast path: four whole bytes are available. Slow path: missing bytes are read as zero and
+the return value tells whether nbits bits really exist.*/
+static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned char* bytes = reader->data;
+  const size_t first = reader->bp >> 3u; /*byte holding the current bit*/
+  const size_t total = reader->size;
+  const unsigned shift = (unsigned)(reader->bp & 7u); /*bits of that byte already consumed*/
+  unsigned window = 0;
+  if(first + 3u < total) {
+    window = (unsigned)bytes[first + 0] | ((unsigned)bytes[first + 1] << 8u) |
+             ((unsigned)bytes[first + 2] << 16u) | ((unsigned)bytes[first + 3] << 24u);
+    reader->buffer = window >> shift;
+    return 1;
+  }
+  if(first + 0u < total) window |= bytes[first + 0];
+  if(first + 1u < total) window |= ((unsigned)bytes[first + 1] << 8u);
+  if(first + 2u < total) window |= ((unsigned)bytes[first + 2] << 16u);
+  reader->buffer = window >> shift;
+  return reader->bp + nbits <= reader->bitsize;
+}
+
+/*See ensureBits documentation above. This one ensures up to 32 bits.
+Fast path: five whole bytes are available; the fifth byte refills the top bits that were
+vacated by shifting out the already consumed bits. Slow path: missing bytes are read as
+zero and the return value tells whether nbits bits really exist.*/
+static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned char* bytes = reader->data;
+  const size_t first = reader->bp >> 3u; /*byte holding the current bit*/
+  const size_t total = reader->size;
+  const unsigned shift = (unsigned)(reader->bp & 7u); /*bits of that byte already consumed*/
+  unsigned window = 0;
+  if(first + 4u < total) {
+    window = (unsigned)bytes[first + 0] | ((unsigned)bytes[first + 1] << 8u) |
+             ((unsigned)bytes[first + 2] << 16u) | ((unsigned)bytes[first + 3] << 24u);
+    window >>= shift;
+    window |= (((unsigned)bytes[first + 4] << 24u) << (8u - shift));
+    reader->buffer = window;
+    return 1;
+  }
+  if(first + 0u < total) window |= bytes[first + 0];
+  if(first + 1u < total) window |= ((unsigned)bytes[first + 1] << 8u);
+  if(first + 2u < total) window |= ((unsigned)bytes[first + 2] << 16u);
+  if(first + 3u < total) window |= ((unsigned)bytes[first + 3] << 24u);
+  reader->buffer = window >> shift;
+  return reader->bp + nbits <= reader->bitsize;
+}
+
+/* Get the next nbits bits without advancing the bit pointer. Must have enough bits
+available with ensureBits. Max nbits is 31. */
+static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) {
+  /* The shift allows nbits to be only up to 31. */
+  const unsigned lowmask = (1u << nbits) - 1u;
+  return reader->buffer & lowmask;
+}
+
+/* Drops nbits bits from the buffer and moves the bit pointer forward by the same amount.
+Must have enough bits available with ensureBits */
+static void advanceBits(LodePNGBitReader* reader, size_t nbits) {
+  reader->bp += nbits;
+  reader->buffer >>= nbits;
+}
+
+/* Returns the next nbits bits and consumes them (peekBits followed by advanceBits).
+Must have enough bits available with ensureBits */
+static unsigned readBits(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned value = peekBits(reader, nbits);
+  advanceBits(reader, nbits);
+  return value;
+}
+
+/* Public for testing only. Reads numsteps bit groups from data: group i is steps[i] bits
+wide and is stored in result[i]. steps and result must have numsteps values.
+Returns 1 if everything could be read, 0 if the reader could not be set up or the data ran out. */
+unsigned lode_png_test_bitreader(const unsigned char* data, size_t size,
+                                 size_t numsteps, const size_t* steps, unsigned* result) {
+  LodePNGBitReader reader;
+  size_t k;
+  if(LodePNGBitReader_init(&reader, data, size) != 0) return 0;
+  for(k = 0; k < numsteps; k++) {
+    const size_t step = steps[k];
+    unsigned enough;
+    /* pick the cheapest ensureBits variant that covers this width */
+    if(step <= 9) enough = ensureBits9(&reader, step);
+    else if(step <= 17) enough = ensureBits17(&reader, step);
+    else if(step <= 25) enough = ensureBits25(&reader, step);
+    else enough = ensureBits32(&reader, step);
+    if(!enough) return 0;
+    result[k] = readBits(&reader, step);
+  }
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*Returns the num lowest bits of 'bits' in reversed order (bit 0 becomes bit num-1 and so on);
+higher bits of the input are ignored.*/
+static unsigned reverseBits(unsigned bits, unsigned num) {
+  /*TODO: implement faster lookup table based version when needed*/
+  unsigned result = 0;
+  unsigned remaining;
+  /*peel bits off the low end of the input and push them onto the low end of the result*/
+  for(remaining = num; remaining != 0; --remaining) {
+    result = (result << 1u) | (bits & 1u);
+    bits >>= 1u;
+  }
+  return result;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflate - Huffman / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*symbol range of the length codes within the literal/length alphabet*/
+#define FIRST_LENGTH_CODE_INDEX 257
+#define LAST_LENGTH_CODE_INDEX 285
+/*256 literals, the end code, some length codes, and 2 unused codes*/
+#define NUM_DEFLATE_CODE_SYMBOLS 288
+/*the distance codes have their own symbols, 30 used, 2 unused*/
+#define NUM_DISTANCE_SYMBOLS 32
+/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
+#define NUM_CODE_LENGTH_CODES 19
+
+/*the base lengths represented by codes 257-285 (entry 0 belongs to code 257)*/
+static const unsigned LENGTHBASE[29]
+ = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+ 67, 83, 99, 115, 131, 163, 195, 227, 258};
+
+/*the extra bits used by codes 257-285 (added to base length); same indexing as LENGTHBASE*/
+static const unsigned LENGTHEXTRA[29]
+ = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 0};
+
+/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
+static const unsigned DISTANCEBASE[30]
+ = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
+ 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+
+/*the extra bits of backwards distances (added to base); same indexing as DISTANCEBASE*/
+static const unsigned DISTANCEEXTRA[30]
+ = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+ 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
+
+/*the order in which "code length alphabet code lengths" are stored as specified by deflate, out of this the huffman
+tree of the dynamic huffman tree lengths is generated*/
+static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
+ = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Huffman tree struct, containing multiple representations of the tree.
+All four pointers are set to 0 by HuffmanTree_init and freed by HuffmanTree_cleanup.
+*/
+typedef struct HuffmanTree {
+ unsigned* codes; /*the huffman codes (bit patterns representing the symbols)*/
+ unsigned* lengths; /*the lengths of the huffman codes*/
+ unsigned maxbitlen; /*maximum number of bits a single code can get*/
+ unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
+ /* for reading only; built by HuffmanTree_makeTable */
+ unsigned char* table_len; /*length of symbol from lookup table, or max length if secondary lookup needed*/
+ unsigned short* table_value; /*value of symbol from lookup table, or pointer to secondary table if needed*/
+} HuffmanTree;
+
+/*Marks every array of the tree as not allocated, so HuffmanTree_cleanup is safe to call
+at any later point. maxbitlen and numcodes are left untouched.*/
+static void HuffmanTree_init(HuffmanTree* tree) {
+  tree->table_value = 0;
+  tree->table_len = 0;
+  tree->lengths = 0;
+  tree->codes = 0;
+}
+
+/*Frees all arrays owned by the tree. The pointers themselves are not reset.*/
+static void HuffmanTree_cleanup(HuffmanTree* tree) {
+  lodepng_free(tree->table_value);
+  lodepng_free(tree->table_len);
+  lodepng_free(tree->lengths);
+  lodepng_free(tree->codes);
+}
+
+/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/
+/* values 8u and 9u work the fastest; the first table has 1 << FIRSTBITS entries */
+#define FIRSTBITS 9u
+
+/* a symbol value too big to represent any valid symbol, to indicate reading disallowed huffman bits combination,
+which is possible in case of only 0 or 1 present symbols. It is the largest value an entry of the
+unsigned short table_value array is guaranteed to hold. */
+#define INVALIDSYMBOL 65535u
+
+/* make table for huffman decoding.
+Builds tree->table_len / tree->table_value from tree->codes and tree->lengths: a first table
+of 1 << FIRSTBITS entries indexed by the next FIRSTBITS input bits, followed by secondary
+tables for codes longer than FIRSTBITS.
+Returns 0 on success, 83 on allocation failure, 55 if the code lengths do not describe a
+usable tree. On error the partially built tables stay attached to the tree and are freed by
+HuffmanTree_cleanup at a higher scope.
+tree->codes[i] is only read for symbols that are present (length != 0): the entries of
+absent symbols are never written by HuffmanTree_makeFromLengths2. */
+static unsigned HuffmanTree_makeTable(HuffmanTree* tree) {
+  static const unsigned headsize = 1u << FIRSTBITS; /*size of the first table*/
+  static const unsigned mask = (1u << FIRSTBITS) /*headsize*/ - 1u;
+  size_t i, numpresent, pointer, size; /*total table size*/
+  unsigned* maxlens = (unsigned*)lodepng_malloc(headsize * sizeof(unsigned));
+  if(!maxlens) return 83; /*alloc fail*/
+
+  /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/
+  lodepng_memset(maxlens, 0, headsize * sizeof(*maxlens));
+  for(i = 0; i < tree->numcodes; i++) {
+    unsigned l = tree->lengths[i];
+    unsigned symbol, index;
+    if(l <= FIRSTBITS) continue; /*symbols that fit in first table don't increase secondary table size*/
+    symbol = tree->codes[i];
+    /*get the FIRSTBITS MSBs, the MSBs of the symbol are encoded first. See later comment about the reversing*/
+    index = reverseBits(symbol >> (l - FIRSTBITS), FIRSTBITS);
+    maxlens[index] = LODEPNG_MAX(maxlens[index], l);
+  }
+  /* compute total table size: size of first table plus all secondary tables for symbols longer than FIRSTBITS */
+  size = headsize;
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS));
+  }
+  tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len));
+  tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value));
+  if(!tree->table_len || !tree->table_value) {
+    lodepng_free(maxlens);
+    /* freeing tree->table values is done at a higher scope */
+    return 83; /*alloc fail*/
+  }
+  /*initialize with an invalid length to indicate unused entries*/
+  for(i = 0; i < size; ++i) tree->table_len[i] = 16;
+
+  /*fill in the first table for long symbols: max prefix size and pointer to secondary tables*/
+  pointer = headsize;
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l <= FIRSTBITS) continue;
+    tree->table_len[i] = l;
+    tree->table_value[i] = pointer;
+    pointer += (1u << (l - FIRSTBITS));
+  }
+  lodepng_free(maxlens);
+
+  /*fill in the first table for short symbols, or secondary table for long symbols*/
+  numpresent = 0;
+  for(i = 0; i < tree->numcodes; ++i) {
+    unsigned l = tree->lengths[i];
+    unsigned symbol, reverse;
+    if(l == 0) continue; /*absent symbol: it has no code, so do not touch tree->codes[i]*/
+    symbol = tree->codes[i]; /*the huffman bit pattern. i itself is the value.*/
+    /*reverse bits, because the huffman bits are given in MSB first order but the bit reader reads LSB first*/
+    reverse = reverseBits(symbol, l);
+    numpresent++;
+
+    if(l <= FIRSTBITS) {
+      /*short symbol, fully in first table, replicated num times if l < FIRSTBITS*/
+      unsigned num = 1u << (FIRSTBITS - l);
+      unsigned j;
+      for(j = 0; j < num; ++j) {
+        /*bit reader will read the l bits of symbol first, the remaining FIRSTBITS - l bits go to the MSB's*/
+        unsigned index = reverse | (j << l);
+        if(tree->table_len[index] != 16) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+        tree->table_len[index] = l;
+        tree->table_value[index] = i;
+      }
+    } else {
+      /*long symbol, shares prefix with other long symbols in first lookup table, needs second lookup*/
+      /*the FIRSTBITS MSBs of the symbol are the first table index*/
+      unsigned index = reverse & mask;
+      unsigned maxlen = tree->table_len[index];
+      unsigned tablelen, start, num, j;
+      /*validate before deriving anything from maxlen: the shift count below would wrap around otherwise*/
+      if(maxlen < l) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+      /*log2 of secondary table length, should be >= l - FIRSTBITS*/
+      tablelen = maxlen - FIRSTBITS;
+      start = tree->table_value[index]; /*starting index in secondary table*/
+      num = 1u << (tablelen - (l - FIRSTBITS)); /*amount of entries of this symbol in secondary table*/
+      for(j = 0; j < num; ++j) {
+        unsigned reverse2 = reverse >> FIRSTBITS; /* l - FIRSTBITS bits */
+        unsigned index2 = start + (reverse2 | (j << (l - FIRSTBITS)));
+        tree->table_len[index2] = l;
+        tree->table_value[index2] = i;
+      }
+    }
+  }
+
+  if(numpresent < 2) {
+    /* In case of exactly 1 symbol, in theory the huffman symbol needs 0 bits,
+    but deflate uses 1 bit instead. In case of 0 symbols, no symbols can
+    appear at all, but such huffman tree could still exist (e.g. if distance
+    codes are never used). In both cases, not all symbols of the table will be
+    filled in. Fill them in with an invalid symbol value so returning them from
+    huffmanDecodeSymbol will cause error. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) {
+        /* As length, use a value smaller than FIRSTBITS for the head table,
+        and a value larger than FIRSTBITS for the secondary table, to ensure
+        valid behavior for advanceBits when reading this symbol. */
+        tree->table_len[i] = (i < headsize) ? 1 : (FIRSTBITS + 1);
+        tree->table_value[i] = INVALIDSYMBOL;
+      }
+    }
+  } else {
+    /* A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
+    If that is not the case (due to too long length codes), the table will not
+    have been fully used, and this is an error (not all bit combinations can be
+    decoded): an oversubscribed huffman tree, indicated by error 55. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) return 55;
+    }
+  }
+
+  return 0;
+}
+
+/*
+Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
+Assigns the huffman bit patterns (tree->codes) from the code lengths and then builds the
+decoding table. numcodes, lengths and maxbitlen must already be filled in correctly.
+return value is error: 83 on allocation failure, otherwise the result of HuffmanTree_makeTable.
+*/
+static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) {
+  unsigned error = 0;
+  unsigned len, sym;
+  const unsigned numlens = tree->maxbitlen + 1; /*one counter per possible length 0..maxbitlen*/
+  unsigned* blcount; /*how many symbols use each code length*/
+  unsigned* nextcode; /*next bit pattern to hand out for each code length*/
+
+  tree->codes = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
+  blcount = (unsigned*)lodepng_malloc(numlens * sizeof(unsigned));
+  nextcode = (unsigned*)lodepng_malloc(numlens * sizeof(unsigned));
+
+  if(tree->codes && blcount && nextcode) {
+    for(len = 0; len != numlens; len++) blcount[len] = nextcode[len] = 0;
+    /*step 1: count number of instances of each code length*/
+    for(sym = 0; sym != tree->numcodes; ++sym) ++blcount[tree->lengths[sym]];
+    /*step 2: generate the nextcode values*/
+    for(len = 1; len <= tree->maxbitlen; ++len) {
+      nextcode[len] = (nextcode[len - 1] + blcount[len - 1]) << 1u;
+    }
+    /*step 3: generate all the codes; symbols of length 0 get no code*/
+    for(sym = 0; sym != tree->numcodes; ++sym) {
+      const unsigned l = tree->lengths[sym];
+      if(l == 0) continue;
+      /*take the next pattern of this length and remove superfluous bits from the code*/
+      tree->codes[sym] = nextcode[l]++ & ((1u << l) - 1u);
+    }
+  } else {
+    error = 83; /*alloc fail*/
+  }
+
+  lodepng_free(blcount);
+  lodepng_free(nextcode);
+
+  return error ? error : HuffmanTree_makeTable(tree);
+}
+
+/*
+Builds the tree from code lengths as stored in the PNG file, following the Deflate rules.
+bitlen holds numcodes code lengths; they are copied into tree->lengths.
+maxbitlen is the maximum number of bits a code in the tree can have.
+Returns 0 on success, 83 on allocation failure, otherwise the error of
+HuffmanTree_makeFromLengths2.
+*/
+static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
+                                            size_t numcodes, unsigned maxbitlen) {
+  unsigned sym;
+
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(tree->lengths == 0) return 83; /*alloc fail*/
+
+  sym = 0;
+  while(sym != numcodes) {
+    tree->lengths[sym] = bitlen[sym];
+    ++sym;
+  }
+
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  return HuffmanTree_makeFromLengths2(tree);
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
+Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
+
+/*chain node for boundary package merge. Nodes live in the pool owned by BPMLists and are
+handed out by bpmnode_create. The same struct is also used for the array of leaves, where
+weight is the symbol frequency and index is the symbol number.*/
+typedef struct BPMNode {
+ int weight; /*the sum of all weights in this chain*/
+ unsigned index; /*index of this leaf node (called "count" in the paper)*/
+ struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
+ int in_use; /*mark flag set by the garbage collection pass in bpmnode_create*/
+} BPMNode;
+
+/*lists of chains, together with the node pool the chains are allocated from*/
+typedef struct BPMLists {
+ /*memory pool*/
+ unsigned memsize; /*number of nodes in memory, and number of slots in freelist*/
+ BPMNode* memory; /*the pool of nodes*/
+ unsigned numfree; /*number of valid entries in freelist*/
+ unsigned nextfree; /*index into freelist of the next node to hand out*/
+ BPMNode** freelist; /*pointers to the pool nodes that are currently free*/
+ /*two heads of lookahead chains per list*/
+ unsigned listsize; /*number of lists, i.e. number of entries in chains0 and in chains1*/
+ BPMNode** chains0;
+ BPMNode** chains1;
+} BPMLists;
+
+/*
+Takes a node out of the pool in lists, fills in weight, index and tail, and returns it.
+When the free list is used up, the pool is garbage collected first: every node reachable
+from a chains0 or chains1 head stays in use, all other nodes become free again.
+*/
+static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) {
+  BPMNode* fresh;
+
+  if(lists->nextfree >= lists->numfree) {
+    unsigned k;
+    BPMNode* walk;
+
+    /*mark: clear every flag, then flag whatever the chains still reference*/
+    for(k = 0; k != lists->memsize; ++k) lists->memory[k].in_use = 0;
+    for(k = 0; k != lists->listsize; ++k) {
+      walk = lists->chains0[k];
+      while(walk != 0) {
+        walk->in_use = 1;
+        walk = walk->tail;
+      }
+      walk = lists->chains1[k];
+      while(walk != 0) {
+        walk->in_use = 1;
+        walk = walk->tail;
+      }
+    }
+
+    /*sweep: rebuild the free list from the nodes that were not flagged*/
+    lists->numfree = 0;
+    for(k = 0; k != lists->memsize; ++k) {
+      BPMNode* candidate = &lists->memory[k];
+      if(candidate->in_use) continue;
+      lists->freelist[lists->numfree] = candidate;
+      lists->numfree += 1;
+    }
+    lists->nextfree = 0;
+  }
+
+  fresh = lists->freelist[lists->nextfree];
+  lists->nextfree += 1;
+  fresh->tail = tail;
+  fresh->index = index;
+  fresh->weight = weight;
+  return fresh;
+}
+
+/*
+Sorts the leaves by ascending weight. The sort is stable: leaves with equal weight keep
+their relative order.
+Normally a bottom-up merge sort with a scratch buffer is used. If the scratch buffer cannot
+be allocated, an in-place insertion sort (also stable) is used instead, so an allocation
+failure no longer leads to a null pointer dereference.
+*/
+static void bpmnode_sort(BPMNode* leaves, size_t num) {
+  BPMNode* mem;
+  size_t width, counter = 0;
+
+  if(num < 2) return; /*already sorted, no scratch buffer needed*/
+
+  mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
+  if(!mem) {
+    /*fallback without extra memory: stable insertion sort*/
+    size_t i;
+    for(i = 1; i < num; i++) {
+      BPMNode moving = leaves[i];
+      size_t j = i;
+      /*strict '>' keeps equal weights in their original order*/
+      while(j > 0 && leaves[j - 1].weight > moving.weight) {
+        leaves[j] = leaves[j - 1];
+        j--;
+      }
+      leaves[j] = moving;
+    }
+    return;
+  }
+
+  for(width = 1; width < num; width *= 2) {
+    /*alternate the roles of the two buffers on every pass*/
+    BPMNode* a = (counter & 1) ? mem : leaves;
+    BPMNode* b = (counter & 1) ? leaves : mem;
+    size_t p;
+    for(p = 0; p < num; p += 2 * width) {
+      size_t q = (p + width > num) ? num : (p + width);
+      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
+      size_t i = p, j = q, k;
+      /*merge the runs a[p..q) and a[q..r) into b[p..r); '<=' prefers the left run on ties*/
+      for(k = p; k < r; k++) {
+        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
+        else b[k] = a[j++];
+      }
+    }
+    counter++;
+  }
+  /*after an odd number of passes the sorted data sits in the scratch buffer*/
+  if(counter & 1) lodepng_memcpy(leaves, mem, sizeof(*leaves) * num);
+  lodepng_free(mem);
+}
+
+/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.
+lists holds the lookahead chains and the node pool; leaves is the array of symbols, which the
+caller sorts by weight beforehand. num is the iteration counter of the caller and is only used
+to decide whether the previous list still has to be refilled.*/
+static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) {
+ /*index stored in the newest chain of list c; used below as the position of the next leaf*/
+ unsigned lastindex = lists->chains1[c]->index;
+
+ if(c == 0) {
+ /*the first list only ever receives leaves; nothing to add once all leaves are used*/
+ if(lastindex >= numpresent) return;
+ lists->chains0[c] = lists->chains1[c];
+ lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
+ } else {
+ /*sum of the weights of the head nodes of the previous lookahead chains.*/
+ int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
+ lists->chains0[c] = lists->chains1[c];
+ if(lastindex < numpresent && sum > leaves[lastindex].weight) {
+ /*the next leaf is lighter than the package: add the leaf, keeping the current tail*/
+ lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
+ return;
+ }
+ /*otherwise add a package whose tail is the newest chain of the previous list*/
+ lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
+ /*in the end we are only interested in the chain of the last list, so no
+ need to recurse if we're at the last one (this gives measurable speedup)*/
+ if(num + 1 < (int)(2 * numpresent - 2)) {
+ /*both lookahead chains of the previous list were used for the package, so produce two new ones*/
+ boundaryPM(lists, leaves, numpresent, c - 1, num);
+ boundaryPM(lists, leaves, numpresent, c - 1, num);
+ }
+ }
+}
+
+/*
+Computes length-limited Huffman code lengths with the boundary package-merge algorithm.
+lengths: output array of numcodes entries, receives the bit length of every symbol (0 = unused).
+frequencies: input array of numcodes entries with the occurrence count of every symbol.
+maxbitlen: the longest code length that may be produced.
+Returns 0 on success, 80 when numcodes is 0 or maxbitlen is too small to represent all
+symbols, 83 on allocation failure.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  unsigned i;
+  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
+  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
+
+  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
+  if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/
+
+  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
+  if(!leaves) return 83; /*alloc fail*/
+
+  for(i = 0; i != numcodes; ++i) {
+    if(frequencies[i] > 0) {
+      leaves[numpresent].weight = (int)frequencies[i];
+      leaves[numpresent].index = i;
+      ++numpresent;
+    }
+  }
+
+  lodepng_memset(lengths, 0, numcodes * sizeof(*lengths));
+
+  /*ensure at least two present symbols. There should be at least one symbol
+  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
+  make these work as well ensure there are at least two symbols. The
+  Package-Merge code below also doesn't work correctly if there's only one
+  symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit.
+  A second symbol can only be added when the lengths array has room for it: with
+  numcodes == 1 there is no lengths[1], so only lengths[0] is set.*/
+  if(numpresent == 0) {
+    lengths[0] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
+    if(numcodes > 1) lengths[1] = 1;
+  } else if(numpresent == 1) {
+    lengths[leaves[0].index] = 1;
+    if(numcodes > 1) lengths[leaves[0].index == 0 ? 1 : 0] = 1;
+  } else {
+    BPMLists lists;
+    BPMNode* node;
+
+    bpmnode_sort(leaves, numpresent);
+
+    lists.listsize = maxbitlen;
+    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
+    lists.nextfree = 0;
+    lists.numfree = lists.memsize;
+    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
+    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
+    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
+
+    if(!error) {
+      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
+
+      /*the first two pool nodes (memory[0] and memory[1]) hold the two lightest leaves*/
+      bpmnode_create(&lists, leaves[0].weight, 1, 0);
+      bpmnode_create(&lists, leaves[1].weight, 2, 0);
+
+      for(i = 0; i != lists.listsize; ++i) {
+        lists.chains0[i] = &lists.memory[0];
+        lists.chains1[i] = &lists.memory[1];
+      }
+
+      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
+      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
+
+      /*every node in the final chain adds one bit to the first node->index leaves*/
+      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) {
+        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
+      }
+    }
+
+    lodepng_free(lists.memory);
+    lodepng_free(lists.freelist);
+    lodepng_free(lists.chains0);
+    lodepng_free(lists.chains1);
+  }
+
+  lodepng_free(leaves);
+  return error;
+}
+
+/*
+Create the Huffman tree given the symbol frequencies.
+frequencies holds numcodes counts. Trailing symbols with frequency 0 are dropped, but never
+below mincodes symbols. maxbitlen is the longest code length allowed.
+Returns 0 on success, 83 on allocation failure, otherwise the error reported by
+lodepng_huffman_code_lengths or HuffmanTree_makeFromLengths2.
+*/
+static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
+                                                size_t mincodes, size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  /*trim zeroes. The size test comes first so that frequencies[numcodes - 1] is never
+  evaluated when numcodes is 0.*/
+  while(numcodes > mincodes && !frequencies[numcodes - 1]) --numcodes;
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+
+  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
+  if(!error) error = HuffmanTree_makeFromLengths2(tree);
+  return error;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+Fills tree with the literal/length code tree of a deflate block that uses the fixed tree,
+as per the deflate specification.
+Returns 0 on success, 83 on allocation failure, otherwise the error of HuffmanTree_makeFromLengths.
+*/
+static unsigned generateFixedLitLenTree(HuffmanTree* tree) {
+  unsigned status;
+  unsigned sym;
+  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+  if(lens == 0) return 83; /*alloc fail*/
+
+  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
+  for(sym = 0; sym <= 287; ++sym) {
+    if(sym <= 143) lens[sym] = 8;
+    else if(sym <= 255) lens[sym] = 9;
+    else if(sym <= 279) lens[sym] = 7;
+    else lens[sym] = 8;
+  }
+
+  status = HuffmanTree_makeFromLengths(tree, lens, NUM_DEFLATE_CODE_SYMBOLS, 15);
+
+  lodepng_free(lens);
+  return status;
+}
+
+/*
+Fills tree with the distance code tree of a deflate block that uses the fixed tree, as
+specified in the deflate specification: every distance symbol has a 5 bit code.
+Returns 0 on success, 83 on allocation failure, otherwise the error of HuffmanTree_makeFromLengths.
+*/
+static unsigned generateFixedDistanceTree(HuffmanTree* tree) {
+  unsigned status;
+  unsigned sym = 0;
+  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+  if(lens == 0) return 83; /*alloc fail*/
+
+  /*there are 32 distance codes, but 30-31 are unused*/
+  while(sym != NUM_DISTANCE_SYMBOLS) {
+    lens[sym] = 5;
+    ++sym;
+  }
+  status = HuffmanTree_makeFromLengths(tree, lens, NUM_DISTANCE_SYMBOLS, 15);
+
+  lodepng_free(lens);
+  return status;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decodes one symbol from the bit reader with the two-level table of codetree and returns it.
+The caller must already have ensured at least 15 bits in the bit reader.
+*/
+static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) {
+  unsigned short first = peekBits(reader, FIRSTBITS);
+  unsigned short len = codetree->table_len[first];
+  unsigned short entry = codetree->table_value[first];
+  unsigned second;
+
+  /*short code: the head table entry already is the symbol*/
+  if(len <= FIRSTBITS) {
+    advanceBits(reader, len);
+    return entry;
+  }
+
+  /*long code: the head table entry is the base offset of a secondary table, which is
+  indexed with the remaining bits of the code*/
+  advanceBits(reader, FIRSTBITS);
+  second = entry + peekBits(reader, len - FIRSTBITS);
+  advanceBits(reader, codetree->table_len[second] - FIRSTBITS);
+  return codetree->table_value[second];
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Inflator (Decompressor) / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Builds both trees of a deflate block with fixed tree, as specified in the deflate
+specification: tree_ll for literal/length codes and tree_d for distance codes.
+Returns error code, 0 on success. tree_d is not built when tree_ll fails.*/
+static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
+  unsigned status = generateFixedLitLenTree(tree_ll);
+  if(!status) status = generateFixedDistanceTree(tree_d);
+  return status;
+}
+
+/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree.
+Reads HLIT, HDIST and HCLEN from the reader, builds the code length tree from the HCLEN 3-bit lengths,
+decodes the HLIT + HDIST code lengths with it and finally builds tree_ll and tree_d from those.
+Returns 0 on success or a non-zero error code; all temporary buffers are released on every path
+that passes the cleanup at the end of the function.*/
+static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
+ LodePNGBitReader* reader) {
+ /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
+ unsigned error = 0;
+ unsigned n, HLIT, HDIST, HCLEN, i;
+
+ /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
+ unsigned* bitlen_ll = 0; /*lit,len code lengths*/
+ unsigned* bitlen_d = 0; /*dist code lengths*/
+ /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
+ unsigned* bitlen_cl = 0;
+ HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
+
+ /*the three header fields below take 5 + 5 + 4 = 14 bits*/
+ if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/
+
+ /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
+ HLIT = readBits(reader, 5) + 257;
+ /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
+ HDIST = readBits(reader, 5) + 1;
+ /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
+ HCLEN = readBits(reader, 4) + 4;
+
+ bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
+ if(!bitlen_cl) return 83 /*alloc fail*/;
+
+ HuffmanTree_init(&tree_cl);
+
+ /*not a real loop: the body runs at most once and ends with an unconditional break, the while
+ only exists so that an error can jump to the cleanup code with break*/
+ while(!error) {
+ /*read the code length codes out of 3 * (amount of code length codes) bits*/
+ if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) {
+ ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/
+ }
+ for(i = 0; i != HCLEN; ++i) {
+ ensureBits9(reader, 3); /*out of bounds already checked above */
+ bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3);
+ }
+ /*code length codes that are not transmitted have length 0*/
+ for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) {
+ bitlen_cl[CLCL_ORDER[i]] = 0;
+ }
+
+ error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
+ if(error) break;
+
+ /*now we can use this tree to read the lengths for the tree that this function will return*/
+ bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+ bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+ if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
+ lodepng_memset(bitlen_ll, 0, NUM_DEFLATE_CODE_SYMBOLS * sizeof(*bitlen_ll));
+ lodepng_memset(bitlen_d, 0, NUM_DISTANCE_SYMBOLS * sizeof(*bitlen_d));
+
+ /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
+ i = 0;
+ while(i < HLIT + HDIST) {
+ unsigned code;
+ ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/
+ code = huffmanDecodeSymbol(reader, &tree_cl);
+ if(code <= 15) /*a length code*/ {
+ /*the first HLIT lengths belong to the lit/len tree, the rest to the distance tree*/
+ if(i < HLIT) bitlen_ll[i] = code;
+ else bitlen_d[i - HLIT] = code;
+ ++i;
+ } else if(code == 16) /*repeat previous*/ {
+ unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
+ unsigned value; /*set value to the previous code*/
+
+ if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
+
+ replength += readBits(reader, 2);
+
+ /*the previous length is still in bitlen_ll when i == HLIT, hence the + 1*/
+ if(i < HLIT + 1) value = bitlen_ll[i - 1];
+ else value = bitlen_d[i - HLIT - 1];
+ /*repeat this value in the next lengths*/
+ for(n = 0; n < replength; ++n) {
+ if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
+ if(i < HLIT) bitlen_ll[i] = value;
+ else bitlen_d[i - HLIT] = value;
+ ++i;
+ }
+ } else if(code == 17) /*repeat "0" 3-10 times*/ {
+ unsigned replength = 3; /*read in the bits that indicate repeat length*/
+ replength += readBits(reader, 3);
+
+ /*repeat this value in the next lengths*/
+ for(n = 0; n < replength; ++n) {
+ if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
+
+ if(i < HLIT) bitlen_ll[i] = 0;
+ else bitlen_d[i - HLIT] = 0;
+ ++i;
+ }
+ } else if(code == 18) /*repeat "0" 11-138 times*/ {
+ unsigned replength = 11; /*read in the bits that indicate repeat length*/
+ replength += readBits(reader, 7);
+
+ /*repeat this value in the next lengths*/
+ for(n = 0; n < replength; ++n) {
+ if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
+
+ if(i < HLIT) bitlen_ll[i] = 0;
+ else bitlen_d[i - HLIT] = 0;
+ ++i;
+ }
+ } else /*if(code == INVALIDSYMBOL)*/ {
+ ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+ }
+ /*check if any of the ensureBits above went out of bounds*/
+ if(reader->bp > reader->bitsize) {
+ /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+ (10=no endcode, 11=wrong jump outside of tree)*/
+ /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+ ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+ }
+ }
+ /*an error set inside one of the repeat loops above only left that inner loop; stop here*/
+ if(error) break;
+
+ if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
+
+ /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
+ error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
+ if(error) break;
+ error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
+
+ break; /*end of error-while*/
+ }
+
+ /*common cleanup; bitlen_ll and bitlen_d may still be null here*/
+ lodepng_free(bitlen_cl);
+ lodepng_free(bitlen_ll);
+ lodepng_free(bitlen_d);
+ HuffmanTree_cleanup(&tree_cl);
+
+ return error;
+}
+
+/*inflate a block with dynamic or fixed Huffman tree. btype must be 1 or 2.
+Decoded bytes are appended to out. Decoding stops at the end code (symbol 256) or at the first error.
+Returns 0 on success or a non-zero error code; both trees are cleaned up in either case.*/
+static unsigned inflateHuffmanBlock(ucvector* out, LodePNGBitReader* reader,
+ unsigned btype) {
+ unsigned error = 0;
+ HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
+ HuffmanTree tree_d; /*the huffman tree for distance codes*/
+
+ HuffmanTree_init(&tree_ll);
+ HuffmanTree_init(&tree_d);
+
+ if(btype == 1) error = getTreeInflateFixed(&tree_ll, &tree_d);
+ else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader);
+
+ while(!error) /*decode all symbols until end reached, breaks at end code*/ {
+ /*code_ll is literal, length or end code*/
+ unsigned code_ll;
+ ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */
+ code_ll = huffmanDecodeSymbol(reader, &tree_ll);
+ if(code_ll <= 255) /*literal symbol*/ {
+ if(!ucvector_resize(out, out->size + 1)) ERROR_BREAK(83 /*alloc fail*/);
+ out->data[out->size - 1] = (unsigned char)code_ll;
+ } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ {
+ unsigned code_d, distance;
+ unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
+ size_t start, backward, length;
+
+ /*part 1: get length base*/
+ length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
+
+ /*part 2: get extra bits and add the value of that to length*/
+ numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
+ if(numextrabits_l != 0) {
+ /* bits already ensured above */
+ length += readBits(reader, numextrabits_l);
+ }
+
+ /*part 3: get distance code*/
+ ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */
+ code_d = huffmanDecodeSymbol(reader, &tree_d);
+ if(code_d > 29) {
+ if(code_d <= 31) {
+ ERROR_BREAK(18); /*error: invalid distance code (30-31 are never used)*/
+ } else /* if(code_d == INVALIDSYMBOL) */{
+ ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+ }
+ }
+ distance = DISTANCEBASE[code_d];
+
+ /*part 4: get extra bits from distance*/
+ numextrabits_d = DISTANCEEXTRA[code_d];
+ if(numextrabits_d != 0) {
+ /* bits already ensured above */
+ distance += readBits(reader, numextrabits_d);
+ }
+
+ /*part 5: fill in all the out[n] values based on the length and dist*/
+ start = out->size;
+ if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
+ backward = start - distance;
+
+ if(!ucvector_resize(out, out->size + length)) ERROR_BREAK(83 /*alloc fail*/);
+ if(distance < length) {
+ /*source and destination overlap: copy the first distance bytes in one go, then continue
+ byte by byte so that bytes written by this same copy are read again*/
+ size_t forward;
+ lodepng_memcpy(out->data + start, out->data + backward, distance);
+ start += distance;
+ for(forward = distance; forward < length; ++forward) {
+ out->data[start++] = out->data[backward++];
+ }
+ } else {
+ lodepng_memcpy(out->data + start, out->data + backward, length);
+ }
+ } else if(code_ll == 256) {
+ break; /*end code, break the loop*/
+ } else /*if(code_ll == INVALIDSYMBOL)*/ {
+ ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+ }
+ /*check if any of the ensureBits above went out of bounds*/
+ if(reader->bp > reader->bitsize) {
+ /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+ (10=no endcode, 11=wrong jump outside of tree)*/
+ /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+ ERROR_BREAK(51); /*error, bit pointer jumps past memory*/
+ }
+ }
+
+ HuffmanTree_cleanup(&tree_ll);
+ HuffmanTree_cleanup(&tree_d);
+
+ return error;
+}
+
+/*
+Inflates a stored (uncompressed) deflate block: skips to the next byte boundary, reads LEN
+and NLEN, and appends the LEN literal bytes that follow to out. On success the bit pointer
+of the reader is moved behind the copied bytes.
+Returns 0 on success, 52 when the 4 header bytes are not available, 21 when NLEN is not the
+one's complement of LEN (unless settings->ignore_nlen is set), 23 when fewer than LEN data
+bytes remain, 83 on allocation failure.
+*/
+static unsigned inflateNoCompression(ucvector* out, LodePNGBitReader* reader,
+                                     const LodePNGDecompressSettings* settings) {
+  size_t bytepos;
+  size_t size = reader->size;
+  unsigned LEN, NLEN;
+
+  /*go to first boundary of byte*/
+  bytepos = (reader->bp + 7u) >> 3u;
+
+  /*read LEN (2 bytes) and NLEN (2 bytes). The four bytes bytepos..bytepos + 3 are read, so
+  they fit as long as bytepos + 4 <= size; an empty stored block at the very end of the
+  input is valid and must not be rejected*/
+  if(bytepos + 4 > size) return 52; /*error, bit pointer will jump past memory*/
+  LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+  NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+
+  /*check if 16-bit NLEN is really the one's complement of LEN*/
+  if(!settings->ignore_nlen && LEN + NLEN != 65535) {
+    return 21; /*error: NLEN is not one's complement of LEN*/
+  }
+
+  /*validate against the input before growing the output, so that a truncated stream does
+  not leave LEN uninitialized bytes at the end of out*/
+  if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/
+
+  if(!ucvector_resize(out, out->size + LEN)) return 83; /*alloc fail*/
+
+  /*read the literal data: LEN bytes are now stored in the out buffer*/
+  if(LEN != 0) {
+    lodepng_memcpy(out->data + out->size - LEN, reader->data + bytepos, LEN);
+    bytepos += LEN;
+  }
+
+  reader->bp = bytepos << 3u;
+
+  return 0;
+}
+
+/*
+Inflates the deflate stream in[0..insize) and appends the result to out. Blocks are decoded
+one after another until a block with the BFINAL bit set has been processed.
+Returns 0 on success, the error of LodePNGBitReader_init, 52 when a block header cannot be
+read, 20 for the invalid block type 3, or the error of the block decoder.
+*/
+static unsigned lodepng_inflatev(ucvector* out,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings) {
+  LodePNGBitReader reader;
+  unsigned lastblock = 0;
+  unsigned status = LodePNGBitReader_init(&reader, in, insize);
+
+  while(!status && !lastblock) {
+    unsigned blocktype;
+    if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/
+    lastblock = readBits(&reader, 1);
+    blocktype = readBits(&reader, 2);
+
+    switch(blocktype) {
+      case 3:
+        return 20; /*error: invalid BTYPE*/
+      case 0:
+        status = inflateNoCompression(out, &reader, settings); /*no compression*/
+        break;
+      default:
+        status = inflateHuffmanBlock(out, &reader, blocktype); /*compression, BTYPE 01 or 10*/
+        break;
+    }
+  }
+
+  return status;
+}
+
+/*
+Inflates in[0..insize) with the built-in inflator. The buffer *out of *outsize bytes is
+handed to a ucvector that the decoded data is appended to; *out and *outsize are updated to
+the resulting buffer and size, also when an error is returned.
+Returns the error code of lodepng_inflatev, 0 on success.
+*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings) {
+  unsigned status;
+  ucvector buffer = ucvector_init(*out, *outsize);
+
+  status = lodepng_inflatev(&buffer, in, insize, settings);
+
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return status;
+}
+
+/*
+Inflates in[0..insize) into out, using settings->custom_inflate when it is set and the
+built-in lodepng_inflatev otherwise. After a custom inflate the allocsize of out is set
+equal to its size. Returns the error code of the inflator that was used.
+*/
+static unsigned inflatev(ucvector* out, const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings) {
+  unsigned status;
+
+  if(!settings->custom_inflate) return lodepng_inflatev(out, in, insize, settings);
+
+  status = settings->custom_inflate(&out->data, &out->size, in, insize, settings);
+  out->allocsize = out->size;
+  return status;
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflator (Compressor) / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*longest match length the LZ77 encoder produces. It also bounds the zero run counted by
+countZeros and sizes the headz table of Hash (MAX_SUPPORTED_DEFLATE_LENGTH + 1 entries)*/
+static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
+
+/*search the index in the array, that has the largest value smaller than or equal to the given value,
+given array must be sorted in ascending order. If value is smaller than every element, or the array
+is empty, 0 is returned.*/
+static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) {
+  /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
+  size_t left = 1;
+  size_t right;
+
+  /*without this guard array_size - 1 would wrap around and the search would read out of bounds*/
+  if(array_size == 0) return 0;
+  right = array_size - 1;
+
+  /*find the first index >= 1 whose element is >= value (or array_size if there is none).
+  mid is always >= 1 here, so mid - 1 cannot wrap around*/
+  while(left <= right) {
+    size_t mid = (left + right) >> 1;
+    if(array[mid] >= value) right = mid - 1;
+    else left = mid + 1;
+  }
+  /*step back one element unless array[left] equals value exactly*/
+  if(left >= array_size || array[left] > value) left--;
+  return left;
+}
+
+/*
+Appends one LZ77 length/distance pair to values as four consecutive entries:
+length code, extra length bits, distance code, extra distance bits.
+The values in the encoded vector are those used by deflate:
+0-255: literal bytes
+256: end
+257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
+286-287: invalid
+Nothing is appended when the vector cannot be resized.
+*/
+static void addLengthDistance(uivector* values, size_t length, size_t distance) {
+  size_t base = values->size;
+  unsigned lcode = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
+  unsigned lextra = (unsigned)(length - LENGTHBASE[lcode]);
+  unsigned dcode = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
+  unsigned dextra = (unsigned)(distance - DISTANCEBASE[dcode]);
+
+  /*TODO: return error when this fails (out of memory)*/
+  if(!uivector_resize(values, values->size + 4)) return;
+
+  values->data[base] = lcode + FIRST_LENGTH_CODE_INDEX;
+  values->data[base + 1] = lextra;
+  values->data[base + 2] = dcode;
+  values->data[base + 3] = dextra;
+}
+
+/*3 bytes of data get hashed into a two byte (16-bit) value, see getHash. The hash cannot use
+more than 3 bytes as input because 3 is the minimum match length for deflate*/
+static const unsigned HASH_NUM_VALUES = 65536;
+static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
+
+/*hash chains used by the LZ77 encoder. All arrays are allocated and initialized by hash_init
+and released by hash_cleanup. head has HASH_NUM_VALUES entries, headz has
+MAX_SUPPORTED_DEFLATE_LENGTH + 1 entries, every other array has windowsize entries.
+In head, val and headz the value -1 means "no entry"; in chain and chainz an entry equal to
+its own index means "uninitialized".*/
+typedef struct Hash {
+ int* head; /*hash value to head circular pos - can be outdated if went around window*/
+ /*circular pos to prev circular pos*/
+ unsigned short* chain;
+ int* val; /*circular pos to hash value*/
+
+ /*TODO: do this not only for zeros but for any repeated byte. However for PNG
+ it's always going to be the zeros that dominate, so not important for PNG*/
+ int* headz; /*similar to head, but for chainz*/
+ unsigned short* chainz; /*those with same amount of zeros*/
+ unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
+} Hash;
+
+/*
+Allocates all arrays of hash for a window of windowsize positions and puts them in the
+empty state: -1 in head, val and headz, and every chain/chainz entry pointing to itself.
+Returns 0 on success or 83 on allocation failure. Nothing is freed here on failure.
+*/
+static unsigned hash_init(Hash* hash, unsigned windowsize) {
+  unsigned k;
+
+  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
+  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
+  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
+  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  if(!(hash->head && hash->chain && hash->val && hash->headz && hash->chainz && hash->zeros)) {
+    return 83; /*alloc fail*/
+  }
+
+  /*tables indexed by hash value and by zero count start out empty*/
+  for(k = 0; k != HASH_NUM_VALUES; ++k) hash->head[k] = -1;
+  for(k = 0; k <= MAX_SUPPORTED_DEFLATE_LENGTH; ++k) hash->headz[k] = -1;
+
+  /*tables indexed by circular window position*/
+  for(k = 0; k != windowsize; ++k) {
+    hash->val[k] = -1;
+    hash->chain[k] = k; /*same value as index indicates uninitialized*/
+    hash->chainz[k] = k; /*same value as index indicates uninitialized*/
+  }
+
+  return 0;
+}
+
+/*Releases the six arrays of hash that hash_init allocated. The pointers themselves are left unchanged.*/
+static void hash_cleanup(Hash* hash) {
+  /*main hash chain*/
+  lodepng_free(hash->head);
+  lodepng_free(hash->val);
+  lodepng_free(hash->chain);
+  /*secondary chain based on runs of zeros*/
+  lodepng_free(hash->zeros);
+  lodepng_free(hash->headz);
+  lodepng_free(hash->chainz);
+}
+
+
+
+/*
+Returns the hash, masked with HASH_BIT_MASK, of the up to 3 bytes of data starting at pos.
+size is the length of data. When fewer than 3 bytes remain, only the remaining bytes are
+hashed (with a different shift per byte); when pos is at or beyond size the result is 0.
+*/
+static unsigned getHash(const unsigned char* data, size_t size, size_t pos) {
+  unsigned hash = 0;
+
+  if(pos + 2 >= size) {
+    /*tail of the input: fewer than 3 bytes available*/
+    size_t k, remaining;
+    if(pos >= size) return 0;
+    remaining = size - pos;
+    for(k = 0; k != remaining; ++k) hash ^= ((unsigned)data[pos + k] << (k * 8u));
+  } else {
+    /*A simple shift and xor hash is used. Since the data of PNGs is dominated
+    by zeroes due to the filters, a better hash does not have a significant
+    effect on speed in traversing the chain, and causes more time spend on
+    calculating the hash.*/
+    hash ^= ((unsigned)data[pos] << 0u);
+    hash ^= ((unsigned)data[pos + 1] << 4u);
+    hash ^= ((unsigned)data[pos + 2] << 8u);
+  }
+
+  return hash & HASH_BIT_MASK;
+}
+
+/*
+Returns the number of consecutive zero bytes in data starting at pos, counting at most
+MAX_SUPPORTED_DEFLATE_LENGTH bytes and never reading at or beyond size.
+Returns 0 when pos is at or beyond size.
+The limit is computed with indices rather than by forming data + pos +
+MAX_SUPPORTED_DEFLATE_LENGTH, because a pointer beyond one past the end of the buffer is
+undefined behavior even if it is never dereferenced.
+*/
+static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) {
+  size_t limit;
+  size_t count = 0;
+
+  if(pos >= size) return 0;
+  limit = size - pos;
+  if(limit > MAX_SUPPORTED_DEFLATE_LENGTH) limit = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  while(count != limit && data[pos + count] == 0) ++count;
+  /*count is at most MAX_SUPPORTED_DEFLATE_LENGTH, so it fits in unsigned*/
+  return (unsigned)count;
+}
+
+/*
+Registers window position wpos (wpos = pos & (windowsize - 1)) in both hash chains:
+the chain of positions with hash value hashval and the chain of positions with numzeros
+leading zeros. If a previous head exists for the value, wpos is linked to it; wpos then
+becomes the new head.
+*/
+static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) {
+  int prevhead = hash->head[hashval];
+  int prevheadz = hash->headz[numzeros];
+
+  /*main chain, keyed by hash value*/
+  hash->val[wpos] = (int)hashval;
+  if(prevhead != -1) hash->chain[wpos] = prevhead;
+  hash->head[hashval] = (int)wpos;
+
+  /*secondary chain, keyed by the length of the zero run*/
+  hash->zeros[wpos] = numzeros;
+  if(prevheadz != -1) hash->chainz[wpos] = prevheadz;
+  hash->headz[numzeros] = (int)wpos;
+}
+
+/*
+LZ77-encode the data. Return value is error code. The input are raw bytes, the output
+is in the form of unsigned integers with codes representing for example literal bytes, or
+length/distance pairs.
+It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
+sliding window (of windowsize) is used, and all past bytes in that window can be used as
+the "dictionary". A brute force search through all possible distances would be slow, and
+this hash technique is one out of several ways to speed this up.
+*/
+static unsigned encodeLZ77(uivector* out, Hash* hash,
+ const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
+ unsigned minmatch, unsigned nicematch, unsigned lazymatching) {
+ size_t pos;
+ unsigned i, error = 0;
+ /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
+ unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8u;
+ unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
+
+ unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
+ unsigned numzeros = 0;
+
+ unsigned offset; /*the offset represents the distance in LZ77 terminology*/
+ unsigned length;
+ unsigned lazy = 0;
+ unsigned lazylength = 0, lazyoffset = 0;
+ unsigned hashval;
+ unsigned current_offset, current_length;
+ unsigned prev_offset;
+ const unsigned char *lastptr, *foreptr, *backptr;
+ unsigned hashpos;
+
+ if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
+ if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
+
+ if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+ for(pos = inpos; pos < insize; ++pos) {
+ size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
+ unsigned chainlength = 0;
+
+ hashval = getHash(in, insize, pos);
+
+ if(usezeros && hashval == 0) {
+ if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+ else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+ } else {
+ numzeros = 0;
+ }
+
+ updateHashChain(hash, wpos, hashval, numzeros);
+
+ /*the length and offset found for the current position*/
+ length = 0;
+ offset = 0;
+
+ hashpos = hash->chain[wpos];
+
+ lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
+
+ /*search for the longest string*/
+ prev_offset = 0;
+ for(;;) {
+ if(chainlength++ >= maxchainlength) break;
+ current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize);
+
+ if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
+ prev_offset = current_offset;
+ if(current_offset > 0) {
+ /*test the next characters*/
+ foreptr = &in[pos];
+ backptr = &in[pos - current_offset];
+
+ /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
+ if(numzeros >= 3) {
+ unsigned skip = hash->zeros[hashpos];
+ if(skip > numzeros) skip = numzeros;
+ backptr += skip;
+ foreptr += skip;
+ }
+
+ while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ {
+ ++backptr;
+ ++foreptr;
+ }
+ current_length = (unsigned)(foreptr - &in[pos]);
+
+ if(current_length > length) {
+ length = current_length; /*the longest length*/
+ offset = current_offset; /*the offset that is related to this longest length*/
+ /*jump out once a length of max length is found (speed gain). This also jumps
+ out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
+ if(current_length >= nicematch) break;
+ }
+ }
+
+ if(hashpos == hash->chain[hashpos]) break;
+
+ if(numzeros >= 3 && length > numzeros) {
+ hashpos = hash->chainz[hashpos];
+ if(hash->zeros[hashpos] != numzeros) break;
+ } else {
+ hashpos = hash->chain[hashpos];
+ /*outdated hash value, happens if particular value was not encountered in whole last window*/
+ if(hash->val[hashpos] != (int)hashval) break;
+ }
+ }
+
+ if(lazymatching) {
+ if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) {
+ lazy = 1;
+ lazylength = length;
+ lazyoffset = offset;
+ continue; /*try the next byte*/
+ }
+ if(lazy) {
+ lazy = 0;
+ if(pos == 0) ERROR_BREAK(81);
+ if(length > lazylength + 1) {
+ /*push the previous character as literal*/
+ if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
+ } else {
+ length = lazylength;
+ offset = lazyoffset;
+ hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
+ hash->headz[numzeros] = -1; /*idem*/
+ --pos;
+ }
+ }
+ }
+ if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
+
+ /*encode it as length/distance pair or literal value*/
+ if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ {
+ if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+ } else if(length < minmatch || (length == 3 && offset > 4096)) {
+ /*compensate for the fact that longer offsets have more extra bits, a
+ length of only 3 may be not worth it then*/
+ if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+ } else {
+ addLengthDistance(out, length, offset);
+ for(i = 1; i < length; ++i) {
+ ++pos;
+ wpos = pos & (windowsize - 1);
+ hashval = getHash(in, insize, pos);
+ if(usezeros && hashval == 0) {
+ if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+ else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+ } else {
+ numzeros = 0;
+ }
+ updateHashChain(hash, wpos, hashval, numzeros);
+ }
+ }
+ } /*end of the loop through each character of input*/
+
+ return error;
+}
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*
+Appends data[0..datasize) to out as a sequence of non compressed ("stored", BTYPE 0) deflate blocks.
+Each block is laid out as: 1 byte holding 1 bit BFINAL and 2 bits BTYPE (the other 5 bits only pad to
+the next byte boundary), 2 bytes LEN, 2 bytes NLEN (65535 - LEN), then LEN bytes of literal data.
+A block carries at most 65535 bytes; only the last block has BFINAL set.
+Empty input still produces one empty final block, because a deflate stream without any block is invalid.
+Returns 0 on success, 83 when out could not be enlarged.
+*/
+static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) {
+  size_t i, numdeflateblocks = (datasize + 65534u) / 65535u;
+  size_t datapos = 0; /*size_t, not unsigned: must not wrap around for inputs of 4GiB and larger*/
+
+  /*there must be at least one block to carry the BFINAL bit, also when there is no data at all*/
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  for(i = 0; i != numdeflateblocks; ++i) {
+    unsigned BFINAL, BTYPE, LEN, NLEN;
+    unsigned char firstbyte;
+    size_t pos = out->size;
+    size_t remaining = datasize - datapos;
+
+    BFINAL = (i == numdeflateblocks - 1);
+    BTYPE = 0;
+
+    LEN = remaining < 65535u ? (unsigned)remaining : 65535u;
+    NLEN = 65535 - LEN;
+
+    if(!ucvector_resize(out, out->size + LEN + 5)) return 83; /*alloc fail*/
+
+    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u));
+    out->data[pos + 0] = firstbyte;
+    out->data[pos + 1] = (unsigned char)(LEN & 255);
+    out->data[pos + 2] = (unsigned char)(LEN >> 8u);
+    out->data[pos + 3] = (unsigned char)(NLEN & 255);
+    out->data[pos + 4] = (unsigned char)(NLEN >> 8u);
+    /*skipped for an empty block so that no pointer arithmetic is done on a possibly null data pointer*/
+    if(LEN != 0) lodepng_memcpy(out->data + pos + 5, data + datapos, LEN);
+    datapos += LEN;
+  }
+
+  return 0;
+}
+
+/*
+Writes the lz77-encoded symbols to the compressed stream, huffman coding them with the given trees.
+lz77_encoded: literal and length symbols; a symbol larger than 256 is a length code and is followed by
+three more entries: its length extra bits, the distance code and the distance extra bits.
+tree_ll: the tree for lit and len codes.
+tree_d: the tree for distance codes.
+*/
+static void writeLZ77data(LodePNGBitWriter* writer, const uivector* lz77_encoded,
+                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d) {
+  const unsigned* symbols = lz77_encoded->data;
+  size_t idx = 0;
+
+  while(idx != lz77_encoded->size) {
+    unsigned code = symbols[idx];
+    writeBitsReversed(writer, tree_ll->codes[code], tree_ll->lengths[code]);
+
+    if(code > 256) {
+      /*length code: the next three entries belong to it*/
+      unsigned len_extra = symbols[idx + 1];
+      unsigned dist_code = symbols[idx + 2];
+      unsigned dist_extra = symbols[idx + 3];
+
+      writeBits(writer, len_extra, LENGTHEXTRA[code - FIRST_LENGTH_CODE_INDEX]);
+      writeBitsReversed(writer, tree_d->codes[dist_code], tree_d->lengths[dist_code]);
+      writeBits(writer, dist_extra, DISTANCEEXTRA[dist_code]);
+
+      idx += 3;
+    }
+
+    ++idx;
+  }
+}
+
+/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees.
+Compresses data[datapos..dataend) into one deflate block with BTYPE 2 and writes it with writer.
+hash: handed to encodeLZ77 when settings->use_lz77 is set, not used otherwise.
+settings: use_lz77 selects LZ77 matching; windowsize, minmatch, nicematch and lazymatching are forwarded to encodeLZ77.
+final: written as the BFINAL bit of the block.
+Returns 0 on success, 83 when an allocation fails, 64 when the end code received no huffman code, or
+the error given by encodeLZ77 or HuffmanTree_makeFromFrequencies.*/
+static unsigned deflateDynamic(LodePNGBitWriter* writer, Hash* hash,
+ const unsigned char* data, size_t datapos, size_t dataend,
+ const LodePNGCompressSettings* settings, unsigned final) {
+ unsigned error = 0;
+
+ /*
+ A block is compressed as follows: The PNG data is lz77 encoded, resulting in
+ literal bytes and length/distance pairs. This is then huffman compressed with
+ two huffman trees. One huffman tree is used for the lit and len values ("ll"),
+ another huffman tree is used for the dist values ("d"). These two trees are
+ stored using their code lengths, and to compress even more these code lengths
+ are also run-length encoded and huffman compressed. This gives a huffman tree
+ of code lengths "cl". The code lengths used to describe this third tree are
+ the code length code lengths ("clcl").
+ */
+
+ /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
+ uivector lz77_encoded;
+ HuffmanTree tree_ll; /*tree for lit,len values*/
+ HuffmanTree tree_d; /*tree for distance codes*/
+ HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
+ unsigned* frequencies_ll = 0; /*frequency of lit,len codes*/
+ unsigned* frequencies_d = 0; /*frequency of dist codes*/
+ unsigned* frequencies_cl = 0; /*frequency of code length codes*/
+ unsigned* bitlen_lld = 0; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/
+ unsigned* bitlen_lld_e = 0; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
+ size_t datasize = dataend - datapos;
+
+ /*
+ If we could call "bitlen_cl" the code length code lengths ("clcl"), that is the bit lengths of codes to represent
+ tree_cl in CLCL_ORDER, then due to the huffman compression of huffman tree representations ("two levels"), there are
+ some analogies:
+ bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
+ bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
+ bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
+ */
+
+ unsigned BFINAL = final;
+ size_t i;
+ size_t numcodes_ll, numcodes_d, numcodes_lld, numcodes_lld_e, numcodes_cl;
+ unsigned HLIT, HDIST, HCLEN;
+
+ uivector_init(&lz77_encoded);
+ HuffmanTree_init(&tree_ll);
+ HuffmanTree_init(&tree_d);
+ HuffmanTree_init(&tree_cl);
+ /* could fit on stack, but >1KB is on the larger side so allocate instead */
+ frequencies_ll = (unsigned*)lodepng_malloc(286 * sizeof(*frequencies_ll));
+ frequencies_d = (unsigned*)lodepng_malloc(30 * sizeof(*frequencies_d));
+ frequencies_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
+ if(!frequencies_ll || !frequencies_d || !frequencies_cl) error = 83; /*alloc fail*/
+
+ /*This while loop never loops due to a break at the end, it is here to
+ allow breaking out of it to the cleanup phase on error conditions.*/
+ while(!error) {
+ lodepng_memset(frequencies_ll, 0, 286 * sizeof(*frequencies_ll));
+ lodepng_memset(frequencies_d, 0, 30 * sizeof(*frequencies_d));
+ lodepng_memset(frequencies_cl, 0, NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
+ if(settings->use_lz77) {
+ error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+ settings->minmatch, settings->nicematch, settings->lazymatching);
+ if(error) break;
+ } else {
+ if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
+ for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
+ }
+
+ /*Count the frequencies of lit, len and dist codes.
+ A symbol larger than 256 is a length code and is followed by three more entries; of those only the
+ distance code, at i + 2, is counted, and all three are skipped.*/
+ for(i = 0; i != lz77_encoded.size; ++i) {
+ unsigned symbol = lz77_encoded.data[i];
+ ++frequencies_ll[symbol];
+ if(symbol > 256) {
+ unsigned dist = lz77_encoded.data[i + 2];
+ ++frequencies_d[dist];
+ i += 3;
+ }
+ }
+ frequencies_ll[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+
+ /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
+ error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll, 257, 286, 15);
+ if(error) break;
+ /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
+ error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d, 2, 30, 15);
+ if(error) break;
+
+ numcodes_ll = LODEPNG_MIN(tree_ll.numcodes, 286);
+ numcodes_d = LODEPNG_MIN(tree_d.numcodes, 30);
+ /*store the code lengths of both generated trees in bitlen_lld*/
+ numcodes_lld = numcodes_ll + numcodes_d;
+ bitlen_lld = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld));
+ /*numcodes_lld_e never needs more size than bitlen_lld*/
+ bitlen_lld_e = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld_e));
+ if(!bitlen_lld || !bitlen_lld_e) ERROR_BREAK(83); /*alloc fail*/
+ numcodes_lld_e = 0;
+
+ for(i = 0; i != numcodes_ll; ++i) bitlen_lld[i] = tree_ll.lengths[i];
+ for(i = 0; i != numcodes_d; ++i) bitlen_lld[numcodes_ll + i] = tree_d.lengths[i];
+
+ /*run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
+ 17 (3-10 zeroes), 18 (11-138 zeroes). Every repeat code is immediately followed by one entry holding
+ the value of its extra bits (the repeat count minus the smallest count the code supports).*/
+ for(i = 0; i != numcodes_lld; ++i) {
+ unsigned j = 0; /*amount of repetitions*/
+ while(i + j + 1 < numcodes_lld && bitlen_lld[i + j + 1] == bitlen_lld[i]) ++j;
+
+ if(bitlen_lld[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
+ ++j; /*include the first zero*/
+ if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ {
+ bitlen_lld_e[numcodes_lld_e++] = 17;
+ bitlen_lld_e[numcodes_lld_e++] = j - 3;
+ } else /*repeat code 18 supports max 138 zeroes*/ {
+ if(j > 138) j = 138;
+ bitlen_lld_e[numcodes_lld_e++] = 18;
+ bitlen_lld_e[numcodes_lld_e++] = j - 11;
+ }
+ i += (j - 1);
+ } else if(j >= 3) /*repeat code for value other than zero*/ {
+ size_t k;
+ unsigned num = j / 6u, rest = j % 6u;
+ bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
+ for(k = 0; k < num; ++k) {
+ bitlen_lld_e[numcodes_lld_e++] = 16;
+ bitlen_lld_e[numcodes_lld_e++] = 6 - 3;
+ }
+ if(rest >= 3) {
+ bitlen_lld_e[numcodes_lld_e++] = 16;
+ bitlen_lld_e[numcodes_lld_e++] = rest - 3;
+ }
+ else j -= rest;
+ i += j;
+ } else /*too short to benefit from repeat code*/ {
+ bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
+ }
+ }
+
+ /*generate tree_cl, the huffmantree of huffmantrees*/
+ for(i = 0; i != numcodes_lld_e; ++i) {
+ ++frequencies_cl[bitlen_lld_e[i]];
+ /*after a repeat code come the bits that specify the number of repetitions,
+ those don't need to be in the frequencies_cl calculation*/
+ if(bitlen_lld_e[i] >= 16) ++i;
+ }
+
+ error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl,
+ NUM_CODE_LENGTH_CODES, NUM_CODE_LENGTH_CODES, 7);
+ if(error) break;
+
+ /*compute amount of code-length-code-lengths to output*/
+ numcodes_cl = NUM_CODE_LENGTH_CODES;
+ /*trim zeros at the end (using CLCL_ORDER), but minimum size must be 4 (see HCLEN below)*/
+ while(numcodes_cl > 4u && tree_cl.lengths[CLCL_ORDER[numcodes_cl - 1u]] == 0) {
+ numcodes_cl--;
+ }
+
+ /*
+ Write everything into the output
+
+ After the BFINAL and BTYPE, the dynamic block consists out of the following:
+ - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
+ - (HCLEN+4)*3 bits code lengths of code length alphabet
+ - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
+ alphabet, + possible repetition codes 16, 17, 18)
+ - HDIST + 1 code lengths of distance alphabet (encoded using the code length
+ alphabet, + possible repetition codes 16, 17, 18)
+ - compressed data
+ - 256 (end code)
+ */
+
+ /*Write block type*/
+ writeBits(writer, BFINAL, 1);
+ writeBits(writer, 0, 1); /*first bit of BTYPE "dynamic"*/
+ writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/
+
+ /*write the HLIT, HDIST and HCLEN values*/
+ /*all three sizes take trimmed ending zeroes into account, done either by HuffmanTree_makeFromFrequencies
+ or in the loop for numcodes_cl above, which saves space. */
+ HLIT = (unsigned)(numcodes_ll - 257);
+ HDIST = (unsigned)(numcodes_d - 1);
+ HCLEN = (unsigned)(numcodes_cl - 4);
+ writeBits(writer, HLIT, 5);
+ writeBits(writer, HDIST, 5);
+ writeBits(writer, HCLEN, 4);
+
+ /*write the code lengths of the code length alphabet ("bitlen_cl")*/
+ for(i = 0; i != numcodes_cl; ++i) writeBits(writer, tree_cl.lengths[CLCL_ORDER[i]], 3);
+
+ /*write the lengths of the lit/len AND the dist alphabet*/
+ for(i = 0; i != numcodes_lld_e; ++i) {
+ writeBitsReversed(writer, tree_cl.codes[bitlen_lld_e[i]], tree_cl.lengths[bitlen_lld_e[i]]);
+ /*extra bits of repeat codes*/
+ if(bitlen_lld_e[i] == 16) writeBits(writer, bitlen_lld_e[++i], 2);
+ else if(bitlen_lld_e[i] == 17) writeBits(writer, bitlen_lld_e[++i], 3);
+ else if(bitlen_lld_e[i] == 18) writeBits(writer, bitlen_lld_e[++i], 7);
+ }
+
+ /*write the compressed data symbols*/
+ writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+ /*error: the length of the end code 256 must be larger than 0.
+ Note that at this point the block header and the data symbols have already been written.*/
+ if(tree_ll.lengths[256] == 0) ERROR_BREAK(64);
+
+ /*write the end code*/
+ writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]);
+
+ break; /*end of error-while*/
+ }
+
+ /*cleanup*/
+ uivector_cleanup(&lz77_encoded);
+ HuffmanTree_cleanup(&tree_ll);
+ HuffmanTree_cleanup(&tree_d);
+ HuffmanTree_cleanup(&tree_cl);
+ lodepng_free(frequencies_ll);
+ lodepng_free(frequencies_d);
+ lodepng_free(frequencies_cl);
+ lodepng_free(bitlen_lld);
+ lodepng_free(bitlen_lld_e);
+
+ return error;
+}
+
+/*
+Deflate for a block of type "fixed": compresses data[datapos..dataend) into one deflate block that
+uses the trees produced by generateFixedLitLenTree and generateFixedDistanceTree.
+hash: handed to encodeLZ77 when settings->use_lz77 is set, not used otherwise.
+final: written as the BFINAL bit of the block.
+Returns 0 on success, else the error of the tree generation or of encodeLZ77.
+*/
+static unsigned deflateFixed(LodePNGBitWriter* writer, Hash* hash,
+                             const unsigned char* data,
+                             size_t datapos, size_t dataend,
+                             const LodePNGCompressSettings* settings, unsigned final) {
+  HuffmanTree tree_ll; /*tree for literal values and length codes*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+  unsigned error;
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  error = generateFixedLitLenTree(&tree_ll);
+  if(error == 0) error = generateFixedDistanceTree(&tree_d);
+
+  if(error == 0) {
+    /*block header: BFINAL, then the two BTYPE bits*/
+    writeBits(writer, final, 1);
+    writeBits(writer, 1, 1); /*first bit of BTYPE*/
+    writeBits(writer, 0, 1); /*second bit of BTYPE*/
+
+    if(!settings->use_lz77) {
+      /*no LZ77, but still will be Huffman compressed: every byte becomes a literal symbol*/
+      size_t pos;
+      for(pos = datapos; pos < dataend; ++pos) {
+        unsigned char literal = data[pos];
+        writeBitsReversed(writer, tree_ll.codes[literal], tree_ll.lengths[literal]);
+      }
+    } else {
+      /*LZ77 encoded*/
+      uivector lz77_encoded;
+      uivector_init(&lz77_encoded);
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error == 0) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+      uivector_cleanup(&lz77_encoded);
+    }
+
+    /*add END code*/
+    if(error == 0) writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]);
+  }
+
+  /*cleanup*/
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+/*
+Deflates in[0..insize) and appends the result to out.
+settings->btype selects the block type: 0 delegates to deflateNoCompression, 1 writes the whole input
+as one block with deflateFixed, 2 splits the input into blocks written with deflateDynamic.
+At least one block is always written for btype 1 and 2, also for empty input.
+Returns 0 on success, 61 when btype is larger than 2, else the error of hash_init or of the block writer.
+*/
+static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
+                                 const LodePNGCompressSettings* settings) {
+  unsigned error = 0;
+  size_t i, blocksize, numdeflateblocks;
+  Hash hash;
+  LodePNGBitWriter writer;
+
+  LodePNGBitWriter_init(&writer, out);
+
+  if(settings->btype > 2) return 61;
+  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
+  else if(settings->btype == 1) blocksize = insize;
+  else /*if(settings->btype == 2)*/ {
+    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
+    blocksize = insize / 8u + 8;
+    if(blocksize < 65536) blocksize = 65536;
+    if(blocksize > 262144) blocksize = 262144;
+  }
+
+  /*blocksize is 0 for empty input with btype 1: never divide by it, a single empty block is written instead*/
+  if(blocksize == 0) numdeflateblocks = 1;
+  else numdeflateblocks = (insize + blocksize - 1) / blocksize;
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  error = hash_init(&hash, settings->windowsize);
+
+  if(!error) {
+    for(i = 0; i != numdeflateblocks && !error; ++i) {
+      unsigned final = (i == numdeflateblocks - 1);
+      size_t start = i * blocksize;
+      size_t end = start + blocksize;
+      if(end > insize) end = insize;
+
+      if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final);
+      else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final);
+    }
+  }
+
+  hash_cleanup(&hash);
+
+  return error;
+}
+
+/*
+Deflates in[0..insize) with lodepng_deflatev, using the buffer *out of size *outsize as the output
+vector. Afterwards *out and *outsize hold the data pointer and size of that vector, also when an
+error is returned.
+*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings) {
+  unsigned error;
+  ucvector buffer = ucvector_init(*out, *outsize);
+
+  error = lodepng_deflatev(&buffer, in, insize, settings);
+
+  /*hand the vector's storage back to the caller in all cases*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return error;
+}
+
+/* deflate using the custom function from the settings if there is one, else the built-in lodepng_deflate */
+static unsigned deflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGCompressSettings* settings) {
+  if(!settings->custom_deflate) {
+    return lodepng_deflate(out, outsize, in, insize, settings);
+  }
+  return settings->custom_deflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Adler32 / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Continues an adler32 computation: adler is the checksum so far (low 16 bits hold the first sum, the
+next 16 bits the second), data[0..len-1] are the bytes to add. Returns the updated checksum.
+*/
+static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) {
+  unsigned low = adler & 0xffffu;
+  unsigned high = (adler >> 16u) & 0xffffu;
+
+  while(len != 0u) {
+    /*at least 5552 sums can be done before the sums overflow, saving a lot of modulo divisions*/
+    unsigned batch = len;
+    const unsigned char* stop;
+    if(batch > 5552u) batch = 5552u;
+    len -= batch;
+
+    for(stop = data + batch; data != stop; ++data) {
+      low += *data;
+      high += low;
+    }
+
+    low %= 65521u;
+    high %= 65521u;
+  }
+
+  return (high << 16u) | low;
+}
+
+/*Return the adler32 of the bytes data[0..len-1], computed with update_adler32 from the start value 1*/
+static unsigned adler32(const unsigned char* data, unsigned len) {
+  const unsigned start = 1u;
+  return update_adler32(start, data, len);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Zlib / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decompresses the zlib stream in[0..insize) and appends the result to out.
+Checks the 2-byte zlib header, inflates the data that follows it with inflatev and, unless
+settings->ignore_adler32 is set, compares the ADLER32 value stored in the last 4 input bytes with the
+adler32 of everything in out.
+Returns 0 on success, 53 when the input is too small, 24, 25 or 26 for an unsupported or invalid
+header, 58 on checksum mismatch, else the error of inflatev.
+*/
+static unsigned lodepng_zlib_decompressv(ucvector* out,
+                                         const unsigned char* in, size_t insize,
+                                         const LodePNGDecompressSettings* settings) {
+  unsigned error = 0;
+  unsigned CM, CINFO, FDICT;
+
+  if(insize < 2) return 53; /*error, size of zlib data too small*/
+  /*read information from zlib header*/
+  if((in[0] * 256 + in[1]) % 31 != 0) {
+    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
+    return 24;
+  }
+
+  CM = in[0] & 15;
+  CINFO = (in[0] >> 4) & 15;
+  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
+  FDICT = (in[1] >> 5) & 1;
+  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
+
+  if(CM != 8 || CINFO > 7) {
+    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
+    return 25;
+  }
+  if(FDICT != 0) {
+    /*error: the specification of PNG says about the zlib stream:
+    "The additional flags shall not specify a preset dictionary."*/
+    return 26;
+  }
+
+  error = inflatev(out, in + 2, insize - 2, settings);
+  if(error) return error;
+
+  if(!settings->ignore_adler32) {
+    unsigned ADLER32, checksum;
+    /*the 4 checksum bytes must lie behind the 2 header bytes: with less than 6 bytes the index
+    insize - 4 would wrap around or make the checksum overlap the header*/
+    if(insize < 6) return 53; /*error, size of zlib data too small*/
+    ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
+    checksum = adler32(out->data, (unsigned)(out->size));
+    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
+  }
+
+  return 0; /*no error*/
+}
+
+
+/*
+Decompresses the zlib stream in[0..insize) with lodepng_zlib_decompressv, using the buffer *out of
+size *outsize as the output vector. Afterwards *out and *outsize hold the data pointer and size of
+that vector, also when an error is returned.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error;
+  ucvector buffer = ucvector_init(*out, *outsize);
+
+  error = lodepng_zlib_decompressv(&buffer, in, insize, settings);
+
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return error;
+}
+
+/*Decompress using the custom zlib function from the settings if there is one, else the built-in decoder.
+expected_size is expected output size, to avoid intermediate allocations. Set to 0 if not known.
+It is only used by the built-in path.*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size,
+                                const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error;
+  ucvector buffer;
+
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+
+  buffer = ucvector_init(*out, *outsize);
+  if(expected_size) {
+    /*reserve the memory to avoid intermediate reallocations; the result of the resize is not
+    checked, and the size is put back to what the caller passed in*/
+    ucvector_resize(&buffer, *outsize + expected_size);
+    buffer.size = *outsize;
+  }
+
+  error = lodepng_zlib_decompressv(&buffer, in, insize, settings);
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return error;
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*
+Compresses in[0..insize) into a newly allocated zlib stream.
+zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the uncompressed data.
+*out and *outsize are overwritten: *out is the new buffer, or NULL when an error occurred.
+Returns 0 on success, 83 when the output buffer could not be allocated, else the error of deflate.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                               size_t insize, const LodePNGCompressSettings* settings) {
+  unsigned char* deflatedata = 0;
+  size_t deflatesize = 0;
+  unsigned error = deflate(&deflatedata, &deflatesize, in, insize, settings);
+
+  *out = NULL;
+  *outsize = 0;
+
+  if(!error) {
+    /*2 header bytes in front of the deflate data, 4 checksum bytes behind it*/
+    size_t total = deflatesize + 6;
+    unsigned char* zdata = (unsigned char*)lodepng_malloc(total);
+    *outsize = total;
+    *out = zdata;
+
+    if(!zdata) {
+      error = 83; /*alloc fail*/
+    } else {
+      const unsigned char* src = deflatedata;
+      unsigned char* dst = zdata + 2;
+      size_t left;
+      unsigned checksum = adler32(in, (unsigned)insize);
+      unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
+      unsigned FLEVEL = 0;
+      unsigned FDICT = 0;
+      unsigned header = 256 * CMF + FDICT * 32 + FLEVEL * 64;
+      header += 31 - header % 31; /*FCHECK: makes the 16-bit header value a multiple of 31*/
+
+      zdata[0] = (unsigned char)(header >> 8);
+      zdata[1] = (unsigned char)(header & 255);
+      for(left = deflatesize; left != 0; --left) *dst++ = *src++;
+      lodepng_set32bitInt(&zdata[total - 4], checksum);
+    }
+  }
+
+  lodepng_free(deflatedata);
+  return error;
+}
+
+/* compress using the custom zlib function from the settings if there is one, else lodepng_zlib_compress */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(!settings->custom_zlib) {
+    return lodepng_zlib_compress(out, outsize, in, insize, settings);
+  }
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#else /*no LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Variant for builds without the built-in zlib code: only the custom zlib function of the settings
+can decompress. expected_size is ignored. Returns 87 when no custom function is set.*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size,
+                                const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) {
+  (void)expected_size;
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Variant for builds without the built-in zlib code: only the custom zlib function of the settings
+can compress. Returns 87 when no custom function is set.*/
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*this is a good tradeoff between speed and compression ratio*/
+#define DEFAULT_WINDOWSIZE 2048
+
+/*Fills in the default compression settings: dynamic huffman blocks, LZ77 with lazy matching and a
+window of DEFAULT_WINDOWSIZE, and no custom compression functions or context.*/
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings) {
+  /*no user supplied hooks*/
+  settings->custom_context = 0;
+  settings->custom_deflate = 0;
+  settings->custom_zlib = 0;
+
+  /*LZ77 parameters*/
+  settings->use_lz77 = 1;
+  settings->windowsize = DEFAULT_WINDOWSIZE;
+  settings->minmatch = 3;
+  settings->nicematch = 128;
+  settings->lazymatching = 1;
+
+  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
+  settings->btype = 2;
+}
+
+/*Constant instance of the default compression settings. The values appear to mirror what
+lodepng_compress_settings_init assigns; this relies on the field order of LodePNGCompressSettings,
+which is declared elsewhere - TODO confirm when changing either one.*/
+const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
+
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*Fills in the default decompression settings: every field is set to 0, so neither the adler32 nor
+the nlen value is ignored and no custom decompression functions or context are used.*/
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) {
+  /*no user supplied hooks*/
+  settings->custom_context = 0;
+  settings->custom_inflate = 0;
+  settings->custom_zlib = 0;
+
+  /*do not skip any integrity check*/
+  settings->ignore_nlen = 0;
+  settings->ignore_adler32 = 0;
+}
+
+/*Constant instance of the default decompression settings: all zero, like the values
+lodepng_decompress_settings_init assigns.*/
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0};
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of Zlib related code. Begin of PNG related code. // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / CRC32 / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+#ifndef LODEPNG_NO_COMPILE_CRC
+/* CRC polynomial: 0xedb88320
+Lookup table with 256 entries, one per byte value. lodepng_crc32 indexes it with the low 8 bits of
+(crc ^ data byte) and xors the entry with crc >> 8, handling one input byte per lookup. */
+static unsigned lodepng_crc32_table[256] = {
+ 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u,
+ 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u,
+ 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u,
+ 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u,
+ 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
+ 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u,
+ 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u,
+ 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u,
+ 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u,
+ 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u,
+ 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u,
+ 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u,
+ 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
+ 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u,
+ 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u,
+ 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u,
+ 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u,
+ 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u,
+ 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u,
+ 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u,
+ 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
+ 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u,
+ 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u,
+ 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u,
+ 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u,
+ 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u,
+ 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u,
+ 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u,
+ 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
+ 2932959818u, 3654703836u, 1088359270u, 936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u,
+ 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u,
+ 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u
+};
+
+/*Return the CRC of the bytes data[0..length-1], computed one byte at a time with lodepng_crc32_table.
+The running value starts as 0xffffffff and is xored with 0xffffffff again at the end.*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length) {
+  unsigned crc = 0xffffffffu;
+  size_t left = length;
+
+  while(left != 0) {
+    unsigned index = (crc ^ *data) & 0xffu;
+    crc = lodepng_crc32_table[index] ^ (crc >> 8u);
+    ++data;
+    --left;
+  }
+
+  return crc ^ 0xffffffffu;
+}
+#else /* !LODEPNG_NO_COMPILE_CRC */
+unsigned lodepng_crc32(const unsigned char* data, size_t length);
+#endif /* !LODEPNG_NO_COMPILE_CRC */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Reading and writing PNG color channel bits / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/* The color channel bits of less-than-8-bit pixels are read with the MSB of bytes first,
+so LodePNGBitWriter and LodePNGBitReader can't be used for those. */
+
+/*Returns the bit at position *bitpointer of bitstream, where the most significant bit of every byte
+comes first, and advances *bitpointer by one.*/
+static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) {
+  size_t bp = *bitpointer;
+  unsigned char byte = bitstream[bp >> 3];
+  unsigned shift = (unsigned)(7 - (bp & 0x7)); /*bit 0 of the stream is bit 7 of the byte*/
+
+  *bitpointer = bp + 1;
+  return (unsigned char)((byte >> shift) & 1);
+}
+
+/* TODO: make this faster */
+/*Reads nbits bits with readBitFromReversedStream and returns them as one value in which the bit
+read first is the most significant one. *bitpointer is advanced by nbits.*/
+static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
+  unsigned value = 0;
+  size_t left;
+
+  for(left = nbits; left != 0; --left) {
+    value = (value << 1u) | (unsigned)readBitFromReversedStream(bitpointer, bitstream);
+  }
+
+  return value;
+}
+
+/*Sets the bit at position *bitpointer of bitstream, where the most significant bit of every byte
+comes first, to 0 when bit is 0 and to 1 otherwise, then advances *bitpointer by one.*/
+static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
+  size_t index = (*bitpointer) >> 3u;
+  unsigned mask = 1u << (7u - ((*bitpointer) & 7u));
+
+  /*the current bit in bitstream may be 0 or 1 for this to work*/
+  if(bit != 0) bitstream[index] |= mask;
+  else bitstream[index] &= (unsigned char)(~mask);
+
+  ++(*bitpointer);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG chunks / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Returns the value lodepng_read32bitInt reads from the first 4 bytes of the chunk: the length field.*/
+unsigned lodepng_chunk_length(const unsigned char* chunk) {
+  const unsigned char* length_field = chunk;
+  return lodepng_read32bitInt(length_field);
+}
+
+/*Copies the 4 type bytes of the chunk (chunk[4..7]) into type and null terminates it.*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk) {
+  const unsigned char* name = chunk + 4;
+
+  type[0] = (char)name[0];
+  type[1] = (char)name[1];
+  type[2] = (char)name[2];
+  type[3] = (char)name[3];
+  type[4] = 0; /*null termination char*/
+}
+
+/*Returns 1 if type is exactly 4 characters long and equal to the 4 type bytes of the chunk
+(chunk[4..7]), else 0.*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) {
+  unsigned k;
+
+  if(lodepng_strlen(type) != 4) return 0;
+
+  for(k = 0; k != 4; ++k) {
+    if(chunk[4 + k] != type[k]) return 0;
+  }
+  return 1;
+}
+
+/*Returns 1 if bit 5 (value 32) of the first type byte, chunk[4], is set, else 0.*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) {
+  return (unsigned char)((chunk[4] >> 5) & 1);
+}
+
+/*Returns 1 if bit 5 (value 32) of the third type byte, chunk[6], is set, else 0.*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk) {
+  return (unsigned char)((chunk[6] >> 5) & 1);
+}
+
+/*Returns 1 if bit 5 (value 32) of the fourth type byte, chunk[7], is set, else 0.*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) {
+  return (unsigned char)((chunk[7] >> 5) & 1);
+}
+
+/*Returns a pointer to the byte behind the 4 length and 4 type bytes of the chunk, where its data starts.*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Same as lodepng_chunk_data, for a chunk that must not be modified: returns a pointer to the byte
+behind the 4 length and 4 type bytes.*/
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Compares the CRC stored behind the chunk data with the CRC computed over the chunk.
+Returns 0 if they are equal, 1 if they differ. No bounds are checked here: the length field of the
+chunk decides how many bytes are read.*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk) {
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned stored = lodepng_read32bitInt(&chunk[length + 8]);
+  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
+  unsigned computed = lodepng_crc32(&chunk[4], length + 4);
+
+  return stored != computed;
+}
+
+/*Computes the CRC over the 4 type bytes plus the data of the chunk and stores it in the
+4 bytes that follow the data.*/
+void lodepng_chunk_generate_crc(unsigned char* chunk) {
+  const unsigned data_length = lodepng_chunk_length(chunk);
+  const unsigned crc = lodepng_crc32(chunk + 4, data_length + 4);
+  unsigned char* crc_field = chunk + 8 + data_length;
+  lodepng_set32bitInt(crc_field, crc);
+}
+
+/*Returns a pointer to the chunk that follows 'chunk' inside the buffer ending at 'end'.
+If 'chunk' points at the 8-byte PNG signature, the result is the first chunk after it.
+Returns 'end' when fewer than 12 bytes remain, when the length field makes the size
+computation overflow, or when the chunk would extend past 'end'. The result is therefore
+never outside [chunk, end].*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) {
+  size_t available_size;
+  size_t total_chunk_length;
+  if(chunk >= end) return end;
+  available_size = (size_t)(end - chunk);
+  if(available_size < 12) return end; /*too small to contain a chunk*/
+  if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+     && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  }
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+  /*Compare sizes before doing pointer arithmetic: forming a pointer beyond the buffer and
+  then testing it for wrap-around is undefined, and such a test may be optimized away.*/
+  if(total_chunk_length > available_size) return end; /*outside of range*/
+  return chunk + total_chunk_length;
+}
+
+/*Const variant of lodepng_chunk_next. Returns a pointer to the chunk that follows
+'chunk' inside the buffer ending at 'end'. If 'chunk' points at the 8-byte PNG signature,
+the result is the first chunk after it. Returns 'end' when fewer than 12 bytes remain,
+when the length field makes the size computation overflow, or when the chunk would extend
+past 'end'. The result is therefore never outside [chunk, end].*/
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end) {
+  size_t available_size;
+  size_t total_chunk_length;
+  if(chunk >= end) return end;
+  available_size = (size_t)(end - chunk);
+  if(available_size < 12) return end; /*too small to contain a chunk*/
+  if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+     && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  }
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+  /*Compare sizes before doing pointer arithmetic: forming a pointer beyond the buffer and
+  then testing it for wrap-around is undefined, and such a test may be optimized away.*/
+  if(total_chunk_length > available_size) return end; /*outside of range*/
+  return chunk + total_chunk_length;
+}
+
+/*Walks the chunks from 'chunk' towards 'end' and returns the first one whose type equals
+'type'. Returns 0 once fewer than 12 bytes remain before 'end'.*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]) {
+  while(chunk < end && end - chunk >= 12) {
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next(chunk, end);
+  }
+  return 0; /* past file end: chunk + 12 > end */
+}
+
+/*Const variant: walks the chunks from 'chunk' towards 'end' and returns the first one
+whose type equals 'type'. Returns 0 once fewer than 12 bytes remain before 'end'.*/
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  while(chunk < end && end - chunk >= 12) {
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next_const(chunk, end);
+  }
+  return 0; /* past file end: chunk + 12 > end */
+}
+
+/*Appends a copy of the complete chunk (length field + 12 bytes) to the buffer *out of
+size *outsize, growing it with lodepng_realloc and updating both on success.
+Returns 0 on success, 77 if a size computation overflows (as reported by lodepng_addofl),
+83 if the reallocation fails; on error *out and *outsize are left untouched.*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk) {
+  size_t total_chunk_length, new_length;
+  unsigned char* new_buffer;
+
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return 77;
+  if(lodepng_addofl(*outsize, total_chunk_length, &new_length)) return 77;
+
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+
+  /*The copy is sized by a size_t: total_chunk_length can exceed the range of unsigned
+  (length field up to 2^32 - 1, plus 12), which an unsigned loop counter could never reach.
+  The old size is the offset at which the appended chunk starts.*/
+  lodepng_memcpy(new_buffer + *outsize, chunk, total_chunk_length);
+
+  (*out) = new_buffer;
+  (*outsize) = new_length;
+
+  return 0;
+}
+
+/*Grows 'out' by length + 12 bytes and writes the length field and the 4 type letters of
+a new chunk at the start of the added space. Data and CRC are not written yet. *chunk
+receives the start of the new chunk; its data begins at *chunk + 8. To finish the chunk,
+fill in the data and then call lodepng_chunk_generate_crc.
+Returns 0 on success, 77 if the new size overflows, 83 if resizing fails.*/
+static unsigned lodepng_chunk_init(unsigned char** chunk,
+                                   ucvector* out,
+                                   unsigned length, const char* type) {
+  size_t grown = out->size;
+  unsigned char* start;
+  if(lodepng_addofl(grown, length, &grown)) return 77;
+  if(lodepng_addofl(grown, 12, &grown)) return 77;
+  if(!ucvector_resize(out, grown)) return 83; /*alloc fail*/
+  start = out->data + grown - length - 12u;
+  *chunk = start;
+
+  lodepng_set32bitInt(start, length); /*1: length*/
+  lodepng_memcpy(start + 4, type, 4); /*2: chunk name (4 letters)*/
+
+  return 0;
+}
+
+/*Appends a complete chunk (length, type, 'length' bytes copied from 'data', and CRC) to
+the ucvector 'out'. Returns 0 on success or the error code from lodepng_chunk_init.*/
+static unsigned lodepng_chunk_createv(ucvector* out,
+                                      unsigned length, const char* type, const unsigned char* data) {
+  unsigned char* start;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&start, out, length, type));
+
+  lodepng_memcpy(start + 8, data, length); /*3: the data*/
+  lodepng_chunk_generate_crc(start); /*4: CRC (of the chunkname characters and the data)*/
+
+  return 0;
+}
+
+/*Appends a new chunk built from length, type and data to the buffer *out of size
+*outsize. Both are updated from the working vector even when an error is returned.
+Returns the error code of lodepng_chunk_createv (0 on success).*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize,
+                              unsigned length, const char* type, const unsigned char* data) {
+  ucvector buffer = ucvector_init(*out, *outsize);
+  const unsigned result = lodepng_chunk_createv(&buffer, length, type, data);
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Color types, channels, bits / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Checks that colortype is a known color type and that bit depth bd is allowed for it.
+Returns a LodePNG error code: 0 if the combination is allowed, 37 if the bit depth is
+not allowed for this color type, 31 if the color type itself is invalid.*/
+static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) {
+  unsigned allowed;
+  switch(colortype) {
+    case LCT_GREY:
+      allowed = (bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16);
+      break;
+    case LCT_PALETTE:
+      allowed = (bd == 1 || bd == 2 || bd == 4 || bd == 8);
+      break;
+    case LCT_RGB:
+    case LCT_GREY_ALPHA:
+    case LCT_RGBA:
+      allowed = (bd == 8 || bd == 16);
+      break;
+    case LCT_MAX_OCTET_VALUE:
+    default:
+      return 31; /* invalid color type */
+  }
+  return allowed ? 0 : 37;
+}
+
+/*Returns the number of channels per pixel for the color type, or 0 if it is invalid.*/
+static unsigned getNumColorChannels(LodePNGColorType colortype) {
+  unsigned channels = 0; /*stays 0 for an invalid color type*/
+  switch(colortype) {
+    case LCT_GREY:
+    case LCT_PALETTE:
+      channels = 1;
+      break;
+    case LCT_GREY_ALPHA:
+      channels = 2;
+      break;
+    case LCT_RGB:
+      channels = 3;
+      break;
+    case LCT_RGBA:
+      channels = 4;
+      break;
+    case LCT_MAX_OCTET_VALUE:
+    default:
+      break;
+  }
+  return channels;
+}
+
+/*Returns the bits per pixel: the amount of channels times the bits per channel.*/
+static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) {
+  const unsigned channels = getNumColorChannels(colortype);
+  return channels * bitdepth;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Sets the color mode to 8-bit RGBA with no color key and no palette. Does not free
+anything the struct may have held before.*/
+void lodepng_color_mode_init(LodePNGColorMode* info) {
+  info->colortype = LCT_RGBA;
+  info->bitdepth = 8;
+  info->palette = 0;
+  info->palettesize = 0;
+  info->key_defined = 0;
+  info->key_r = 0;
+  info->key_g = 0;
+  info->key_b = 0;
+}
+
+/*Allocates the palette if there is none yet and sets all 256 entries to opaque black.
+An existing palette is assumed to already be 1024 bytes (256 colors of 4 bytes each) and
+is overwritten in place. On allocation failure info->palette stays null.*/
+static void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) {
+  unsigned char* entry;
+  unsigned char* stop;
+  if(!info->palette) {
+    info->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!info->palette) return; /*alloc fail*/
+  }
+  stop = info->palette + 1024;
+  /*Initialize all unused colors with black, the value used for invalid palette indices.
+  This is an error according to the PNG spec, but common PNG decoders make it black instead.
+  That makes color conversion slightly faster due to no error handling needed.*/
+  for(entry = info->palette; entry != stop; entry += 4) {
+    entry[0] = 0;
+    entry[1] = 0;
+    entry[2] = 0;
+    entry[3] = 255;
+  }
+}
+
+/*Releases what the color mode owns. The palette is the only owned resource, so this
+just clears the palette.*/
+void lodepng_color_mode_cleanup(LodePNGColorMode* info) {
+  LodePNGColorMode* const mode = info;
+  lodepng_palette_clear(mode);
+}
+
+/*Makes dest a deep copy of source. dest must be initialized: whatever it owned before is
+freed first. If source has a palette, dest gets its own 1024-byte palette holding
+source's palettesize entries, with the remaining entries set to opaque black.
+Returns 0 on success, 83 if the palette allocation fails while source has entries; in
+that case dest is left with no palette and palettesize 0.*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) {
+  size_t i;
+  lodepng_color_mode_cleanup(dest);
+  lodepng_memcpy(dest, source, sizeof(LodePNGColorMode));
+  if(source->palette) {
+    /*after the struct copy dest->palette aliases source's buffer; replace it right away*/
+    dest->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!dest->palette) {
+      /*keep dest consistent: no palette means no palette entries*/
+      dest->palettesize = 0;
+      return source->palettesize ? 83 : 0; /*alloc fail*/
+    }
+    lodepng_memcpy(dest->palette, source->palette, source->palettesize * 4);
+    /*The pixel readers index the palette without bounds checks, so give the unused
+    entries the same defined value lodepng_color_mode_alloc_palette uses.*/
+    for(i = source->palettesize; i < 256; ++i) {
+      dest->palette[i * 4 + 0] = 0;
+      dest->palette[i * 4 + 1] = 0;
+      dest->palette[i * 4 + 2] = 0;
+      dest->palette[i * 4 + 3] = 255;
+    }
+  }
+  return 0;
+}
+
+/*Returns a freshly initialized color mode with the given color type and bit depth; all
+other fields keep the values set by lodepng_color_mode_init.*/
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGColorMode mode;
+  lodepng_color_mode_init(&mode);
+  mode.bitdepth = bitdepth;
+  mode.colortype = colortype;
+  return mode;
+}
+
+/*Returns 1 if both color modes have the same color type, bit depth, color key state (and
+key values when a key is defined), palette size and palette bytes; 0 otherwise.*/
+static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) {
+  size_t k, palette_bytes;
+  if(a->colortype != b->colortype || a->bitdepth != b->bitdepth) return 0;
+  if(a->key_defined != b->key_defined) return 0;
+  /*the key values only matter when a key is actually defined*/
+  if(a->key_defined && (a->key_r != b->key_r || a->key_g != b->key_g || a->key_b != b->key_b)) return 0;
+  if(a->palettesize != b->palettesize) return 0;
+  palette_bytes = a->palettesize * 4;
+  for(k = 0; k != palette_bytes; ++k) {
+    if(a->palette[k] != b->palette[k]) return 0;
+  }
+  return 1;
+}
+
+/*Frees the palette, if any, and resets the palette pointer and size to zero.*/
+void lodepng_palette_clear(LodePNGColorMode* info) {
+  unsigned char* old_palette = info->palette;
+  if(old_palette) lodepng_free(old_palette);
+  info->palettesize = 0;
+  info->palette = 0;
+}
+
+/*Appends one RGBA color to the palette, allocating the palette first if there is none.
+Returns 0 on success, 83 if the allocation fails, 108 if the palette already holds 256
+colors.*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  unsigned char* slot;
+  if(!info->palette) /*allocate palette if empty*/ {
+    lodepng_color_mode_alloc_palette(info);
+    if(!info->palette) return 83; /*alloc fail*/
+  }
+  if(info->palettesize >= 256) return 108; /*too many palette values*/
+
+  slot = info->palette + 4 * info->palettesize;
+  slot[0] = r;
+  slot[1] = g;
+  slot[2] = b;
+  slot[3] = a;
+  info->palettesize += 1;
+  return 0;
+}
+
+/*Returns the bits per pixel that follow from the color type and bit depth of info.*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info) {
+  const LodePNGColorType type = info->colortype;
+  const unsigned depth = info->bitdepth;
+  return lodepng_get_bpp_lct(type, depth);
+}
+
+/*Returns the number of channels of the color type of info (0 for an invalid type).*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info) {
+  const LodePNGColorType type = info->colortype;
+  return getNumColorChannels(type);
+}
+
+/*Returns 1 if the color type is grey or grey with alpha, 0 otherwise.*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) {
+  if(info->colortype == LCT_GREY) return 1;
+  if(info->colortype == LCT_GREY_ALPHA) return 1;
+  return 0;
+}
+
+/*Returns 1 if bit 2 (value 4) of the color type is set, which is the case for color
+types 4 and 6; 0 otherwise.*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) {
+  return (info->colortype & 4) ? 1 : 0; /*4 or 6*/
+}
+
+/*Returns 1 if the color type is palette, 0 otherwise.*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info) {
+  return (info->colortype == LCT_PALETTE) ? 1 : 0;
+}
+
+/*Returns 1 if any of the palettesize palette entries has an alpha value below 255,
+0 otherwise (including when the palette is empty).*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) {
+  size_t entry = 0;
+  while(entry < info->palettesize) {
+    if(info->palette[entry * 4 + 3] != 255) return 1;
+    ++entry;
+  }
+  return 0;
+}
+
+/*Returns 1 if the color mode has a color key, an alpha channel color type, or a palette
+entry with alpha below 255; 0 otherwise. The checks run in that order and stop at the
+first one that holds.*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) {
+  if(info->key_defined) return 1;
+  if(lodepng_is_alpha_type(info)) return 1;
+  return lodepng_has_palette_alpha(info) ? 1 : 0;
+}
+
+/*Returns the size in bytes of a raw w x h image with the given color type and bit depth.
+The total bit count is rounded up to whole bytes once for the entire image, not per row.*/
+static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  const size_t bits_per_pixel = lodepng_get_bpp_lct(colortype, bitdepth);
+  const size_t pixels = (size_t)w * (size_t)h;
+  /*split into groups of 8 pixels plus a remainder so pixels * bits_per_pixel is never formed*/
+  const size_t full_groups = (pixels / 8u) * bits_per_pixel;
+  const size_t remainder = ((pixels & 7u) * bits_per_pixel + 7u) / 8u;
+  return full_groups + remainder;
+}
+
+/*Returns the size in bytes of a raw w x h image stored in the given color mode.*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  const LodePNGColorType type = color->colortype;
+  const unsigned depth = color->bitdepth;
+  return lodepng_get_raw_size_lct(w, h, type, depth);
+}
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/*Returns the byte size of h scanlines of w pixels at bpp bits per pixel as stored in
+IDAT data: each scanline is padded to a multiple of 8 bits and has one extra filter byte,
+so the result is larger than lodepng_get_raw_size. Use h = 1 for the size of a single
+row including its filter byte.*/
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, unsigned bpp) {
+  /*Ignoring casts, a row is (w * bpp + 7) / 8 + 1 bytes; computing it as groups of 8
+  pixels plus a remainder avoids overflow of w * bpp.*/
+  const size_t full_groups = (size_t)(w / 8u) * bpp;
+  const size_t remainder = ((w & 7u) * bpp + 7u) / 8u;
+  const size_t row = full_groups + 1u + remainder; /* + 1 for the filter byte */
+  return (size_t)h * row;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Conservative check for size_t overflow caused by the amount of pixels. It is
+overcautious rather than precise. When it reports no overflow, the following can safely
+be computed in a size_t (but not in an unsigned):
+-(size_t)w * (size_t)h * 8
+-amount of bytes in IDAT (including filter, padding and Adam7 bytes)
+-amount of bytes in raw color model
+Returns 1 if overflow is possible, 0 if not.
+*/
+static int lodepng_pixel_overflow(unsigned w, unsigned h,
+                                  const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) {
+  size_t widest_bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor));
+  size_t pixel_count, scratch;
+  size_t row_bytes; /* bytes per line in worst case */
+
+  /* pixel count, then times 8: bit pointer with 8-bit color, or 8 bytes per channel color */
+  if(lodepng_mulofl((size_t)w, (size_t)h, &pixel_count)
+     || lodepng_mulofl(pixel_count, 8, &scratch)) return 1;
+
+  /* Bytes per scanline as (w / 8u) * bpp + ((w & 7u) * bpp + 7u) / 8u, then 5 bytes of
+  overhead per line (1 filterbyte, 4 for Adam7 worst case), then the total for h lines */
+  if(lodepng_mulofl((size_t)(w / 8u), widest_bpp, &row_bytes)
+     || lodepng_addofl(row_bytes, ((w & 7u) * widest_bpp + 7u) / 8u, &row_bytes)
+     || lodepng_addofl(row_bytes, 5, &row_bytes)
+     || lodepng_mulofl(row_bytes, h, &scratch)) return 1;
+
+  return 0; /* no overflow */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*Sets the 3 unknown-chunk buffers of info to null with size 0. Frees nothing.*/
+static void LodePNGUnknownChunks_init(LodePNGInfo* info) {
+  unsigned slot;
+  for(slot = 0; slot != 3; ++slot) {
+    info->unknown_chunks_data[slot] = 0;
+    info->unknown_chunks_size[slot] = 0;
+  }
+}
+
+/*Frees the 3 unknown-chunk buffers of info. The pointers and sizes are not reset.*/
+static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) {
+  unsigned slot = 0;
+  while(slot != 3) {
+    lodepng_free(info->unknown_chunks_data[slot]);
+    ++slot;
+  }
+}
+
+/*Frees dest's 3 unknown-chunk buffers and replaces them with copies of src's.
+Returns 0 on success, 83 if an allocation for a non-empty buffer fails; buffers after
+the failing one are then not copied.*/
+static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) {
+  unsigned slot;
+
+  LodePNGUnknownChunks_cleanup(dest);
+
+  for(slot = 0; slot != 3; ++slot) {
+    const size_t size = src->unknown_chunks_size[slot];
+    unsigned char* copy = (unsigned char*)lodepng_malloc(size);
+    size_t k;
+    dest->unknown_chunks_size[slot] = size;
+    dest->unknown_chunks_data[slot] = copy;
+    if(!copy && size) return 83; /*alloc fail*/
+    for(k = 0; k < size; ++k) copy[k] = src->unknown_chunks_data[slot][k];
+  }
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Resets the text fields of info to "no texts": null arrays and a count of 0. Frees
+nothing.*/
+static void LodePNGText_init(LodePNGInfo* info) {
+  info->text_keys = NULL;
+  info->text_strings = NULL;
+  info->text_num = 0;
+}
+
+/*Frees every text key and string and then the two arrays holding them. text_num and the
+array pointers are not reset.*/
+static void LodePNGText_cleanup(LodePNGInfo* info) {
+  size_t entry = 0;
+  while(entry != info->text_num) {
+    string_cleanup(&info->text_keys[entry]);
+    string_cleanup(&info->text_strings[entry]);
+    ++entry;
+  }
+  lodepng_free(info->text_keys);
+  lodepng_free(info->text_strings);
+}
+
+/*Resets dest's text fields without freeing them and then adds a copy of every text of
+source to dest. Returns 0 on success or the first error from lodepng_add_text.*/
+static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t entry;
+  dest->text_num = 0;
+  dest->text_keys = 0;
+  dest->text_strings = 0;
+  for(entry = 0; entry != source->text_num; ++entry) {
+    const char* key = source->text_keys[entry];
+    const char* str = source->text_strings[entry];
+    CERROR_TRY_RETURN(lodepng_add_text(dest, key, str));
+  }
+  return 0;
+}
+
+/*Appends one key/string pair to the texts of info. Both arrays are grown by one entry;
+the key is copied with alloc_string and the string with alloc_string_sized(str, size).
+Returns 0 on success, 83 if any allocation fails. An array that was grown successfully
+is kept even if the other one fails.*/
+static unsigned lodepng_add_text_sized(LodePNGInfo* info, const char* key, const char* str, size_t size) {
+  const size_t slot = info->text_num;
+  const size_t bytes = sizeof(char*) * (slot + 1);
+  char** keys = (char**)(lodepng_realloc(info->text_keys, bytes));
+  char** strings = (char**)(lodepng_realloc(info->text_strings, bytes));
+
+  /*store whichever array was reallocated before bailing out, its old pointer is stale*/
+  if(keys) info->text_keys = keys;
+  if(strings) info->text_strings = strings;
+
+  if(!(keys && strings)) return 83; /*alloc fail*/
+
+  info->text_num = slot + 1;
+  info->text_keys[slot] = alloc_string(key);
+  info->text_strings[slot] = alloc_string_sized(str, size);
+  if(!info->text_keys[slot] || !info->text_strings[slot]) return 83; /*alloc fail*/
+
+  return 0;
+}
+
+/*Appends a key/string pair to the texts of info, taking the string length from
+lodepng_strlen(str). Returns the result of lodepng_add_text_sized.*/
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) {
+  const size_t length = lodepng_strlen(str);
+  return lodepng_add_text_sized(info, key, str, length);
+}
+
+/*Removes all texts from info: frees every key and string and the arrays holding them,
+then resets the text fields to the empty state. info can be filled again or cleaned up
+afterwards.*/
+void lodepng_clear_text(LodePNGInfo* info) {
+  LodePNGText_cleanup(info);
+  /*The cleanup leaves text_num and the freed array pointers in place. Reset them so a
+  later add or cleanup does not touch freed memory or free it a second time.*/
+  LodePNGText_init(info);
+}
+
+/******************************************************************************/
+
+/*Resets the international text fields of info to "no texts": null arrays and a count
+of 0. Frees nothing.*/
+static void LodePNGIText_init(LodePNGInfo* info) {
+  info->itext_keys = NULL;
+  info->itext_langtags = NULL;
+  info->itext_transkeys = NULL;
+  info->itext_strings = NULL;
+  info->itext_num = 0;
+}
+
+/*Frees every international text key, language tag, translated key and string, and then
+the four arrays holding them. itext_num and the array pointers are not reset.*/
+static void LodePNGIText_cleanup(LodePNGInfo* info) {
+  size_t entry = 0;
+  while(entry != info->itext_num) {
+    string_cleanup(&info->itext_keys[entry]);
+    string_cleanup(&info->itext_langtags[entry]);
+    string_cleanup(&info->itext_transkeys[entry]);
+    string_cleanup(&info->itext_strings[entry]);
+    ++entry;
+  }
+  lodepng_free(info->itext_keys);
+  lodepng_free(info->itext_langtags);
+  lodepng_free(info->itext_transkeys);
+  lodepng_free(info->itext_strings);
+}
+
+/*Resets dest's international text fields without freeing them and then adds a copy of
+every international text of source to dest. Returns 0 on success or the first error from
+lodepng_add_itext.*/
+static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t entry;
+  dest->itext_num = 0;
+  dest->itext_keys = 0;
+  dest->itext_langtags = 0;
+  dest->itext_transkeys = 0;
+  dest->itext_strings = 0;
+  for(entry = 0; entry != source->itext_num; ++entry) {
+    CERROR_TRY_RETURN(lodepng_add_itext(dest,
+                                        source->itext_keys[entry], source->itext_langtags[entry],
+                                        source->itext_transkeys[entry], source->itext_strings[entry]));
+  }
+  return 0;
+}
+
+/*Removes all international texts from info: frees every key, language tag, translated
+key and string and the arrays holding them, then resets the fields to the empty state.
+info can be filled again or cleaned up afterwards.*/
+void lodepng_clear_itext(LodePNGInfo* info) {
+  LodePNGIText_cleanup(info);
+  /*The cleanup leaves itext_num and the freed array pointers in place. Reset them so a
+  later add or cleanup does not touch freed memory or free it a second time.*/
+  LodePNGIText_init(info);
+}
+
+/*Appends one international text entry to info. All four arrays are grown by one entry;
+key, langtag and transkey are copied with alloc_string and the string with
+alloc_string_sized(str, size).
+Returns 0 on success, 83 if any allocation fails. An array that was grown successfully
+is kept even if another one fails. If only a string copy fails, the entry has already
+been counted in itext_num and keeps null for the failed copies.*/
+static unsigned lodepng_add_itext_sized(LodePNGInfo* info, const char* key, const char* langtag,
+                                        const char* transkey, const char* str, size_t size) {
+  const size_t slot = info->itext_num;
+  const size_t bytes = sizeof(char*) * (slot + 1);
+  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, bytes));
+  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, bytes));
+  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, bytes));
+  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, bytes));
+
+  /*store whichever arrays were reallocated before bailing out, their old pointers are stale*/
+  if(new_keys) info->itext_keys = new_keys;
+  if(new_langtags) info->itext_langtags = new_langtags;
+  if(new_transkeys) info->itext_transkeys = new_transkeys;
+  if(new_strings) info->itext_strings = new_strings;
+
+  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->itext_num;
+
+  info->itext_keys[slot] = alloc_string(key);
+  info->itext_langtags[slot] = alloc_string(langtag);
+  info->itext_transkeys[slot] = alloc_string(transkey);
+  info->itext_strings[slot] = alloc_string_sized(str, size);
+
+  /*report failed string copies, as lodepng_add_text_sized does for plain texts*/
+  if(!info->itext_keys[slot] || !info->itext_langtags[slot]
+     || !info->itext_transkeys[slot] || !info->itext_strings[slot]) return 83; /*alloc fail*/
+
+  return 0;
+}
+
+/*Appends an international text entry to info, taking the string length from
+lodepng_strlen(str). Returns the result of lodepng_add_itext_sized.*/
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str) {
+  const size_t length = lodepng_strlen(str);
+  return lodepng_add_itext_sized(info, key, langtag, transkey, str, length);
+}
+
+/*Stores copies of the ICC profile name and data in info without freeing what info held
+before. Returns 0 on success, 100 if profile_size is 0, 83 if an allocation fails. On
+allocation failure the name and profile pointers hold whatever was allocated (possibly
+null) and iccp_profile_size is not updated.*/
+static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  char* name_copy;
+  unsigned char* profile_copy;
+
+  if(profile_size == 0) return 100; /*invalid ICC profile size*/
+
+  name_copy = alloc_string(name);
+  profile_copy = (unsigned char*)lodepng_malloc(profile_size);
+  info->iccp_name = name_copy;
+  info->iccp_profile = profile_copy;
+
+  if(!(name_copy && profile_copy)) return 83; /*alloc fail*/
+
+  lodepng_memcpy(profile_copy, profile, profile_size);
+  info->iccp_profile_size = profile_size;
+
+  return 0; /*ok*/
+}
+
+/*Replaces the ICC profile of info with copies of the given name and data. A previous
+profile is cleared first when info has a profile name. iccp_defined is set to 1 before
+the copy is attempted. Returns the result of lodepng_assign_icc.*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  const int had_name = (info->iccp_name != 0);
+  if(had_name) lodepng_clear_icc(info);
+
+  info->iccp_defined = 1;
+  return lodepng_assign_icc(info, name, profile, profile_size);
+}
+
+/*Frees the ICC profile name and data of info and marks the profile as not defined.*/
+void lodepng_clear_icc(LodePNGInfo* info) {
+  string_cleanup(&info->iccp_name);
+
+  lodepng_free(info->iccp_profile);
+  info->iccp_profile = NULL;
+
+  info->iccp_defined = 0;
+  info->iccp_profile_size = 0;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Puts info in its default state: default color mode, interlace, compression and filter
+method 0 and, when ancillary chunks are compiled in, no background, texts, time, phys,
+gamma, chrm, srgb or ICC data and empty unknown-chunk buffers. Frees nothing.*/
+void lodepng_info_init(LodePNGInfo* info) {
+  lodepng_color_mode_init(&info->color);
+  info->interlace_method = 0;
+  info->compression_method = 0;
+  info->filter_method = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*"defined" flags of the optional chunks*/
+  info->background_defined = 0;
+  info->time_defined = 0;
+  info->phys_defined = 0;
+  info->gama_defined = 0;
+  info->chrm_defined = 0;
+  info->srgb_defined = 0;
+  info->iccp_defined = 0;
+
+  info->background_r = 0;
+  info->background_g = 0;
+  info->background_b = 0;
+
+  /*owned buffers start out empty*/
+  info->iccp_name = NULL;
+  info->iccp_profile = NULL;
+  LodePNGText_init(info);
+  LodePNGIText_init(info);
+  LodePNGUnknownChunks_init(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Frees everything info owns: the color mode's palette and, when ancillary chunks are
+compiled in, the texts, international texts, ICC profile and unknown-chunk buffers.*/
+void lodepng_info_cleanup(LodePNGInfo* info) {
+  LodePNGColorMode* const color = &info->color;
+  lodepng_color_mode_cleanup(color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_cleanup(info);
+  LodePNGIText_cleanup(info);
+  lodepng_clear_icc(info);
+  LodePNGUnknownChunks_cleanup(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Makes dest a deep copy of source. dest must be initialized: whatever it owned before is
+freed first. Returns 0 on success or the first error code from one of the copy steps.
+After an error dest holds only memory of its own (possibly a partial copy), so it can
+still be passed to lodepng_info_cleanup.*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  lodepng_info_cleanup(dest);
+  lodepng_memcpy(dest, source, sizeof(LodePNGInfo));
+
+  /*The struct copy made every owning pointer of dest alias source's memory. Reset all of
+  them before the first step that can fail, so that an early error return never leaves
+  dest pointing at buffers that belong to source.*/
+  lodepng_color_mode_init(&dest->color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_init(dest);
+  LodePNGIText_init(dest);
+  dest->iccp_name = NULL;
+  dest->iccp_profile = NULL;
+  LodePNGUnknownChunks_init(dest);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
+  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
+  if(source->iccp_defined) {
+    CERROR_TRY_RETURN(lodepng_assign_icc(dest, source->iccp_name, source->iccp_profile, source->iccp_profile_size));
+  }
+
+  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Packs one value of 'bits' bits (1, 2 or 4) into the octet array 'out'.
+index: bitgroup index, bits: bitgroup size, in: bitgroup value, out: octet array.
+The first bitgroup of a byte overwrites the whole byte, later ones are OR-ed into it, so
+the bitgroups of a byte must be added in increasing index order.*/
+static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) {
+  const unsigned last = (bits == 1) ? 7u : (bits == 2) ? 3u : 1u; /*8 / bits - 1*/
+  /*pos = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/
+  const unsigned pos = (unsigned)(index & last);
+  /*drop any other bits of the input value, then move it to its place in the byte*/
+  const unsigned value = (in & ((1u << bits) - 1u)) << (bits * (last - pos));
+  unsigned char* const cell = &out[index * bits / 8u];
+  if(pos == 0) *cell = (unsigned char)value;
+  else *cell |= (unsigned char)value;
+}
+
+typedef struct ColorTree ColorTree;
+
+/*
+One node of a color tree
+This is the data structure used to count the number of unique colors and to get a palette
+index for a color. It's like an octree, but because the alpha channel is used too, each
+node has 16 instead of 8 children.
+Each level of the tree consumes one bit of each of r, g, b and a, which together select
+one of the 16 children; after 8 levels the whole RGBA color has been consumed and the
+node reached carries the index.
+*/
+struct ColorTree {
+ ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/
+ int index; /*the payload. Only has a meaningful value if this is in the last level; -1 right after color_tree_init*/
+};
+
+/*Initializes a node: zeroes all 16 child pointers and sets index to -1.*/
+static void color_tree_init(ColorTree* tree) {
+  tree->index = -1;
+  lodepng_memset(tree->children, 0, sizeof(tree->children));
+}
+
+/*Recursively frees all descendants of the node. The node itself is not freed and its
+child pointers are not reset.*/
+static void color_tree_cleanup(ColorTree* tree) {
+  ColorTree** slot = tree->children;
+  ColorTree** const stop = tree->children + 16;
+  for(; slot != stop; ++slot) {
+    if(!*slot) continue;
+    color_tree_cleanup(*slot);
+    lodepng_free(*slot);
+  }
+}
+
+/*Looks up an RGBA color in the tree. Returns the index stored for it, or -1 if the
+color is not present.*/
+static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  const ColorTree* node = tree;
+  int level;
+  for(level = 0; level < 8; ++level) {
+    /*one bit of each channel per level, from least to most significant*/
+    const int child = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+                    | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    node = node->children[child];
+    if(!node) return -1;
+  }
+  return node->index;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Returns 1 if the tree holds a non-negative index for the RGBA color, 0 otherwise.*/
+static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  const int found = color_tree_get(tree, r, g, b, a);
+  return found >= 0;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*Stores 'index' for the RGBA color, creating the nodes on the way down as needed.
+The color is not allowed to already exist.
+Index should be >= 0 (the stored value is signed to be compatible with using -1 for
+"doesn't exist").
+Returns an error code, or 0 if ok; 83 means a node allocation failed.*/
+static unsigned color_tree_add(ColorTree* tree,
+                               unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) {
+  ColorTree* node = tree;
+  int level;
+  for(level = 0; level < 8; ++level) {
+    /*one bit of each channel per level, from least to most significant*/
+    const int child = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+                    | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    ColorTree* next = node->children[child];
+    if(!next) {
+      next = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
+      node->children[child] = next;
+      if(!next) return 83; /*alloc fail*/
+      color_tree_init(next);
+    }
+    node = next;
+  }
+  node->index = (int)index;
+  return 0;
+}
+
+/*put a pixel, given its RGBA color, into image of any color type
+out: raw image buffer in the color mode 'mode', i: pixel index in that buffer,
+tree: color tree used to look up the palette index, only read for LCT_PALETTE.
+For the grey types the r value is used as the grey value; g and b are ignored.
+For 16-bit modes the 8-bit value is written to both bytes of the sample.
+Returns 0 if ok, 82 if the mode is LCT_PALETTE and the color is not in the tree.*/
+static unsigned rgba8ToPixel(unsigned char* out, size_t i,
+ const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
+ unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+ if(mode->colortype == LCT_GREY) {
+ unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/
+ if(mode->bitdepth == 8) out[i] = gray;
+ else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = gray;
+ else {
+ /*take the most significant bits of gray*/
+ gray = ((unsigned)gray >> (8u - mode->bitdepth)) & ((1u << mode->bitdepth) - 1u);
+ addColorBits(out, i, mode->bitdepth, gray);
+ }
+ } else if(mode->colortype == LCT_RGB) {
+ /*alpha is dropped*/
+ if(mode->bitdepth == 8) {
+ out[i * 3 + 0] = r;
+ out[i * 3 + 1] = g;
+ out[i * 3 + 2] = b;
+ } else {
+ out[i * 6 + 0] = out[i * 6 + 1] = r;
+ out[i * 6 + 2] = out[i * 6 + 3] = g;
+ out[i * 6 + 4] = out[i * 6 + 5] = b;
+ }
+ } else if(mode->colortype == LCT_PALETTE) {
+ int index = color_tree_get(tree, r, g, b, a);
+ if(index < 0) return 82; /*color not in palette*/
+ if(mode->bitdepth == 8) out[i] = index;
+ else addColorBits(out, i, mode->bitdepth, (unsigned)index);
+ } else if(mode->colortype == LCT_GREY_ALPHA) {
+ unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/
+ /*only bit depths 8 and 16 are handled here; any other bit depth writes nothing*/
+ if(mode->bitdepth == 8) {
+ out[i * 2 + 0] = gray;
+ out[i * 2 + 1] = a;
+ } else if(mode->bitdepth == 16) {
+ out[i * 4 + 0] = out[i * 4 + 1] = gray;
+ out[i * 4 + 2] = out[i * 4 + 3] = a;
+ }
+ } else if(mode->colortype == LCT_RGBA) {
+ if(mode->bitdepth == 8) {
+ out[i * 4 + 0] = r;
+ out[i * 4 + 1] = g;
+ out[i * 4 + 2] = b;
+ out[i * 4 + 3] = a;
+ } else {
+ out[i * 8 + 0] = out[i * 8 + 1] = r;
+ out[i * 8 + 2] = out[i * 8 + 3] = g;
+ out[i * 8 + 4] = out[i * 8 + 5] = b;
+ out[i * 8 + 6] = out[i * 8 + 7] = a;
+ }
+ }
+
+ return 0; /*no error*/
+}
+
+/*Stores a pixel, given as 16-bit RGBA, at pixel index i of an image whose color mode has
+16 bits per channel. Each sample is written high byte first. For the grey types the r
+value is used as the grey value. Any color type other than grey, RGB, grey+alpha or RGBA
+writes nothing.*/
+static void rgba16ToPixel(unsigned char* out, size_t i,
+                          const LodePNGColorMode* mode,
+                          unsigned short r, unsigned short g, unsigned short b, unsigned short a) {
+  unsigned char* px;
+  switch(mode->colortype) {
+    case LCT_GREY: /*grey = r; ((unsigned)r + g + b) / 3u is not used*/
+      px = out + i * 2;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      break;
+    case LCT_RGB:
+      px = out + i * 6;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      break;
+    case LCT_GREY_ALPHA: /*grey = r; ((unsigned)r + g + b) / 3u is not used*/
+      px = out + i * 4;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (a >> 8) & 255;
+      px[3] = a & 255;
+      break;
+    case LCT_RGBA:
+      px = out + i * 8;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      px[6] = (a >> 8) & 255;
+      px[7] = a & 255;
+      break;
+    default:
+      break;
+  }
+}
+
+/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.
+r, g, b, a: outputs, in: raw image buffer in the color mode 'mode'.
+16-bit samples are reduced to 8 bits by taking their first (high) byte.
+Without an alpha channel, alpha is 255, or 0 when a color key is defined and the pixel
+matches it; the key is compared against the full 16-bit sample in 16-bit modes.
+For an unrecognized color type the outputs are left untouched.*/
+static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
+ unsigned char* b, unsigned char* a,
+ const unsigned char* in, size_t i,
+ const LodePNGColorMode* mode) {
+ if(mode->colortype == LCT_GREY) {
+ if(mode->bitdepth == 8) {
+ *r = *g = *b = in[i];
+ if(mode->key_defined && *r == mode->key_r) *a = 0;
+ else *a = 255;
+ } else if(mode->bitdepth == 16) {
+ *r = *g = *b = in[i * 2 + 0];
+ if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
+ else *a = 255;
+ } else {
+ /*1, 2 or 4 bits per pixel: read the packed value and scale it to 0..255*/
+ unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+ size_t j = i * mode->bitdepth;
+ unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+ *r = *g = *b = (value * 255) / highest;
+ if(mode->key_defined && value == mode->key_r) *a = 0;
+ else *a = 255;
+ }
+ } else if(mode->colortype == LCT_RGB) {
+ if(mode->bitdepth == 8) {
+ *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2];
+ if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0;
+ else *a = 255;
+ } else {
+ *r = in[i * 6 + 0];
+ *g = in[i * 6 + 2];
+ *b = in[i * 6 + 4];
+ if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+ && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+ && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
+ else *a = 255;
+ }
+ } else if(mode->colortype == LCT_PALETTE) {
+ unsigned index;
+ if(mode->bitdepth == 8) index = in[i];
+ else {
+ size_t j = i * mode->bitdepth;
+ index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+ }
+ /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+ *r = mode->palette[index * 4 + 0];
+ *g = mode->palette[index * 4 + 1];
+ *b = mode->palette[index * 4 + 2];
+ *a = mode->palette[index * 4 + 3];
+ } else if(mode->colortype == LCT_GREY_ALPHA) {
+ if(mode->bitdepth == 8) {
+ *r = *g = *b = in[i * 2 + 0];
+ *a = in[i * 2 + 1];
+ } else {
+ *r = *g = *b = in[i * 4 + 0];
+ *a = in[i * 4 + 2];
+ }
+ } else if(mode->colortype == LCT_RGBA) {
+ if(mode->bitdepth == 8) {
+ *r = in[i * 4 + 0];
+ *g = in[i * 4 + 1];
+ *b = in[i * 4 + 2];
+ *a = in[i * 4 + 3];
+ } else {
+ *r = in[i * 8 + 0];
+ *g = in[i * 8 + 2];
+ *b = in[i * 8 + 4];
+ *a = in[i * 8 + 6];
+ }
+ }
+}
+
+/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
+mode test cases, optimized to convert the colors much faster, when converting
+to the common case of RGBA with 8 bit per channel. buffer must be RGBA with
+enough memory.
+Converts numpixels pixels from 'in' (in color mode 'mode') and writes 4 bytes per pixel
+to buffer. 16-bit samples are reduced to their first (high) byte. For an unrecognized
+color type nothing is written.*/
+static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels,
+ const unsigned char* LODEPNG_RESTRICT in,
+ const LodePNGColorMode* mode) {
+ unsigned num_channels = 4;
+ size_t i;
+ if(mode->colortype == LCT_GREY) {
+ if(mode->bitdepth == 8) {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = buffer[1] = buffer[2] = in[i];
+ buffer[3] = 255;
+ }
+ if(mode->key_defined) {
+ /*second pass: rewind to the first pixel and make the pixels that match the key transparent*/
+ buffer -= numpixels * num_channels;
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ if(buffer[0] == mode->key_r) buffer[3] = 0;
+ }
+ }
+ } else if(mode->bitdepth == 16) {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = buffer[1] = buffer[2] = in[i * 2];
+ buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
+ }
+ } else {
+ /*1, 2 or 4 bits per pixel: j is the running bit position, advanced by each read*/
+ unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+ size_t j = 0;
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+ buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
+ buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
+ }
+ }
+ } else if(mode->colortype == LCT_RGB) {
+ if(mode->bitdepth == 8) {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ lodepng_memcpy(buffer, &in[i * 3], 3);
+ buffer[3] = 255;
+ }
+ if(mode->key_defined) {
+ /*second pass: rewind to the first pixel and make the pixels that match the key transparent*/
+ buffer -= numpixels * num_channels;
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ if(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b) buffer[3] = 0;
+ }
+ }
+ } else {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = in[i * 6 + 0];
+ buffer[1] = in[i * 6 + 2];
+ buffer[2] = in[i * 6 + 4];
+ buffer[3] = mode->key_defined
+ && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+ && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+ && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
+ }
+ }
+ } else if(mode->colortype == LCT_PALETTE) {
+ if(mode->bitdepth == 8) {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ unsigned index = in[i];
+ /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+ lodepng_memcpy(buffer, &mode->palette[index * 4], 4);
+ }
+ } else {
+ /*j is the running bit position, advanced by each read*/
+ size_t j = 0;
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+ /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+ lodepng_memcpy(buffer, &mode->palette[index * 4], 4);
+ }
+ }
+ } else if(mode->colortype == LCT_GREY_ALPHA) {
+ if(mode->bitdepth == 8) {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
+ buffer[3] = in[i * 2 + 1];
+ }
+ } else {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
+ buffer[3] = in[i * 4 + 2];
+ }
+ }
+ } else if(mode->colortype == LCT_RGBA) {
+ if(mode->bitdepth == 8) {
+ /*input already has the output layout: copy it in one go*/
+ lodepng_memcpy(buffer, in, numpixels * 4);
+ } else {
+ for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+ buffer[0] = in[i * 8 + 0];
+ buffer[1] = in[i * 8 + 2];
+ buffer[2] = in[i * 8 + 4];
+ buffer[3] = in[i * 8 + 6];
+ }
+ }
+ }
+}
+
+/*Converts numpixels pixels of the raw image `in`, stored in color mode `mode`, into
+tightly packed 8-bit RGB triplets written to `buffer`. Works like getPixelColorsRGBA8
+but writes no alpha byte: alpha channels and color keys of the input are dropped.
+Of a 16-bit sample only the first (most significant) byte is kept, samples of less
+than 8 bits are scaled to the range 0-255. Color types not handled below leave
+buffer untouched.*/
+static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels,
+                               const unsigned char* LODEPNG_RESTRICT in,
+                               const LodePNGColorMode* mode) {
+  const unsigned stride = 3; /*bytes written per output pixel*/
+  const unsigned depth = mode->bitdepth;
+  size_t bitpos = 0; /*bit cursor, only used for samples of less than 8 bits*/
+  size_t px;
+
+  switch(mode->colortype) {
+    case LCT_GREY:
+      if(depth == 8) {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = buffer[1] = buffer[2] = in[px];
+        }
+      } else if(depth == 16) {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = buffer[1] = buffer[2] = in[px * 2];
+        }
+      } else {
+        const unsigned maxval = ((1U << depth) - 1U); /*highest possible value for this bit depth*/
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          unsigned sample = readBitsFromReversedStream(&bitpos, in, depth);
+          buffer[0] = buffer[1] = buffer[2] = (sample * 255) / maxval;
+        }
+      }
+      break;
+
+    case LCT_RGB:
+      if(depth == 8) {
+        /*input already has the output layout*/
+        lodepng_memcpy(buffer, in, numpixels * 3);
+      } else {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = in[px * 6 + 0];
+          buffer[1] = in[px * 6 + 2];
+          buffer[2] = in[px * 6 + 4];
+        }
+      }
+      break;
+
+    case LCT_PALETTE:
+      if(depth == 8) {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          unsigned entry = in[px];
+          /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+          lodepng_memcpy(buffer, &mode->palette[entry * 4], 3);
+        }
+      } else {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          unsigned entry = readBitsFromReversedStream(&bitpos, in, depth);
+          /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+          lodepng_memcpy(buffer, &mode->palette[entry * 4], 3);
+        }
+      }
+      break;
+
+    case LCT_GREY_ALPHA:
+      if(depth == 8) {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = buffer[1] = buffer[2] = in[px * 2 + 0];
+        }
+      } else {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = buffer[1] = buffer[2] = in[px * 4 + 0];
+        }
+      }
+      break;
+
+    case LCT_RGBA:
+      if(depth == 8) {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          lodepng_memcpy(buffer, &in[px * 4], 3);
+        }
+      } else {
+        for(px = 0; px != numpixels; ++px, buffer += stride) {
+          buffer[0] = in[px * 8 + 0];
+          buffer[1] = in[px * 8 + 2];
+          buffer[2] = in[px * 8 + 4];
+        }
+      }
+      break;
+
+    default:
+      break; /*unknown color type: nothing is written*/
+  }
+}
+
+/*Reads the pixel with index i (y * width + x) from the raw image `in` and returns it as
+RGBA with 16 bits per channel through r, g, b and a. The given color type must be 16-bit
+itself: every sample is read as two bytes, most significant byte first.
+For gray and RGB input, alpha becomes 0 if a color key is defined and the pixel matches it,
+and 65535 otherwise. Any other color type (e.g. palette) leaves the outputs untouched.*/
+static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
+                                const unsigned char* in, size_t i, const LodePNGColorMode* mode) {
+  const unsigned char* px; /*first byte of the requested pixel*/
+
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      unsigned grey;
+      px = in + i * 2;
+      grey = 256u * px[0] + px[1];
+      *r = *g = *b = grey;
+      *a = (mode->key_defined && grey == mode->key_r) ? 0 : 65535;
+      break;
+    }
+    case LCT_RGB: {
+      unsigned red, green, blue;
+      px = in + i * 6;
+      red = 256u * px[0] + px[1];
+      green = 256u * px[2] + px[3];
+      blue = 256u * px[4] + px[5];
+      *r = red;
+      *g = green;
+      *b = blue;
+      if(mode->key_defined && red == mode->key_r && green == mode->key_g && blue == mode->key_b) {
+        *a = 0;
+      } else {
+        *a = 65535;
+      }
+      break;
+    }
+    case LCT_GREY_ALPHA:
+      px = in + i * 4;
+      *r = *g = *b = 256u * px[0] + px[1];
+      *a = 256u * px[2] + px[3];
+      break;
+    case LCT_RGBA:
+      px = in + i * 8;
+      *r = 256u * px[0] + px[1];
+      *g = 256u * px[2] + px[3];
+      *b = 256u * px[4] + px[5];
+      *a = 256u * px[6] + px[7];
+      break;
+    default:
+      break; /*no 16-bit representation handled here: outputs stay as they were*/
+  }
+}
+
+/*Converts the w * h pixels of raw image `in` from color mode mode_in to color mode mode_out,
+writing the result to `out`. Returns 0 on success, 107 if the input is palette based but has
+no palette, or the error code of color_tree_add / rgba8ToPixel.
+If both modes are equal the pixel data is copied literally.*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h) {
+  const size_t pixelcount = (size_t)w * (size_t)h;
+  const int to_palette = (mode_out->colortype == LCT_PALETTE);
+  ColorTree tree; /*only initialized and used when the output is a palette*/
+  unsigned error = 0;
+  size_t i;
+
+  if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) {
+    return 107; /* error: must provide palette if input mode is palette */
+  }
+
+  if(lodepng_color_mode_equal(mode_out, mode_in)) {
+    lodepng_memcpy(out, in, lodepng_get_raw_size(w, h, mode_in));
+    return 0;
+  }
+
+  if(to_palette) {
+    const unsigned char* entries = mode_out->palette;
+    size_t available = mode_out->palettesize;
+    size_t limit = (size_t)1u << mode_out->bitdepth; /*most indices the output bit depth can express*/
+    /*if the user specified output palette but did not give the values, assume
+    they want the values of the input color type (assuming that one is palette).
+    Note that we never create a new palette ourselves.*/
+    if(available == 0) {
+      available = mode_in->palettesize;
+      entries = mode_in->palette;
+      /*if the input was also palette with same bitdepth, then the color types are also
+      equal, so copy literally. This to preserve the exact indices that were in the PNG
+      even in case there are duplicate colors in the palette.*/
+      if(mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) {
+        lodepng_memcpy(out, in, lodepng_get_raw_size(w, h, mode_in));
+        return 0;
+      }
+    }
+    if(available < limit) limit = available;
+
+    /*map every usable palette color to its index, stopping at the first failure*/
+    color_tree_init(&tree);
+    for(i = 0; !error && i != limit; ++i) {
+      const unsigned char* rgba = &entries[i * 4];
+      error = color_tree_add(&tree, rgba[0], rgba[1], rgba[2], rgba[3], (unsigned)i);
+    }
+  }
+
+  if(!error) {
+    if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) {
+      /*keep full precision: go through 16-bit RGBA*/
+      for(i = 0; i != pixelcount; ++i) {
+        unsigned short r16 = 0, g16 = 0, b16 = 0, a16 = 0;
+        getPixelColorRGBA16(&r16, &g16, &b16, &a16, in, i, mode_in);
+        rgba16ToPixel(out, i, mode_out, r16, g16, b16, a16);
+      }
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) {
+      getPixelColorsRGBA8(out, pixelcount, in, mode_in);
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) {
+      getPixelColorsRGB8(out, pixelcount, in, mode_in);
+    } else {
+      /*generic path: one pixel at a time through 8-bit RGBA*/
+      unsigned char r8 = 0, g8 = 0, b8 = 0, a8 = 0;
+      for(i = 0; !error && i != pixelcount; ++i) {
+        getPixelColorRGBA8(&r8, &g8, &b8, &a8, in, i, mode_in);
+        error = rgba8ToPixel(out, i, mode_out, &tree, r8, g8, b8, a8);
+      }
+    }
+  }
+
+  if(to_palette) {
+    color_tree_cleanup(&tree);
+  }
+
+  return error;
+}
+
+
+/* Converts a single rgb color without alpha from one type to another, color bits truncated to
+their bitdepth. In case of single channel (gray or palette), only the r channel is used. Slow
+function, do not use to process all pixels of an image. Alpha channel not supported on purpose:
+this is for bKGD, supporting alpha may prevent it from finding a color in the palette, from the
+specification it looks like bKGD should ignore the alpha values of the palette since it can use
+any palette index but doesn't have an alpha channel. Idem with ignoring color key.
+Returns 0 on success, 82 if a palette index is out of range or the color is not present in the
+output palette, 31 if a color type is not one of the handled ones. For gray output only *r_out
+is written, for palette output *r_out receives the palette index. */
+unsigned lodepng_convert_rgb(
+    unsigned* r_out, unsigned* g_out, unsigned* b_out,
+    unsigned r_in, unsigned g_in, unsigned b_in,
+    const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) {
+  unsigned red = 0, green = 0, blue = 0; /*the color widened to 16 bits per channel*/
+  const unsigned scale = 65535 / ((1u << mode_in->bitdepth) - 1u); /*65535, 21845, 4369, 257, 1*/
+  const unsigned downshift = 16 - mode_out->bitdepth;
+
+  /* first bring the input to 16 bits per channel */
+  switch(mode_in->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      red = green = blue = r_in * scale;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      red = r_in * scale;
+      green = g_in * scale;
+      blue = b_in * scale;
+      break;
+    case LCT_PALETTE: {
+      const unsigned char* entry;
+      if(r_in >= mode_in->palettesize) return 82;
+      entry = &mode_in->palette[r_in * 4];
+      red = entry[0] * 257u;
+      green = entry[1] * 257u;
+      blue = entry[2] * 257u;
+      break;
+    }
+    default:
+      return 31;
+  }
+
+  /* now convert to output format */
+  switch(mode_out->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      *r_out = red >> downshift;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      *r_out = red >> downshift;
+      *g_out = green >> downshift;
+      *b_out = blue >> downshift;
+      break;
+    case LCT_PALETTE: {
+      unsigned idx;
+      /* a 16-bit color cannot be in the palette */
+      if((red >> 8) != (red & 255) || (green >> 8) != (green & 255) || (blue >> 8) != (blue & 255)) return 82;
+      /* linear search for the first entry with the same RGB, alpha is ignored */
+      for(idx = 0; idx < mode_out->palettesize; idx++) {
+        const unsigned char* entry = &mode_out->palette[idx * 4];
+        if((red >> 8) == entry[0] && (green >> 8) == entry[1] && (blue >> 8) == entry[2]) {
+          *r_out = idx;
+          return 0;
+        }
+      }
+      return 82;
+    }
+    default:
+      return 31;
+  }
+
+  return 0;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*Resets `stats` to the state of "no pixels seen yet" and enables the palette and greyscale
+settings. The palette array of the stats is not touched; numcolors = 0 marks it as empty.*/
+void lodepng_color_stats_init(LodePNGColorStats* stats) {
+  /*settings*/
+  stats->allow_greyscale = 1;
+  stats->allow_palette = 1;
+
+  /*stats*/
+  stats->numpixels = 0;
+  stats->numcolors = 0;
+  stats->bits = 1; /*lowest bit depth, raised while pixels are analyzed*/
+  stats->colored = 0;
+  stats->alpha = 0;
+  stats->key = 0;
+  stats->key_r = 0;
+  stats->key_g = 0;
+  stats->key_b = 0;
+}
+
+/*function used for debug purposes with C++*/
+/*void printColorStats(LodePNGColorStats* p) {
+ std::cout << "colored: " << (int)p->colored << ", ";
+ std::cout << "key: " << (int)p->key << ", ";
+ std::cout << "key_r: " << (int)p->key_r << ", ";
+ std::cout << "key_g: " << (int)p->key_g << ", ";
+ std::cout << "key_b: " << (int)p->key_b << ", ";
+ std::cout << "alpha: " << (int)p->alpha << ", ";
+ std::cout << "numcolors: " << (int)p->numcolors << ", ";
+ std::cout << "bits: " << (int)p->bits << std::endl;
+}*/
+
+/*Returns the smallest bit depth (1, 2, 4 or 8) at which the given 8-bit channel value can be
+stored without loss: 0 and 255 fit in 1 bit, multiples of 85 in 2 bits, multiples of 17 in
+4 bits (the scaling of 2-bit and 4-bit values uses multiples of 85 and 17), the rest needs 8.*/
+static unsigned getValueRequiredBits(unsigned char value) {
+  unsigned bits = 8;
+  if(value == 0 || value == 255) {
+    bits = 1;
+  } else if(value % 85 == 0) {
+    bits = 2;
+  } else if(value % 17 == 0) {
+    bits = 4;
+  }
+  return bits;
+}
+
+/*Analyzes the w * h pixels of the raw image `in`, stored in color mode mode_in, and accumulates
+the findings in `stats`: whether color is used, whether an alpha channel or a color key is needed,
+how many bits per channel are needed and, as long as it is still useful, the distinct colors seen
+(at most 256 of them are stored in stats->palette).
+stats must already have been inited. It may contain the result of an earlier call, which is then
+extended with the pixels of this image.
+Returns 0 on success, or the error code returned by color_tree_add.*/
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+ const unsigned char* in, unsigned w, unsigned h,
+ const LodePNGColorMode* mode_in) {
+ size_t i;
+ ColorTree tree; /*colors seen so far, used to count distinct colors*/
+ size_t numpixels = (size_t)w * (size_t)h;
+ unsigned error = 0;
+
+ /* mark things as done already if it would be impossible to have a more expensive case */
+ unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0;
+ unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1;
+ unsigned numcolors_done = 0;
+ unsigned bpp = lodepng_get_bpp(mode_in);
+ unsigned bits_done = (stats->bits == 1 && bpp == 1) ? 1 : 0;
+ unsigned sixteen = 0; /* whether the input image is 16 bit */
+ unsigned maxnumcolors = 257; /*counting stops once this many colors were found*/
+ if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, stats->numcolors + (1u << bpp)); /*bpp bits can add at most 2^bpp colors*/
+
+ stats->numpixels += numpixels;
+
+ /*if palette not allowed, no need to compute numcolors*/
+ if(!stats->allow_palette) numcolors_done = 1;
+
+ color_tree_init(&tree);
+
+ /*If the stats was already filled in from previous data, fill its palette in tree
+ and mark things as done already if we know they are the most expensive case already*/
+ if(stats->alpha) alpha_done = 1;
+ if(stats->colored) colored_done = 1;
+ if(stats->bits == 16) numcolors_done = 1;
+ if(stats->bits >= bpp) bits_done = 1;
+ if(stats->numcolors >= maxnumcolors) numcolors_done = 1;
+
+ if(!numcolors_done) {
+ for(i = 0; i < stats->numcolors; i++) {
+ const unsigned char* color = &stats->palette[i * 4];
+ error = color_tree_add(&tree, color[0], color[1], color[2], color[3], i); /*i is the index of the color in stats->palette*/
+ if(error) goto cleanup;
+ }
+ }
+
+ /*Check if the 16-bit input is truly 16-bit*/
+ if(mode_in->bitdepth == 16 && !sixteen) { /*sixteen is still 0 at this point*/
+ unsigned short r = 0, g = 0, b = 0, a = 0;
+ for(i = 0; i != numpixels; ++i) {
+ getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+ if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
+ (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ {
+ stats->bits = 16;
+ sixteen = 1;
+ bits_done = 1;
+ numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
+ break;
+ }
+ }
+ }
+
+ if(sixteen) {
+ unsigned short r = 0, g = 0, b = 0, a = 0;
+
+ for(i = 0; i != numpixels; ++i) {
+ getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+
+ if(!colored_done && (r != g || r != b)) {
+ stats->colored = 1;
+ colored_done = 1;
+ }
+
+ if(!alpha_done) {
+ unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b);
+ if(a != 65535 && (a != 0 || (stats->key && !matchkey))) { /*translucent pixel, or a second fully transparent color*/
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ } else if(a == 0 && !stats->alpha && !stats->key) { /*first fully transparent pixel: candidate color key*/
+ stats->key = 1;
+ stats->key_r = r;
+ stats->key_g = g;
+ stats->key_b = b;
+ } else if(a == 65535 && stats->key && matchkey) {
+ /* Color key cannot be used if an opaque pixel also has that RGB color. */
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ }
+ }
+ if(alpha_done && numcolors_done && colored_done && bits_done) break;
+ }
+
+ if(stats->key && !stats->alpha) { /*re-scan all pixels: the loop above only checks pixels that come after the key was chosen*/
+ for(i = 0; i != numpixels; ++i) {
+ getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+ if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) {
+ /* Color key cannot be used if an opaque pixel also has that RGB color. */
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ }
+ }
+ }
+ } else /* < 16-bit */ {
+ unsigned char r = 0, g = 0, b = 0, a = 0;
+ for(i = 0; i != numpixels; ++i) {
+ getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+
+ if(!bits_done && stats->bits < 8) {
+ /*only r is checked, < 8 bits is only relevant for grayscale*/
+ unsigned bits = getValueRequiredBits(r);
+ if(bits > stats->bits) stats->bits = bits;
+ }
+ bits_done = (stats->bits >= bpp);
+
+ if(!colored_done && (r != g || r != b)) {
+ stats->colored = 1;
+ colored_done = 1;
+ if(stats->bits < 8) stats->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
+ }
+
+ if(!alpha_done) {
+ unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b);
+ if(a != 255 && (a != 0 || (stats->key && !matchkey))) { /*translucent pixel, or a second fully transparent color*/
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+ } else if(a == 0 && !stats->alpha && !stats->key) { /*first fully transparent pixel: candidate color key*/
+ stats->key = 1;
+ stats->key_r = r;
+ stats->key_g = g;
+ stats->key_b = b;
+ } else if(a == 255 && stats->key && matchkey) {
+ /* Color key cannot be used if an opaque pixel also has that RGB color. */
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+ }
+ }
+
+ if(!numcolors_done) {
+ if(!color_tree_has(&tree, r, g, b, a)) {
+ error = color_tree_add(&tree, r, g, b, a, stats->numcolors);
+ if(error) goto cleanup;
+ if(stats->numcolors < 256) { /*only the first 256 colors are stored, later ones are only counted*/
+ unsigned char* p = stats->palette;
+ unsigned n = stats->numcolors;
+ p[n * 4 + 0] = r;
+ p[n * 4 + 1] = g;
+ p[n * 4 + 2] = b;
+ p[n * 4 + 3] = a;
+ }
+ ++stats->numcolors;
+ numcolors_done = stats->numcolors >= maxnumcolors;
+ }
+ }
+
+ if(alpha_done && numcolors_done && colored_done && bits_done) break;
+ }
+
+ if(stats->key && !stats->alpha) { /*re-scan all pixels: the loop above only checks pixels that come after the key was chosen*/
+ for(i = 0; i != numpixels; ++i) {
+ getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+ if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) {
+ /* Color key cannot be used if an opaque pixel also has that RGB color. */
+ stats->alpha = 1;
+ stats->key = 0;
+ alpha_done = 1;
+ if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+ }
+ }
+ }
+
+ /*make the stats's key always 16-bit for consistency - repeat each byte twice
+ NOTE(review): these three statements run on every call that takes this < 16-bit path, also when
+ stats already holds a key that an earlier call widened to 16-bit, while the matchkey tests above
+ compare 8-bit r, g, b against the stored key - confirm that repeated calls on the same stats
+ (such as the one made by lodepng_color_stats_add) behave as intended.*/
+ stats->key_r += (stats->key_r << 8);
+ stats->key_g += (stats->key_g << 8);
+ stats->key_b += (stats->key_b << 8);
+ }
+
+cleanup:
+ color_tree_cleanup(&tree);
+ return error;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*Adds a single color to the color stats. The stats must already have been inited. The color must be given as 16-bit
+(with 2 bytes repeating for 8-bit and 65535 for opaque alpha channel). This function is expensive, do not call it for
+all pixels of an image but only for a few additional values.
+The color is packed into a 1x1 16-bit RGBA image that is handed to lodepng_compute_color_stats, whose
+error code is returned.*/
+static unsigned lodepng_color_stats_add(LodePNGColorStats* stats,
+                                        unsigned r, unsigned g, unsigned b, unsigned a) {
+  LodePNGColorMode rgba16;
+  unsigned char pixel[8];
+  unsigned result;
+
+  /*one pixel, each channel as two bytes with the most significant byte first*/
+  pixel[0] = (unsigned char)(r >> 8);
+  pixel[1] = (unsigned char)r;
+  pixel[2] = (unsigned char)(g >> 8);
+  pixel[3] = (unsigned char)g;
+  pixel[4] = (unsigned char)(b >> 8);
+  pixel[5] = (unsigned char)b;
+  pixel[6] = (unsigned char)(a >> 8);
+  pixel[7] = (unsigned char)a;
+
+  lodepng_color_mode_init(&rgba16);
+  rgba16.colortype = LCT_RGBA;
+  rgba16.bitdepth = 16;
+
+  result = lodepng_compute_color_stats(stats, pixel, 1, 1, &rgba16);
+  lodepng_color_mode_cleanup(&rgba16);
+  return result;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Computes a minimal PNG color model that can contain all colors as indicated by the stats.
+The stats should be computed with lodepng_compute_color_stats.
+mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant.
+Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image,
+e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ...
+This is used if auto_convert is enabled (it is by default).
+mode_out receives the chosen color type and bit depth, plus the palette or the color key when one is used.
+Returns 0 on success, or the error code of lodepng_palette_add or lodepng_color_mode_copy when filling
+in the palette of mode_out failed.
+*/
+static unsigned auto_choose_color(LodePNGColorMode* mode_out,
+                                  const LodePNGColorMode* mode_in,
+                                  const LodePNGColorStats* stats) {
+  unsigned error = 0;
+  unsigned palettebits;
+  size_t i, n;
+  size_t numpixels = stats->numpixels;
+  unsigned palette_ok, gray_ok;
+
+  /*local copies: they can be overruled below without changing the stats*/
+  unsigned alpha = stats->alpha;
+  unsigned key = stats->key;
+  unsigned bits = stats->bits;
+
+  mode_out->key_defined = 0;
+
+  if(key && numpixels <= 16) {
+    alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
+    key = 0;
+    if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+  }
+
+  gray_ok = !stats->colored;
+  if(!stats->allow_greyscale) gray_ok = 0;
+  if(!gray_ok && bits < 8) bits = 8;
+
+  n = stats->numcolors;
+  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
+  palette_ok = n <= 256 && bits <= 8 && n != 0; /*n==0 means likely numcolors wasn't computed*/
+  if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
+  if(gray_ok && !alpha && bits <= palettebits) palette_ok = 0; /*gray is less overhead*/
+  if(!stats->allow_palette) palette_ok = 0;
+
+  if(palette_ok) {
+    const unsigned char* p = stats->palette;
+    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
+    for(i = 0; i != stats->numcolors; ++i) {
+      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
+      if(error) break;
+    }
+
+    mode_out->colortype = LCT_PALETTE;
+    mode_out->bitdepth = palettebits;
+
+    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
+        && mode_in->bitdepth == mode_out->bitdepth) {
+      unsigned copy_error;
+      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
+      lodepng_color_mode_cleanup(mode_out);
+      copy_error = lodepng_color_mode_copy(mode_out, mode_in);
+      /*a failed copy must not go unnoticed, but it must not hide an earlier palette error either*/
+      if(!error) error = copy_error;
+    }
+  } else /*8-bit or 16-bit per channel*/ {
+    mode_out->bitdepth = bits;
+    mode_out->colortype = alpha ? (gray_ok ? LCT_GREY_ALPHA : LCT_RGBA)
+                                : (gray_ok ? LCT_GREY : LCT_RGB);
+    if(key) {
+      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/
+      mode_out->key_r = stats->key_r & mask;
+      mode_out->key_g = stats->key_g & mask;
+      mode_out->key_b = stats->key_b & mask;
+      mode_out->key_defined = 1;
+    }
+  }
+
+  return error;
+}
+
+#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
+
+/*
+Paeth predictor, used by PNG filter type 4
+Picks, out of a (left), b (above) and c (upper left), the one that is closest to a + b - c.
+The parameters are of type short, but should come from unsigned chars, the shorts
+are only needed to make the paeth calculation correct.
+*/
+static unsigned char paethPredictor(short a, short b, short c) {
+  short dist_a = LODEPNG_ABS(b - c); /*distance of a to the estimate a + b - c*/
+  short dist_b = LODEPNG_ABS(a - c); /*distance of b to the estimate*/
+  short dist_c = LODEPNG_ABS(a + b - c - c); /*distance of c to the estimate*/
+  short best = a;
+  short best_dist = dist_a;
+
+  /* keep the candidate with the smallest distance; on a tie the order of preference is a, b, c */
+  if(dist_b < best_dist) {
+    best = b;
+    best_dist = dist_b;
+  }
+  if(dist_c < best_dist) {
+    best = c;
+  }
+  return (unsigned char)best;
+}
+
+/*shared values used by multiple Adam7 related functions*/
+
+static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
+static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
+static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
+static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
+
+/*
+Outputs various dimensions and positions in the image related to the Adam7 reduced images.
+passw: output containing the width of the 7 passes
+passh: output containing the height of the 7 passes
+filter_passstart: output containing the index of the start and end of each
+ reduced image with filter bytes
+padded_passstart output containing the index of the start and end of each
+ reduced image when without filter bytes but with padded scanlines
+passstart: output containing the index of the start and end of each reduced
+ image without padding between scanlines, but still padding between the images
+w, h: width and height of non-interlaced image
+bpp: bits per pixel
+"padded" is only relevant if bpp is less than 8 and a scanline or image does not
+ end at a full byte
+All sizes are computed in size_t: the product of pass width, pass height and bpp does
+ not fit in a 32-bit unsigned for large images, even when the result fits in size_t.
+*/
+static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
+                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) {
+  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
+  unsigned i;
+
+  /*calculate width and height in pixels of each pass*/
+  for(i = 0; i != 7; ++i) {
+    /*rounding up division, done in size_t so that w + delta cannot wrap around*/
+    passw[i] = (unsigned)(((size_t)w + ADAM7_DX[i] - ADAM7_IX[i] - 1u) / ADAM7_DX[i]);
+    passh[i] = (unsigned)(((size_t)h + ADAM7_DY[i] - ADAM7_IY[i] - 1u) / ADAM7_DY[i]);
+    /*a pass without columns or without rows contains no pixels at all*/
+    if(passw[i] == 0) passh[i] = 0;
+    if(passh[i] == 0) passw[i] = 0;
+  }
+
+  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
+  for(i = 0; i != 7; ++i) {
+    size_t rows = passh[i];
+    /*bits padded if needed to fill full byte at end of each scanline*/
+    size_t linebytes = ((size_t)passw[i] * bpp + 7u) / 8u;
+    /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/
+    filter_passstart[i + 1] = filter_passstart[i]
+                            + ((passw[i] && passh[i]) ? rows * (1u + linebytes) : 0);
+    padded_passstart[i + 1] = padded_passstart[i] + rows * linebytes;
+    /*only padded at end of reduced image*/
+    passstart[i + 1] = passstart[i] + (rows * passw[i] * bpp + 7u) / 8u;
+  }
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Decoder / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*read the information from the header and store it in the LodePNGInfo. return value is error*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
+ const unsigned char* in, size_t insize) {
+ unsigned width, height;
+ LodePNGInfo* info = &state->info_png;
+ if(insize == 0 || in == 0) {
+ CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
+ }
+ if(insize < 33) {
+ CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
+ }
+
+ /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
+ /* TODO: remove this. One should use a new LodePNGState for new sessions */
+ lodepng_info_cleanup(info);
+ lodepng_info_init(info);
+
+ if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
+ || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) {
+ CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
+ }
+ if(lodepng_chunk_length(in + 8) != 13) {
+ CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
+ }
+ if(!lodepng_chunk_type_equals(in + 8, "IHDR")) {
+ CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
+ }
+
+ /*read the values given in the header*/
+ width = lodepng_read32bitInt(&in[16]);
+ height = lodepng_read32bitInt(&in[20]);
+ /*TODO: remove the undocumented feature that allows to give null pointers to width or height*/
+ if(w) *w = width;
+ if(h) *h = height;
+ info->color.bitdepth = in[24];
+ info->color.colortype = (LodePNGColorType)in[25];
+ info->compression_method = in[26];
+ info->filter_method = in[27];
+ info->interlace_method = in[28];
+
+ /*errors returned only after the parsing so other values are still output*/
+
+ /*error: invalid image size*/
+ if(width == 0 || height == 0) CERROR_RETURN_ERROR(state->error, 93);
+ /*error: invalid colortype or bitdepth combination*/
+ state->error = checkColorValidity(info->color.colortype, info->color.bitdepth);
+ if(state->error) return state->error;
+ /*error: only compression method 0 is allowed in the specification*/
+ if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
+ /*error: only filter method 0 is allowed in the specification*/
+ if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
+ /*error: only interlace methods 0 and 1 exist in the specification*/
+ if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
+
+ if(!state->decoder.ignore_crc) {
+ unsigned CRC = lodepng_read32bitInt(&in[29]);
+ unsigned checksum = lodepng_crc32(&in[12], 17);
+ if(CRC != checksum) {
+ CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
+ }
+ }
+
+ return state->error;
+}
+
+static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
+ size_t bytewidth, unsigned char filterType, size_t length) {
+ /*
+ For PNG filter method 0
+ unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte,
+ the filter works byte per byte (bytewidth = 1)
+ precon is the previous unfiltered scanline, recon the result, scanline the current one
+ the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead
+ recon and scanline MAY be the same memory address! precon must be disjoint.
+ precon may be null, which means there is no previous scanline: the bytes above are then taken as 0.
+ length is the number of bytes in the scanline, bytewidth the distance in bytes to the byte that
+ counts as "left" of the current one. For the first bytewidth bytes the left byte is taken as 0.
+ All sums are stored in an unsigned char, so they wrap around at 256.
+ returns 0 if ok, 36 if filterType is not one of the values 0-4.
+ */
+
+ size_t i;
+ switch(filterType) {
+ case 0: /*no filter: plain copy*/
+ for(i = 0; i != length; ++i) recon[i] = scanline[i];
+ break;
+ case 1: /*add the reconstructed byte to the left*/
+ for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+ for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
+ break;
+ case 2: /*add the byte above*/
+ if(precon) {
+ for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
+ } else {
+ for(i = 0; i != length; ++i) recon[i] = scanline[i];
+ }
+ break;
+ case 3: /*add the average, rounded down, of the byte to the left and the byte above*/
+ if(precon) {
+ for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1u);
+ for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1u);
+ } else {
+ for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+ for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1u);
+ }
+ break;
+ case 4: /*add the paeth prediction made from the bytes to the left, above and upper left*/
+ if(precon) {
+ for(i = 0; i != bytewidth; ++i) {
+ recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
+ }
+
+ /* Unroll independent paths of the paeth predictor. A 6x and 8x version would also be possible but that
+ adds too much code. Whether this actually speeds anything up at all depends on compiler and settings.
+ Each unrolled loop handles at most bytewidth bytes per step, so the recon bytes it reads
+ (at index i - bytewidth and up) were all written in an earlier step. i continues from the loop above. */
+ if(bytewidth >= 4) {
+ for(; i + 3 < length; i += 4) {
+ size_t j = i - bytewidth;
+ unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2], s3 = scanline[i + 3];
+ unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2], r3 = recon[j + 3];
+ unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2], p3 = precon[i + 3];
+ unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2], q3 = precon[j + 3];
+ recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+ recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+ recon[i + 2] = s2 + paethPredictor(r2, p2, q2);
+ recon[i + 3] = s3 + paethPredictor(r3, p3, q3);
+ }
+ } else if(bytewidth >= 3) {
+ for(; i + 2 < length; i += 3) {
+ size_t j = i - bytewidth;
+ unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2];
+ unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2];
+ unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2];
+ unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2];
+ recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+ recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+ recon[i + 2] = s2 + paethPredictor(r2, p2, q2);
+ }
+ } else if(bytewidth >= 2) {
+ for(; i + 1 < length; i += 2) {
+ size_t j = i - bytewidth;
+ unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1];
+ unsigned char r0 = recon[j + 0], r1 = recon[j + 1];
+ unsigned char p0 = precon[i + 0], p1 = precon[i + 1];
+ unsigned char q0 = precon[j + 0], q1 = precon[j + 1];
+ recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+ recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+ }
+ }
+
+ for(; i != length; ++i) { /*the bytes the unrolled loops above did not handle, one at a time*/
+ recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
+ }
+ } else {
+ for(i = 0; i != bytewidth; ++i) {
+ recon[i] = scanline[i];
+ }
+ for(i = bytewidth; i < length; ++i) {
+ /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
+ recon[i] = (scanline[i] + recon[i - bytewidth]);
+ }
+ }
+ break;
+ default: return 36; /*error: invalid filter type given*/
+ }
+ return 0;
+}
+
+static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  /*
+  For PNG filter method 0
+  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
+  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
+  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel
+  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
+  returns 0 if ok, or the error of the first scanline that could not be unfiltered
+  */
+
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  const size_t bytewidth = (bpp + 7u) / 8u;
+  /*the width of a scanline in bytes, not including the filter type*/
+  const size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
+
+  const unsigned char* src = in; /*current input row, starting at its filter type byte*/
+  unsigned char* dst = out; /*where the current row is reconstructed*/
+  unsigned char* above = 0; /*previous reconstructed row, none for the first row*/
+  unsigned row;
+
+  for(row = 0; row < h; ++row) {
+    CERROR_TRY_RETURN(unfilterScanline(dst, src + 1, above, bytewidth, src[0], linebytes));
+
+    above = dst;
+    dst += linebytes;
+    src += linebytes + 1; /*the extra filterbyte added to each row*/
+  }
+
+  return 0;
+}
+
+/*
+in: Adam7 interlaced image, with no padding bits between scanlines, but between
+ reduced images so that each reduced image starts at a byte.
+out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
+bpp: bits per pixel
+out has the following size in bits: w * h * bpp.
+in is possibly bigger due to padding bits between reduced images.
+out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
+(because that's likely a little bit faster)
+NOTE: comments about padding bits are only relevant if bpp < 8
+All byte and bit positions, for in as well as for out, are computed in size_t so that they
+are not truncated to 32 bits for large images.
+*/
+static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned i;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp >= 8) {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t bytewidth = bpp / 8u;
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        /*pixel (x, y) of pass i lands on pixel (IX + x * DX, IY + y * DY) of the full image*/
+        size_t pixelinstart = passstart[i] + ((size_t)y * passw[i] + x) * bytewidth;
+        size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * (size_t)w
+                             + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth;
+        for(b = 0; b < bytewidth; ++b) {
+          out[pixeloutstart + b] = in[pixelinstart + b];
+        }
+      }
+    }
+  } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t ilinebits = (size_t)bpp * passw[i]; /*bits per scanline of the reduced image*/
+      size_t olinebits = (size_t)bpp * w; /*bits per scanline of the full image*/
+      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        ibp = (8 * passstart[i]) + ((size_t)y * ilinebits + (size_t)x * bpp);
+        obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp;
+        /*copy the pixel bit by bit, both bit pointers advance with every call*/
+        for(b = 0; b < bpp; ++b) {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          setBitOfReversedStream(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+static void removePaddingBits(unsigned char* out, const unsigned char* in,
+                              size_t olinebits, size_t ilinebits, unsigned h) {
+  /*
+  Scanlines whose bit count is not a multiple of 8 still carry padding bits after unfiltering. This
+  copies h scanlines bit by bit from in to out and drops those padding bits, so that the Adam7 code,
+  the color convert code and the user get a pure image buffer (padding after the very last scanline
+  of an (Adam7-reduced) image does not need removing).
+  in and out may be the same buffer, and in may also start higher than out while still overlapping.
+  in must hold >= ilinebits*h bits, out must hold >= olinebits*h bits, and olinebits must be <= ilinebits.
+  Also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images
+  from Adam7. Only useful if (ilinebits - olinebits) is a value in the range 1..7.
+  */
+  const size_t padding = ilinebits - olinebits; /*bits to skip at the end of every input scanline*/
+  size_t inbit = 0, outbit = 0; /*bit pointers into in and out*/
+  unsigned row;
+
+  for(row = 0; row != h; ++row) {
+    size_t n;
+    for(n = 0; n != olinebits; ++n) {
+      unsigned char value = readBitFromReversedStream(&inbit, in);
+      setBitOfReversedStream(&outbit, out, value);
+    }
+    inbit += padding;
+  }
+}
+
+/*
+Turns the filtered, padded and possibly interlaced data from the IDAT chunks into a pure 2D image
+buffer that has the PNG's colortype.
+out: buffer big enough to contain the full image
+in: the full decompressed IDAT data (with filter type bytes and possible padding bits)
+NOTE: the in buffer will be overwritten with intermediate data!
+return value is error
+*/
+static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
+                                     unsigned w, unsigned h, const LodePNGInfo* info_png) {
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  if(bpp == 0) return 31; /*error: invalid colortype*/
+
+  if(info_png->interlace_method != 0) {
+    /*Adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace*/
+    unsigned passw[7], passh[7];
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned pass;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    for(pass = 0; pass != 7; ++pass) {
+      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[pass]], &in[filter_passstart[pass]],
+                                 passw[pass], passh[pass], bpp));
+      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
+      move bytes instead of bits or move not at all*/
+      if(bpp < 8) {
+        /*drop the padding bits at the end of each scanline; padding may remain between the reduced
+        images, since every reduced image still starts at a byte boundary*/
+        removePaddingBits(&in[passstart[pass]], &in[padded_passstart[pass]], passw[pass] * bpp,
+                          ((passw[pass] * bpp + 7u) / 8u) * 8u, passh[pass]);
+      }
+    }
+
+    Adam7_deinterlace(out, in, w, h, bpp);
+  } else {
+    /*no interlacing: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8)*/
+    unsigned linebits = w * bpp;
+    unsigned paddedlinebits = ((linebits + 7u) / 8u) * 8u;
+
+    if(bpp < 8 && linebits != paddedlinebits) {
+      /*unfilter in place first, the padding bits are stripped while copying to out*/
+      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp));
+      removePaddingBits(out, in, linebits, paddedlinebits, h);
+    } else {
+      /*scanlines are byte aligned: unfilter straight into the out buffer, no other steps needed*/
+      CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
+    }
+  }
+
+  return 0;
+}
+
+/*
+palette chunk (PLTE): fills color->palette with chunkLength / 3 RGBA entries, alpha set to opaque.
+returns 0 if ok, 38 if the palette has 0 or more than 256 entries, 83 if the allocation failed
+*/
+static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  const unsigned char* rgb = data; /*walks over the R,G,B triplets of the chunk*/
+  unsigned entry;
+
+  color->palettesize = chunkLength / 3u;
+  if(color->palettesize == 0 || color->palettesize > 256) return 38; /*error: palette too small or big*/
+
+  lodepng_color_mode_alloc_palette(color);
+  if(!color->palette && color->palettesize) {
+    color->palettesize = 0;
+    return 83; /*alloc fail*/
+  }
+
+  for(entry = 0; entry != color->palettesize; ++entry, rgb += 3) {
+    unsigned base = 4 * entry;
+    color->palette[base + 0] = rgb[0]; /*R*/
+    color->palette[base + 1] = rgb[1]; /*G*/
+    color->palette[base + 2] = rgb[2]; /*B*/
+    color->palette[base + 3] = 255; /*alpha*/
+  }
+
+  return 0; /* OK */
+}
+
+/*
+transparency chunk (tRNS): per-entry alpha values for palette images, or a single transparent
+color key for grayscale and RGB images.
+returns 0 if ok, or an error code for a wrong chunk size or a color type that cannot have tRNS
+*/
+static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  unsigned i;
+
+  switch(color->colortype) {
+    case LCT_PALETTE:
+      /*error: more alpha values given than there are palette entries*/
+      if(chunkLength > color->palettesize) return 39;
+      /*overwrite the alpha byte of the first chunkLength palette entries*/
+      for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i];
+      break;
+
+    case LCT_GREY:
+      /*error: this chunk must be 2 bytes for grayscale image*/
+      if(chunkLength != 2) return 30;
+      color->key_defined = 1;
+      color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
+      break;
+
+    case LCT_RGB:
+      /*error: this chunk must be 6 bytes for RGB image*/
+      if(chunkLength != 6) return 41;
+      color->key_defined = 1;
+      color->key_r = 256u * data[0] + data[1];
+      color->key_g = 256u * data[2] + data[3];
+      color->key_b = 256u * data[4] + data[5];
+      break;
+
+    default:
+      return 42; /*error: tRNS chunk not allowed for other color models*/
+  }
+
+  return 0; /* OK */
+}
+
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*
+background color chunk (bKGD): stores the suggested background color in info.
+For palette images the value is a palette index, stored in all three of background_r/g/b.
+Color types not handled below leave info untouched. Returns 0 if ok, else an error code.
+*/
+static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  switch(info->color.colortype) {
+    case LCT_PALETTE:
+      /*error: this chunk must be 1 byte for indexed color image*/
+      if(chunkLength != 1) return 43;
+      /*error: invalid palette index, or maybe this chunk appeared before PLTE*/
+      if(data[0] >= info->color.palettesize) return 103;
+
+      info->background_defined = 1;
+      info->background_r = info->background_g = info->background_b = data[0];
+      break;
+
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      /*error: this chunk must be 2 bytes for grayscale image*/
+      if(chunkLength != 2) return 44;
+
+      /*the values are truncated to bitdepth in the PNG file*/
+      info->background_defined = 1;
+      info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
+      break;
+
+    case LCT_RGB:
+    case LCT_RGBA:
+      /*error: this chunk must be 6 bytes for RGB image*/
+      if(chunkLength != 6) return 45;
+
+      /*the values are truncated to bitdepth in the PNG file*/
+      info->background_defined = 1;
+      info->background_r = 256u * data[0] + data[1];
+      info->background_g = 256u * data[2] + data[3];
+      info->background_b = 256u * data[4] + data[5];
+      break;
+
+    default:
+      break; /*nothing is stored for other color types*/
+  }
+
+  return 0; /* OK */
+}
+
+/*
+text chunk (tEXt)
+The chunk data is a keyword of 1..79 bytes, a null byte, and then the text, which runs up to the
+end of the chunk. Both parts are copied into null terminated heap strings and passed to
+lodepng_add_text; the local copies are freed again before returning (so lodepng_add_text is
+presumably expected to keep its own copies).
+returns 0 if ok, 89 for a bad keyword length, 83 if an allocation failed, or the error of lodepng_add_text
+*/
+static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+ unsigned error = 0;
+ char *key = 0, *str = 0;
+
+ while(!error) /*not really a while loop, only used to break on error*/ {
+ unsigned length, string2_begin;
+
+ /*the keyword ends at the first null byte, or at the end of the chunk if there is none*/
+ length = 0;
+ while(length < chunkLength && data[length] != 0) ++length;
+ /*even though it's not allowed by the standard, no error is thrown if
+ there's no null termination char, if the text is empty*/
+ if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+ key = (char*)lodepng_malloc(length + 1);
+ if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(key, data, length);
+ key[length] = 0;
+
+ string2_begin = length + 1; /*skip keyword null terminator*/
+
+ /*the text is whatever remains of the chunk; it is empty when the null terminator was missing*/
+ length = (unsigned)(chunkLength < string2_begin ? 0 : chunkLength - string2_begin);
+ str = (char*)lodepng_malloc(length + 1);
+ if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(str, data + string2_begin, length);
+ str[length] = 0;
+
+ error = lodepng_add_text(info, key, str);
+
+ break;
+ }
+
+ /*reached on success and on every error path; pointers that were never allocated are still 0*/
+ lodepng_free(key);
+ lodepng_free(str);
+
+ return error;
+}
+
+/*
+compressed text chunk (zTXt)
+The chunk data is a keyword of 1..79 bytes, a null byte, a compression method byte that must be 0,
+and then the zlib compressed text. The text is decompressed with zlibsettings and added to info
+together with the keyword through lodepng_add_text_sized.
+returns 0 if ok, 75 if the chunk is too short, 89 for a bad keyword length, 83 if an allocation
+failed, 72 for an unknown compression method, or the error of zlib_decompress / lodepng_add_text_sized
+*/
+static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+ const unsigned char* data, size_t chunkLength) {
+ unsigned error = 0;
+
+ unsigned length, string2_begin;
+ char *key = 0;
+ unsigned char* str = 0;
+ size_t size = 0;
+
+ while(!error) /*not really a while loop, only used to break on error*/ {
+ /*find the end of the keyword*/
+ for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+ /*after the keyword there must be room for its null terminator, the compression method byte and
+ at least one byte of compressed data*/
+ if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+ if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+ key = (char*)lodepng_malloc(length + 1);
+ if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(key, data, length);
+ key[length] = 0;
+
+ if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+ /*the compressed data starts after the null terminator and the compression method byte*/
+ string2_begin = length + 2;
+ if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+
+ length = (unsigned)chunkLength - string2_begin;
+ /*will fail if zlib error, e.g. if length is too small*/
+ error = zlib_decompress(&str, &size, 0, &data[string2_begin],
+ length, zlibsettings);
+ if(error) break;
+ error = lodepng_add_text_sized(info, key, (char*)str, size);
+
+ break;
+ }
+
+ /*reached on success and on every error path; pointers that were never allocated are still 0*/
+ lodepng_free(key);
+ lodepng_free(str);
+
+ return error;
+}
+
+/*
+international text chunk (iTXt)
+The chunk data is parsed in this order: keyword (1..79 bytes), null byte, compression flag byte,
+compression method byte (must be 0), null terminated language tag, null terminated translated
+keyword, and finally the text, which runs up to the end of the chunk. If the compression flag is
+non-zero the text is decompressed with zlibsettings first. The result is added to info with
+lodepng_add_itext_sized; the temporary strings are freed before returning.
+returns 0 if ok, else an error code (30, 75, 89, 83, 72, or the error of zlib_decompress /
+lodepng_add_itext_sized)
+*/
+static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+ const unsigned char* data, size_t chunkLength) {
+ unsigned error = 0;
+ unsigned i;
+
+ unsigned length, begin, compressed;
+ char *key = 0, *langtag = 0, *transkey = 0;
+
+ while(!error) /*not really a while loop, only used to break on error*/ {
+ /*Quick check if the chunk length isn't too small. Even without check
+ it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
+ if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
+
+ /*read the key*/
+ for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+ /*the keyword's null terminator and the two compression bytes must still fit after the keyword*/
+ if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
+ if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+ key = (char*)lodepng_malloc(length + 1);
+ if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(key, data, length);
+ key[length] = 0;
+
+ /*read the compression method*/
+ compressed = data[length + 1];
+ if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+ /*even though it's not allowed by the standard, no error is thrown if
+ there's no null termination char, if the text is empty for the next 3 texts*/
+
+ /*read the langtag*/
+ begin = length + 3;
+ length = 0;
+ for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+ langtag = (char*)lodepng_malloc(length + 1);
+ if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(langtag, data + begin, length);
+ langtag[length] = 0;
+
+ /*read the transkey*/
+ begin += length + 1;
+ length = 0;
+ for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+ transkey = (char*)lodepng_malloc(length + 1);
+ if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
+
+ lodepng_memcpy(transkey, data + begin, length);
+ transkey[length] = 0;
+
+ /*read the actual text*/
+ begin += length + 1;
+
+ /*begin can be past the end of the chunk if null terminators were missing: the text is empty then*/
+ length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin;
+
+ if(compressed) {
+ unsigned char* str = 0;
+ size_t size = 0;
+ /*will fail if zlib error, e.g. if length is too small*/
+ error = zlib_decompress(&str, &size, 0, &data[begin],
+ length, zlibsettings);
+ if(!error) error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)str, size);
+ lodepng_free(str);
+ } else {
+ /*uncompressed: the text is passed directly from the chunk data, with its length*/
+ error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)(data + begin), length);
+ }
+
+ break;
+ }
+
+ /*reached on success and on every error path; pointers that were never allocated are still 0*/
+ lodepng_free(key);
+ lodepng_free(langtag);
+ lodepng_free(transkey);
+
+ return error;
+}
+
+/*time chunk (tIME): 2 byte big endian year followed by month, day, hour, minute and second bytes.
+returns 0 if ok, 73 if the chunk is not exactly 7 bytes*/
+static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 7) {
+    info->time_defined = 1;
+    info->time.year = ((unsigned)data[0] << 8) | data[1];
+    info->time.month = data[2];
+    info->time.day = data[3];
+    info->time.hour = data[4];
+    info->time.minute = data[5];
+    info->time.second = data[6];
+    return 0; /* OK */
+  }
+
+  return 73; /*invalid tIME chunk size*/
+}
+
+/*physical size chunk (pHYs): two 4 byte big endian values for x and y, then one unit byte.
+returns 0 if ok, 74 if the chunk is not exactly 9 bytes*/
+static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned x = 0, y = 0;
+  unsigned k;
+
+  if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/
+
+  /*assemble both 32-bit values, most significant byte first*/
+  for(k = 0; k != 4; ++k) {
+    x = (x << 8) | data[k];
+    y = (y << 8) | data[4 + k];
+  }
+
+  info->phys_defined = 1;
+  info->phys_x = x;
+  info->phys_y = y;
+  info->phys_unit = data[8];
+
+  return 0; /* OK */
+}
+
+/*gamma chunk (gAMA): a single 4 byte big endian value, stored as is in info->gama_gamma.
+returns 0 if ok, 96 if the chunk is not exactly 4 bytes*/
+static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned gamma = 0;
+  unsigned k;
+
+  if(chunkLength != 4) return 96; /*invalid gAMA chunk size*/
+
+  /*most significant byte first*/
+  for(k = 0; k != 4; ++k) gamma = (gamma << 8) | data[k];
+
+  info->gama_defined = 1;
+  info->gama_gamma = gamma;
+
+  return 0; /* OK */
+}
+
+/*chromaticity chunk (cHRM): eight 4 byte big endian values in the order
+white x, white y, red x, red y, green x, green y, blue x, blue y.
+returns 0 if ok, 97 if the chunk is not exactly 32 bytes*/
+static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned value[8];
+  unsigned n;
+
+  if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/
+
+  /*decode all eight 32-bit fields first*/
+  for(n = 0; n != 8; ++n) {
+    const unsigned char* p = data + 4 * n;
+    value[n] = 16777216u * p[0] + 65536u * p[1] + 256u * p[2] + p[3];
+  }
+
+  info->chrm_defined = 1;
+  info->chrm_white_x = value[0];
+  info->chrm_white_y = value[1];
+  info->chrm_red_x   = value[2];
+  info->chrm_red_y   = value[3];
+  info->chrm_green_x = value[4];
+  info->chrm_green_y = value[5];
+  info->chrm_blue_x  = value[6];
+  info->chrm_blue_y  = value[7];
+
+  return 0; /* OK */
+}
+
+/*sRGB chunk: a single byte holding the rendering intent.
+returns 0 if ok, 98 if the chunk is not exactly 1 byte*/
+static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 1) {
+    info->srgb_defined = 1;
+    info->srgb_intent = data[0];
+    return 0; /* OK */
+  }
+
+  return 98; /*invalid sRGB chunk size (this one is never ignored)*/
+}
+
+/*
+ICC profile chunk (iCCP)
+The chunk data is a profile name of 1..79 bytes, a null byte, a compression method byte that must
+be 0, and then the zlib compressed profile. The name is stored in info->iccp_name, the decompressed
+profile in info->iccp_profile / info->iccp_profile_size, and info->iccp_defined is set to 1.
+A profile left over from an earlier iCCP chunk is released first.
+returns 0 if ok, 75 if the chunk is too short, 89 for a bad name length, 83 if an allocation failed,
+72 for an unknown compression method, 100 for an empty profile, or the error of zlib_decompress
+*/
+static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  size_t size = 0;
+
+  unsigned length, string2_begin;
+
+  /*Release a previously read profile BEFORE marking the new one as defined: lodepng_clear_icc
+  (defined elsewhere) may reset iccp_defined, which would otherwise leave the newly loaded
+  profile flagged as not defined.*/
+  if(info->iccp_name) lodepng_clear_icc(info);
+  info->iccp_defined = 1;
+
+  /*find the end of the profile name*/
+  for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+  /*the null terminator, the compression method byte and at least one data byte must follow the name*/
+  if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/
+  if(length < 1 || length > 79) return 89; /*keyword too short or long*/
+
+  info->iccp_name = (char*)lodepng_malloc(length + 1);
+  if(!info->iccp_name) return 83; /*alloc fail*/
+
+  lodepng_memcpy(info->iccp_name, data, length);
+  info->iccp_name[length] = 0;
+
+  if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0*/
+
+  /*the compressed profile starts after the null terminator and the compression method byte*/
+  string2_begin = length + 2;
+  if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/
+
+  length = (unsigned)chunkLength - string2_begin;
+  error = zlib_decompress(&info->iccp_profile, &size, 0,
+                          &data[string2_begin],
+                          length, zlibsettings);
+  info->iccp_profile_size = size;
+  if(!error && !info->iccp_profile_size) error = 100; /*invalid ICC profile size*/
+  return error;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Reads the single chunk that starts at byte offset pos of the PNG data in[0..insize) and stores its
+information in state->info_png, using the same readChunk_* functions as the decoder. Chunk types
+that have no reader here are left alone and are not an error. Unless state->decoder.ignore_crc is
+set, the CRC of a successfully read chunk is verified.
+returns 0 if ok, 30 if the chunk does not fit in the buffer, 63 if the chunk length exceeds the
+PNG maximum, 57 for a CRC mismatch, or the error of the readChunk_* function
+
+The bounds checks compare sizes instead of pointers, so that neither pos + 4 nor a pointer past
+the end of the buffer can overflow. A chunk occupies 12 bytes (length, type, CRC) plus its data.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+                               const unsigned char* in, size_t insize) {
+  const unsigned char* chunk;
+  unsigned chunkLength;
+  const unsigned char* data;
+  unsigned unhandled = 0;
+  unsigned error = 0;
+
+  /*the 4 length bytes must be readable*/
+  if(pos > insize || insize - pos < 4) return 30;
+  chunk = in + pos;
+  chunkLength = lodepng_chunk_length(chunk);
+  if(chunkLength > 2147483647) return 63;
+  /*length + type + data + CRC must all lie inside the buffer*/
+  if(insize - pos < 12 || chunkLength > insize - pos - 12) return 30;
+  data = lodepng_chunk_data_const(chunk);
+
+  if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+    error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+    error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+    error = readChunk_bKGD(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+    error = readChunk_tEXt(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+    error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+    error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+    error = readChunk_tIME(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+    error = readChunk_pHYs(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+    error = readChunk_gAMA(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+    error = readChunk_cHRM(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+    error = readChunk_sRGB(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+    error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  } else {
+    /* unhandled chunk is ok (is not an error) */
+    unhandled = 1;
+  }
+
+  /*the CRC is only checked for chunks that were read successfully*/
+  if(!error && !unhandled && !state->decoder.ignore_crc) {
+    if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/
+  }
+
+  return error;
+}
+
+/*
+read a PNG, the result will be in the same color type as the PNG (hence "generic")
+out: receives a newly allocated buffer with the raw pixels, or 0 if nothing was allocated
+w, h: receive the image dimensions
+state: settings are read from state->decoder, the PNG information is written to state->info_png,
+ and the result code is written to state->error (0 = ok)
+in, insize: the complete PNG file in memory
+Steps: 1) read the header with lodepng_inspect 2) walk over all chunks, collecting the IDAT data
+and reading the other known chunks 3) zlib decompress the collected IDAT data 4) unfilter and
+deinterlace the scanlines into *out with postProcessScanlines.
+NOTE(review): if postProcessScanlines fails, *out stays allocated here; freeing it is left to the caller.
+*/
+static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
+ LodePNGState* state,
+ const unsigned char* in, size_t insize) {
+ unsigned char IEND = 0;
+ const unsigned char* chunk;
+ unsigned char* idat; /*the data from idat chunks, zlib compressed*/
+ size_t idatsize = 0;
+ unsigned char* scanlines = 0;
+ size_t scanlines_size = 0, expected_size = 0;
+ size_t outsize = 0;
+
+ /*for unknown chunk order*/
+ unsigned unknown = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+
+ /* safe output values in case error happens */
+ *out = 0;
+ *w = *h = 0;
+
+ state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
+ if(state->error) return;
+
+ if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) {
+ CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/
+ }
+
+ /*the input filesize is a safe upper bound for the sum of idat chunks size*/
+ idat = (unsigned char*)lodepng_malloc(insize);
+ if(!idat) CERROR_RETURN(state->error, 83); /*alloc fail*/
+
+ chunk = &in[33]; /*first byte of the first chunk after the header*/
+
+ /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
+ IDAT data is put at the start of the in buffer*/
+ while(!IEND && !state->error) {
+ unsigned chunkLength;
+ const unsigned char* data; /*the data in the chunk*/
+
+ /*error: size of the in buffer too small to contain next chunk*/
+ if((size_t)((chunk - in) + 12) > insize || chunk < in) {
+ if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+ CERROR_BREAK(state->error, 30);
+ }
+
+ /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
+ chunkLength = lodepng_chunk_length(chunk);
+ /*error: chunk length larger than the max PNG chunk size*/
+ if(chunkLength > 2147483647) {
+ if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+ CERROR_BREAK(state->error, 63);
+ }
+
+ if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) {
+ CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
+ }
+
+ data = lodepng_chunk_data_const(chunk);
+
+ unknown = 0;
+
+ /*IDAT chunk, containing compressed image data*/
+ if(lodepng_chunk_type_equals(chunk, "IDAT")) {
+ size_t newsize;
+ /*append this chunk's data to idat; the total may never exceed the insize bytes that were allocated*/
+ if(lodepng_addofl(idatsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95);
+ if(newsize > insize) CERROR_BREAK(state->error, 95);
+ lodepng_memcpy(idat + idatsize, data, chunkLength);
+ idatsize += chunkLength;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ critical_pos = 3;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ } else if(lodepng_chunk_type_equals(chunk, "IEND")) {
+ /*IEND chunk*/
+ IEND = 1;
+ } else if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+ /*palette chunk (PLTE)*/
+ state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+ if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ critical_pos = 2;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+ /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled
+ in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that
+ affects the alpha channel of pixels. */
+ state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+ if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*background color chunk (bKGD)*/
+ } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+ state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+ /*text chunk (tEXt)*/
+ if(state->decoder.read_text_chunks) {
+ state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ }
+ } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+ /*compressed text chunk (zTXt)*/
+ if(state->decoder.read_text_chunks) {
+ state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+ if(state->error) break;
+ }
+ } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+ /*international text chunk (iTXt)*/
+ if(state->decoder.read_text_chunks) {
+ state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+ if(state->error) break;
+ }
+ } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+ state->error = readChunk_tIME(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+ state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+ state->error = readChunk_gAMA(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+ state->error = readChunk_cHRM(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+ state->error = readChunk_sRGB(&state->info_png, data, chunkLength);
+ if(state->error) break;
+ } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+ state->error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+ if(state->error) break;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ {
+ /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
+ if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) {
+ CERROR_BREAK(state->error, 69);
+ }
+
+ unknown = 1;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*optionally keep the raw chunk, in the list that matches its position relative to PLTE and IDAT*/
+ if(state->decoder.remember_unknown_chunks) {
+ state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
+ &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
+ if(state->error) break;
+ }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ }
+
+ if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ {
+ if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
+ }
+
+ if(!IEND) chunk = lodepng_chunk_next_const(chunk, in + insize);
+ }
+
+ if(state->info_png.color.colortype == LCT_PALETTE && !state->info_png.color.palette) {
+ state->error = 106; /* error: PNG file must have PLTE chunk if color type is palette */
+ }
+
+ if(!state->error) {
+ /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
+ If the decompressed size does not match the prediction, the image must be corrupt.*/
+ if(state->info_png.interlace_method == 0) {
+ size_t bpp = lodepng_get_bpp(&state->info_png.color);
+ expected_size = lodepng_get_raw_size_idat(*w, *h, bpp);
+ } else {
+ size_t bpp = lodepng_get_bpp(&state->info_png.color);
+ /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/
+ expected_size = 0;
+ expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, bpp);
+ if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, bpp);
+ expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, bpp);
+ if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, bpp);
+ expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, bpp);
+ if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, bpp);
+ expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, bpp);
+ }
+
+ state->error = zlib_decompress(&scanlines, &scanlines_size, expected_size, idat, idatsize, &state->decoder.zlibsettings);
+ }
+ if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/
+ /*the compressed data is no longer needed, whether or not decompression succeeded*/
+ lodepng_free(idat);
+
+ if(!state->error) {
+ outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
+ *out = (unsigned char*)lodepng_malloc(outsize);
+ if(!*out) state->error = 83; /*alloc fail*/
+ }
+ if(!state->error) {
+ /*start from an all-zero image: Adam7_deinterlace requires this when bpp < 8*/
+ lodepng_memset(*out, 0, outsize);
+ state->error = postProcessScanlines(*out, scanlines, *w, *h, &state->info_png);
+ }
+ lodepng_free(scanlines);
+}
+
+/*
+Decodes the PNG in[0..insize) into a newly allocated pixel buffer *out of *w by *h pixels.
+With state->decoder.color_convert set, the pixels are converted to the color mode requested in
+state->info_raw; otherwise they keep the PNG's own color mode, which is then copied to
+state->info_raw so that it still describes the returned buffer.
+returns 0 if ok, else an error code
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize) {
+  unsigned char* decoded; /*pixels in the PNG's own color mode*/
+  size_t convertedsize;
+
+  *out = 0;
+  decodeGeneric(out, w, h, state, in, insize);
+  if(state->error) return state->error;
+
+  if(!state->decoder.color_convert) {
+    /*no conversion wanted: store the info_png color settings on the info_raw so that the info_raw
+    still reflects what colortype the raw image has to the end user*/
+    state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
+    return state->error;
+  }
+
+  /*same color type, no copying or converting of data needed*/
+  if(lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) return state->error;
+
+  /*color conversion needed*/
+
+  /*TODO: check if this works according to the statement in the documentation: "The converter can convert
+  from grayscale input color type, to 8-bit grayscale or grayscale with alpha"*/
+  if(state->info_raw.colortype != LCT_RGB && state->info_raw.colortype != LCT_RGBA
+     && state->info_raw.bitdepth != 8) {
+    return 56; /*unsupported color mode conversion*/
+  }
+
+  decoded = *out;
+  convertedsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
+  *out = (unsigned char*)lodepng_malloc(convertedsize);
+  if(*out) {
+    state->error = lodepng_convert(*out, decoded, &state->info_raw,
+                                   &state->info_png.color, *w, *h);
+  } else {
+    state->error = 83; /*alloc fail*/
+  }
+  /*the unconverted pixels are released whether or not the conversion worked*/
+  lodepng_free(decoded);
+
+  return state->error;
+}
+
+/*Decodes a PNG from memory to the given colortype and bitdepth, using a temporary state with
+default settings. returns 0 if ok, else an error code*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
+                               size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState state;
+  unsigned result;
+
+  lodepng_state_init(&state);
+  /*the raw color mode is what the caller wants the output converted to*/
+  state.info_raw.bitdepth = bitdepth;
+  state.info_raw.colortype = colortype;
+
+  result = lodepng_decode(out, w, h, &state, in, insize);
+
+  lodepng_state_cleanup(&state);
+  return result;
+}
+
+/*Same as lodepng_decode_memory, always producing 8-bit RGBA output*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  unsigned error = lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8);
+  return error;
+}
+
+/*Same as lodepng_decode_memory, always producing 8-bit RGB output*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  unsigned error = lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Loads the file with the given name into memory and decodes it with lodepng_decode_memory.
+returns 0 if ok, else the error of lodepng_load_file or of the decoder*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* filedata = 0;
+  size_t filesize;
+  unsigned error;
+
+  /* safe output values in case error happens */
+  *w = *h = 0;
+  *out = 0;
+
+  error = lodepng_load_file(&filedata, &filesize, filename);
+  if(error == 0) {
+    error = lodepng_decode_memory(out, w, h, filedata, filesize, colortype, bitdepth);
+  }
+
+  /*the file contents are only needed while decoding*/
+  lodepng_free(filedata);
+  return error;
+}
+
+/*Same as lodepng_decode_file, always producing 8-bit RGBA output*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  unsigned error = lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8);
+  return error;
+}
+
+/*Same as lodepng_decode_file, always producing 8-bit RGB output*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  unsigned error = lodepng_decode_file(out, w, h, filename, LCT_RGB, 8);
+  return error;
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*Fills in the default decoder settings: color conversion on, text chunks read, unknown chunks
+not remembered, no tolerance for CRC errors, unknown critical chunks or a missing end, and the
+default zlib decompression settings*/
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) {
+  lodepng_decompress_settings_init(&settings->zlibsettings);
+
+  /*strict by default: none of the error tolerance switches is enabled*/
+  settings->ignore_end = 0;
+  settings->ignore_critical = 0;
+  settings->ignore_crc = 0;
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  settings->remember_unknown_chunks = 0;
+  settings->read_text_chunks = 1;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  settings->color_convert = 1;
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+
+/*Initializes every part of the state: decoder and/or encoder settings (whichever is compiled in),
+the raw color mode and the PNG info. The error field starts out as 1 rather than 0.*/
+void lodepng_state_init(LodePNGState* state) {
+  state->error = 1;
+
+#ifdef LODEPNG_COMPILE_DECODER
+  lodepng_decoder_settings_init(&state->decoder);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+  lodepng_encoder_settings_init(&state->encoder);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+  lodepng_color_mode_init(&state->info_raw);
+  lodepng_info_init(&state->info_png);
+}
+
+/* Cleans up the two sub-structures of `state` that have their own cleanup
+   routine: the PNG info and the raw color mode. */
+void lodepng_state_cleanup(LodePNGState* state) {
+  lodepng_info_cleanup(&state->info_png);
+  lodepng_color_mode_cleanup(&state->info_raw);
+}
+
+/* Makes `dest` a copy of `source`. `dest` must already be an initialized state,
+   because it is cleaned up first. The outcome of the copy is stored in
+   dest->error (0 on success, otherwise the error of the sub-copy that failed).
+   Copying a state onto itself leaves it untouched. */
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) {
+  /*self-copy must be a no-op: the cleanup below would otherwise discard the
+  very data that is about to be copied*/
+  if(dest == source) return;
+
+  lodepng_state_cleanup(dest);
+  /*the struct assignment is a shallow copy; info_raw and info_png are reset
+  afterwards and refilled through their own copy functions*/
+  *dest = *source;
+  lodepng_color_mode_init(&dest->info_raw);
+  lodepng_info_init(&dest->info_png);
+  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw);
+  if(dest->error) return;
+  dest->error = lodepng_info_copy(&dest->info_png, &source->info_png);
+}
+
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Encoder / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+/* Appends the 8 PNG signature ("magic") bytes to `out`.
+   Returns 0 on success or 83 if the vector could not be enlarged. */
+static unsigned writeSignature(ucvector* out) {
+  static const unsigned char magic[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  const size_t start = out->size;
+
+  if(!ucvector_resize(out, start + sizeof(magic))) return 83; /*alloc fail*/
+  lodepng_memcpy(out->data + start, magic, sizeof(magic));
+  return 0;
+}
+
+/* Appends the 13-byte IHDR chunk to `out`: width and height as 32-bit values,
+   then bit depth, color type, compression method (0), filter method (0) and
+   interlace method as single bytes. Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
+                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) {
+  unsigned char* chunk;
+  unsigned char* payload;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 13, "IHDR"));
+  payload = chunk + 8; /*the chunk data starts 8 bytes into the chunk*/
+
+  lodepng_set32bitInt(payload, w);
+  lodepng_set32bitInt(payload + 4, h);
+  payload[8] = (unsigned char)bitdepth;
+  payload[9] = (unsigned char)colortype;
+  payload[10] = 0; /*compression method*/
+  payload[11] = 0; /*filter method*/
+  payload[12] = interlace_method;
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends a PLTE chunk holding the red, green and blue byte of every palette
+   entry of `info`; the alpha byte of each 4-byte entry is left out.
+   Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) {
+  unsigned char* chunk;
+  unsigned char* dst;
+  const unsigned char* src;
+  size_t remaining;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, info->palettesize * 3, "PLTE"));
+
+  dst = chunk + 8;
+  src = info->palette;
+  for(remaining = info->palettesize; remaining != 0; --remaining) {
+    /*copy R, G, B and step over the alpha byte of the source entry*/
+    dst[0] = src[0];
+    dst[1] = src[1];
+    dst[2] = src[2];
+    dst += 3;
+    src += 4;
+  }
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends a tRNS chunk to `out`, but only when the color mode needs one:
+   - LCT_PALETTE: the alpha bytes of the palette, with the trailing run of
+     entries whose alpha is 255 dropped; no chunk if every entry has alpha 255.
+   - LCT_GREY / LCT_RGB: the color key, each sample written as two bytes (high
+     byte first), and only if key_defined is set.
+   Other color types never get a chunk.
+   Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) {
+  unsigned char* chunk = 0;
+
+  switch(info->colortype) {
+    case LCT_PALETTE: {
+      size_t count = info->palettesize;
+      size_t k;
+      /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
+      while(count != 0 && info->palette[4 * (count - 1) + 3] == 255) --count;
+      if(count != 0) {
+        CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, count, "tRNS"));
+        for(k = 0; k != count; ++k) chunk[8 + k] = info->palette[4 * k + 3];
+      }
+      break;
+    }
+    case LCT_GREY:
+      if(info->key_defined) {
+        CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "tRNS"));
+        chunk[8] = (unsigned char)(info->key_r >> 8);
+        chunk[9] = (unsigned char)(info->key_r & 255);
+      }
+      break;
+    case LCT_RGB:
+      if(info->key_defined) {
+        CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "tRNS"));
+        chunk[8] = (unsigned char)(info->key_r >> 8);
+        chunk[9] = (unsigned char)(info->key_r & 255);
+        chunk[10] = (unsigned char)(info->key_g >> 8);
+        chunk[11] = (unsigned char)(info->key_g & 255);
+        chunk[12] = (unsigned char)(info->key_b >> 8);
+        chunk[13] = (unsigned char)(info->key_b & 255);
+      }
+      break;
+    default:
+      break;
+  }
+
+  /*the CRC is only generated when a chunk was actually started*/
+  if(chunk) lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Compresses `data` (datasize bytes) with zlib_compress and appends the result
+   to `out` as one IDAT chunk. The temporary compressed buffer is always freed.
+   Returns 0 or the error of the step that failed. */
+static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
+                              LodePNGCompressSettings* zlibsettings) {
+  unsigned char* compressed = 0;
+  size_t compressedsize = 0;
+  unsigned error = zlib_compress(&compressed, &compressedsize, data, datasize, zlibsettings);
+
+  if(error == 0) error = lodepng_chunk_createv(out, compressedsize, "IDAT", compressed);
+
+  lodepng_free(compressed);
+  return error;
+}
+
+/* Appends an IEND chunk with no data to `out`; returns the result of
+   lodepng_chunk_createv. */
+static unsigned addChunk_IEND(ucvector* out) {
+  const unsigned char* nodata = 0;
+  return lodepng_chunk_createv(out, 0, "IEND", nodata);
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/* Appends a tEXt chunk: the keyword, a zero separator byte, then the text
+   (written without a terminator). Returns 89 when the keyword is not 1..79
+   characters long, otherwise 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) {
+  const size_t keysize = lodepng_strlen(keyword);
+  const size_t textsize = lodepng_strlen(textstring);
+  unsigned char* chunk = 0;
+  unsigned char* cursor;
+
+  if(keysize == 0 || keysize >= 80) return 89; /*error: invalid keyword size*/
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, keysize + 1 + textsize, "tEXt"));
+
+  cursor = chunk + 8;
+  lodepng_memcpy(cursor, keyword, keysize);
+  cursor += keysize;
+  *cursor++ = 0; /*null termination char*/
+  lodepng_memcpy(cursor, textstring, textsize);
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends a zTXt chunk: the keyword, a zero separator, a compression method
+   byte (0) and the zlib-compressed text. Returns 89 when the keyword is not
+   1..79 characters long, otherwise 0 or the error of the failing step.
+   The temporary compressed buffer is always freed. */
+static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
+                              LodePNGCompressSettings* zlibsettings) {
+  const size_t textsize = lodepng_strlen(textstring);
+  const size_t keysize = lodepng_strlen(keyword);
+  unsigned char* chunk = 0;
+  unsigned char* packed = 0;
+  size_t packedsize = 0;
+  unsigned error;
+
+  if(keysize == 0 || keysize >= 80) return 89; /*error: invalid keyword size*/
+
+  error = zlib_compress(&packed, &packedsize, (const unsigned char*)textstring, textsize, zlibsettings);
+  if(error == 0) error = lodepng_chunk_init(&chunk, out, keysize + 2 + packedsize, "zTXt");
+  if(error == 0) {
+    unsigned char* cursor = chunk + 8;
+    lodepng_memcpy(cursor, keyword, keysize);
+    cursor += keysize;
+    *cursor++ = 0; /*null termination char*/
+    *cursor++ = 0; /*compression method: 0*/
+    lodepng_memcpy(cursor, packed, packedsize);
+    lodepng_chunk_generate_crc(chunk);
+  }
+
+  lodepng_free(packed);
+  return error;
+}
+
+/* Appends an iTXt chunk laid out as: keyword, 0, compression flag, compression
+   method (0), language tag, 0, translated keyword, 0, text. When `compress` is
+   nonzero the text is stored zlib-compressed, otherwise verbatim.
+   Returns 89 when the keyword is not 1..79 characters long, otherwise 0 or the
+   error of the failing step. The temporary compressed buffer is always freed. */
+static unsigned addChunk_iTXt(ucvector* out, unsigned compress, const char* keyword, const char* langtag,
+                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) {
+  const size_t textsize = lodepng_strlen(textstring);
+  const size_t keysize = lodepng_strlen(keyword);
+  const size_t langsize = lodepng_strlen(langtag);
+  const size_t transsize = lodepng_strlen(transkey);
+  unsigned char* chunk = 0;
+  unsigned char* packed = 0;
+  size_t packedsize = 0;
+  unsigned error = 0;
+
+  if(keysize == 0 || keysize >= 80) return 89; /*error: invalid keyword size*/
+
+  if(compress) {
+    error = zlib_compress(&packed, &packedsize, (const unsigned char*)textstring, textsize, zlibsettings);
+  }
+
+  if(error == 0) {
+    const size_t bodysize = compress ? packedsize : textsize;
+    error = lodepng_chunk_init(&chunk, out, keysize + 3 + langsize + 1 + transsize + 1 + bodysize, "iTXt");
+  }
+
+  if(error == 0) {
+    unsigned char* cursor = chunk + 8;
+
+    lodepng_memcpy(cursor, keyword, keysize);
+    cursor += keysize;
+    *cursor++ = 0; /*null termination char*/
+    *cursor++ = (compress ? 1 : 0); /*compression flag*/
+    *cursor++ = 0; /*compression method: 0*/
+
+    lodepng_memcpy(cursor, langtag, langsize);
+    cursor += langsize;
+    *cursor++ = 0; /*null termination char*/
+
+    lodepng_memcpy(cursor, transkey, transsize);
+    cursor += transsize;
+    *cursor++ = 0; /*null termination char*/
+
+    if(compress) lodepng_memcpy(cursor, packed, packedsize);
+    else lodepng_memcpy(cursor, textstring, textsize);
+
+    lodepng_chunk_generate_crc(chunk);
+  }
+
+  lodepng_free(packed);
+  return error;
+}
+
+/* Appends a bKGD chunk whose layout depends on the PNG color type:
+   grey (with or without alpha) stores background_r in two bytes, RGB / RGBA
+   store background_r, _g and _b in two bytes each (high byte first), and
+   palette stores the low byte of background_r as the palette index.
+   Any other color type writes nothing. Returns 0 or the error from
+   lodepng_chunk_init. */
+static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) {
+  unsigned char* chunk = 0;
+
+  switch(info->color.colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "bKGD"));
+      chunk[8] = (unsigned char)(info->background_r >> 8);
+      chunk[9] = (unsigned char)(info->background_r & 255);
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "bKGD"));
+      chunk[8] = (unsigned char)(info->background_r >> 8);
+      chunk[9] = (unsigned char)(info->background_r & 255);
+      chunk[10] = (unsigned char)(info->background_g >> 8);
+      chunk[11] = (unsigned char)(info->background_g & 255);
+      chunk[12] = (unsigned char)(info->background_b >> 8);
+      chunk[13] = (unsigned char)(info->background_b & 255);
+      break;
+    case LCT_PALETTE:
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 1, "bKGD"));
+      chunk[8] = (unsigned char)(info->background_r & 255); /*palette index*/
+      break;
+    default:
+      break;
+  }
+
+  if(chunk) lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends the 7-byte tIME chunk: year in two bytes (high byte first), then
+   month, day, hour, minute and second as one byte each.
+   Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) {
+  unsigned char* chunk;
+  unsigned char* field;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 7, "tIME"));
+
+  field = chunk + 8;
+  *field++ = (unsigned char)(time->year >> 8);
+  *field++ = (unsigned char)(time->year & 255);
+  *field++ = (unsigned char)time->month;
+  *field++ = (unsigned char)time->day;
+  *field++ = (unsigned char)time->hour;
+  *field++ = (unsigned char)time->minute;
+  *field = (unsigned char)time->second;
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends the 9-byte pHYs chunk: phys_x and phys_y as 32-bit values followed
+   by the one-byte phys_unit. Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) {
+  unsigned char* chunk;
+  unsigned char* payload;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 9, "pHYs"));
+
+  payload = chunk + 8;
+  lodepng_set32bitInt(payload, info->phys_x);
+  lodepng_set32bitInt(payload + 4, info->phys_y);
+  payload[8] = info->phys_unit;
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends the 4-byte gAMA chunk containing info->gama_gamma as a 32-bit value.
+   Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) {
+  unsigned char* chunk;
+  unsigned char* payload;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 4, "gAMA"));
+
+  payload = chunk + 8;
+  lodepng_set32bitInt(payload, info->gama_gamma);
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends the 32-byte cHRM chunk: eight 32-bit values in the order white x/y,
+   red x/y, green x/y, blue x/y. Returns 0 or the error from lodepng_chunk_init. */
+static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) {
+  unsigned char* chunk;
+  unsigned char* field;
+
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 32, "cHRM"));
+
+  field = chunk + 8;
+  lodepng_set32bitInt(field, info->chrm_white_x); field += 4;
+  lodepng_set32bitInt(field, info->chrm_white_y); field += 4;
+  lodepng_set32bitInt(field, info->chrm_red_x); field += 4;
+  lodepng_set32bitInt(field, info->chrm_red_y); field += 4;
+  lodepng_set32bitInt(field, info->chrm_green_x); field += 4;
+  lodepng_set32bitInt(field, info->chrm_green_y); field += 4;
+  lodepng_set32bitInt(field, info->chrm_blue_x); field += 4;
+  lodepng_set32bitInt(field, info->chrm_blue_y);
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
+}
+
+/* Appends the sRGB chunk, whose single data byte is info->srgb_intent;
+   returns the result of lodepng_chunk_createv. */
+static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) {
+  unsigned char intent = info->srgb_intent;
+  return lodepng_chunk_createv(out, 1, "sRGB", &intent);
+}
+
+/* Appends an iCCP chunk: the profile name, a zero separator, a compression
+   method byte (0) and the zlib-compressed ICC profile. Returns 89 when the name
+   is not 1..79 characters long, otherwise 0 or the error of the failing step.
+   The temporary compressed buffer is always freed. */
+static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) {
+  const size_t namesize = lodepng_strlen(info->iccp_name);
+  unsigned char* chunk = 0;
+  unsigned char* packed = 0;
+  size_t packedsize = 0;
+  unsigned error;
+
+  if(namesize == 0 || namesize >= 80) return 89; /*error: invalid keyword size*/
+
+  error = zlib_compress(&packed, &packedsize, info->iccp_profile, info->iccp_profile_size, zlibsettings);
+  if(error == 0) error = lodepng_chunk_init(&chunk, out, namesize + 2 + packedsize, "iCCP");
+  if(error == 0) {
+    unsigned char* cursor = chunk + 8;
+    lodepng_memcpy(cursor, info->iccp_name, namesize);
+    cursor += namesize;
+    *cursor++ = 0; /*null termination char*/
+    *cursor++ = 0; /*compression method: 0*/
+    lodepng_memcpy(cursor, packed, packedsize);
+    lodepng_chunk_generate_crc(chunk);
+  }
+
+  lodepng_free(packed);
+  return error;
+}
+
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Applies one PNG filter type to a single scanline.
+out: receives `length` filtered bytes
+scanline: the `length` unfiltered bytes of the current row
+prevline: the unfiltered previous row, or 0 for the first row, in which case the
+          bytes of the missing row count as 0
+bytewidth: distance in bytes to the "left" byte used by Sub, Average and Paeth
+filterType: 0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth; any other value leaves
+            `out` untouched
+*/
+static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
+                           size_t length, size_t bytewidth, unsigned char filterType) {
+  size_t pos;
+
+  if(filterType == 0) {
+    /*None: plain copy*/
+    for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
+  } else if(filterType == 1) {
+    /*Sub: the first bytewidth bytes have no left neighbour*/
+    for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+    for(; pos < length; ++pos) out[pos] = scanline[pos] - scanline[pos - bytewidth];
+  } else if(filterType == 2) {
+    /*Up*/
+    if(prevline) {
+      for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos] - prevline[pos];
+    } else {
+      for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
+    }
+  } else if(filterType == 3) {
+    /*Average*/
+    if(prevline) {
+      for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos] - (prevline[pos] >> 1);
+      for(; pos < length; ++pos) {
+        out[pos] = scanline[pos] - ((scanline[pos - bytewidth] + prevline[pos]) >> 1);
+      }
+    } else {
+      for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+      for(; pos < length; ++pos) out[pos] = scanline[pos] - (scanline[pos - bytewidth] >> 1);
+    }
+  } else if(filterType == 4) {
+    /*Paeth*/
+    if(prevline) {
+      /*paethPredictor(0, prevline[pos], 0) is always prevline[pos]*/
+      for(pos = 0; pos != bytewidth; ++pos) out[pos] = (scanline[pos] - prevline[pos]);
+      for(; pos < length; ++pos) {
+        out[pos] = (scanline[pos] - paethPredictor(scanline[pos - bytewidth], prevline[pos], prevline[pos - bytewidth]));
+      }
+    } else {
+      for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+      /*paethPredictor(scanline[pos - bytewidth], 0, 0) is always scanline[pos - bytewidth]*/
+      for(; pos < length; ++pos) out[pos] = (scanline[pos] - scanline[pos - bytewidth]);
+    }
+  }
+  /*any other filter type is invalid: nothing is written*/
+}
+
+/* Integer binary logarithm: the position of the highest set bit of i, found by
+   halving the search width (16, 8, 4, 2, 1 bits). Returns 0 for i < 2 and never
+   more than 31. */
+static size_t ilog2(size_t i) {
+  static const unsigned widths[4] = {16, 8, 4, 2};
+  size_t log = 0;
+  unsigned k;
+
+  for(k = 0; k != 4; ++k) {
+    if(i >= ((size_t)1 << widths[k])) {
+      log += widths[k];
+      i >>= widths[k];
+    }
+  }
+  if(i >= 2) log += 1;
+
+  return log;
+}
+
+/* Integer approximation of i * log2(i), helper for the LFS_ENTROPY strategy.
+   Returns 0 for i == 0. */
+static size_t ilog2i(size_t i) {
+  size_t whole;
+  size_t excess;
+
+  if(i == 0) return 0;
+
+  whole = ilog2(i);
+  /*i minus the largest power of two not above it; doubled, this linearly
+  approximates the missing fractional part of the logarithm multiplied by i*/
+  excess = i - (1u << whole);
+  return i * whole + (excess << 1u);
+}
+
+/*
+Filters a whole image (PNG filter method 0), one scanline at a time.
+in: h scanlines of w pixels in color mode `color`, each row occupying a whole
+    number of bytes, without filter type bytes
+out: receives, per row, one filter type byte followed by the filtered row; it must
+     have room for (1 + linebytes) * h bytes, where linebytes is the byte size of
+     one scanline
+settings: supplies filter_strategy, filter_palette_zero, predefined_filters (read
+          only for LFS_PREDEFINED, one entry per row) and zlibsettings (used only
+          by LFS_BRUTE_FORCE)
+return value: 0 on success, 31 if the color mode has 0 bits per pixel, 83 on
+              allocation failure, 88 for an unknown filter strategy
+*/
+static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
+                       const LodePNGColorMode* color, const LodePNGEncoderSettings* settings) {
+  unsigned bpp = lodepng_get_bpp(color);
+  /*the width of a scanline in bytes, not including the filter type*/
+  size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
+
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7u) / 8u;
+  const unsigned char* prevline = 0;
+  unsigned x, y;
+  unsigned error = 0;
+  LodePNGFilterStrategy strategy = settings->filter_strategy;
+
+  /*
+  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
+   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
+      use fixed filtering, with the filter None).
+   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
+     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
+     all five filters and select the filter that produces the smallest sum of absolute values per row.
+  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
+
+  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
+  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
+  heuristic is used.
+  */
+  if(settings->filter_palette_zero &&
+     (color->colortype == LCT_PALETTE || color->bitdepth < 8)) strategy = LFS_ZERO;
+
+  if(bpp == 0) return 31; /*error: invalid color type*/
+
+  if(strategy >= LFS_ZERO && strategy <= LFS_FOUR) {
+    /*one fixed filter type for every row*/
+    unsigned char type = (unsigned char)strategy;
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_MINSUM) {
+    /*adaptive filtering*/
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned char type, bestType = 0;
+
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      /*with no rows to filter the buffers are never touched, so a null result
+      (which a zero-size allocation may legally give) is not a failure*/
+      if(!attempt[type] && h != 0) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          size_t sum = 0;
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+
+          /*calculate the sum of the result*/
+          if(type == 0) {
+            for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]);
+          } else {
+            for(x = 0; x != linebytes; ++x) {
+              /*For differences, each byte should be treated as signed, values above 127 are negative
+              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
+              This means filtertype 0 is almost never chosen, but that is justified.*/
+              unsigned char s = attempt[type][x];
+              sum += s < 128 ? s : (255U - s);
+            }
+          }
+
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum < smallest) {
+            bestType = type;
+            smallest = sum;
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_ENTROPY) {
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t bestSum = 0;
+    unsigned type, bestType = 0;
+    unsigned count[256];
+
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      /*a null result is only a failure when there are rows to filter (see LFS_MINSUM)*/
+      if(!attempt[type] && h != 0) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          size_t sum = 0;
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          lodepng_memset(count, 0, 256 * sizeof(*count));
+          for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
+          ++count[type]; /*the filter type itself is part of the scanline*/
+          for(x = 0; x != 256; ++x) {
+            sum += ilog2i(count[x]);
+          }
+          /*keep the type with the LARGEST sum: for a fixed number of bytes, a larger sum of
+          count * log2(count) means a lower entropy (if type == 0 it's the first case so always
+          store the values)*/
+          if(type == 0 || sum > bestSum) {
+            bestType = type;
+            bestSum = sum;
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_PREDEFINED) {
+    /*the caller supplies the filter type of every row*/
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      unsigned char type = settings->predefined_filters[y];
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_BRUTE_FORCE) {
+    /*brute force filter chooser.
+    deflate the scanline after every filter attempt to see which one deflates best.
+    This is very slow and gives only slightly smaller, sometimes even larger, result*/
+    size_t size[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned type = 0, bestType = 0;
+    unsigned char* dummy;
+    LodePNGCompressSettings zlibsettings;
+    lodepng_memcpy(&zlibsettings, &settings->zlibsettings, sizeof(LodePNGCompressSettings));
+    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
+    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
+    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
+    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
+    zlibsettings.btype = 1;
+    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
+    images only, so disable it*/
+    zlibsettings.custom_zlib = 0;
+    zlibsettings.custom_deflate = 0;
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      /*a null result is only a failure when there are rows to filter (see LFS_MINSUM)*/
+      if(!attempt[type] && h != 0) error = 83; /*alloc fail*/
+    }
+    if(!error) {
+      for(y = 0; y != h; ++y) /*try the 5 filter types*/ {
+        for(type = 0; type != 5; ++type) {
+          unsigned testsize = (unsigned)linebytes;
+          /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
+
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          size[type] = 0;
+          dummy = 0;
+          /*NOTE(review): the result of this trial compression is not checked; only the
+          reported size is used to rank the filter types*/
+          zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
+          lodepng_free(dummy);
+          /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || size[type] < smallest) {
+            bestType = type;
+            smallest = size[type];
+          }
+        }
+        prevline = &in[y * linebytes];
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  }
+  else return 88; /* unknown filter strategy */
+
+  return error;
+}
+
+/*
+The opposite of the removePaddingBits function: copies h rows of ilinebits bits
+each from `in` to `out`, appending (olinebits - ilinebits) zero bits after every
+row so that each output row is olinebits long.
+olinebits must be >= ilinebits
+*/
+static void addPaddingBits(unsigned char* out, const unsigned char* in,
+                           size_t olinebits, size_t ilinebits, unsigned h) {
+  const size_t padbits = olinebits - ilinebits;
+  size_t outbit = 0, inbit = 0; /*bit pointers*/
+  unsigned row;
+
+  for(row = 0; row != h; ++row) {
+    size_t remaining;
+    for(remaining = ilinebits; remaining != 0; --remaining) {
+      unsigned char bit = readBitFromReversedStream(&inbit, in);
+      setBitOfReversedStream(&outbit, out, bit);
+    }
+    /*the padding bits are written explicitly rather than skipped, to avoid
+    "Use of uninitialised value of size ###" warning from valgrind*/
+    for(remaining = padbits; remaining != 0; --remaining) setBitOfReversedStream(&outbit, out, 0);
+  }
+}
+
+/*
+in: non-interlaced image with size w*h, without any padding bits; its size in
+    bits is w * h * bpp
+out: the same pixels, but re-ordered according to PNG's Adam7 interlacing into 7
+     reduced images. There are no padding bits between the scanlines of a reduced
+     image, but every reduced image starts at a byte (at passstart[i]), so out is
+     possibly bigger than in due to padding bits between reduced images.
+bpp: bits per pixel
+NOTE: comments about padding bits are only relevant if bpp < 8
+
+All pixel and bit offsets are computed in size_t: the products involved do not
+necessarily fit in unsigned for large images.
+*/
+static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned i;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp >= 8) {
+    size_t bytewidth = bpp / 8u;
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y;
+      size_t b;
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        /*row and column of this pixel in the full image*/
+        size_t iny = (size_t)ADAM7_IY[i] + (size_t)y * ADAM7_DY[i];
+        size_t inx = (size_t)ADAM7_IX[i] + (size_t)x * ADAM7_DX[i];
+        size_t pixelinstart = (iny * w + inx) * bytewidth;
+        size_t pixeloutstart = passstart[i] + ((size_t)y * passw[i] + x) * bytewidth;
+        for(b = 0; b < bytewidth; ++b) {
+          out[pixeloutstart + b] = in[pixelinstart + b];
+        }
+      }
+    }
+  } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t ilinebits = (size_t)bpp * passw[i];
+      size_t olinebits = (size_t)bpp * w;
+      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        /*row and column of this pixel in the full image*/
+        size_t iny = (size_t)ADAM7_IY[i] + (size_t)y * ADAM7_DY[i];
+        size_t inx = (size_t)ADAM7_IX[i] + (size_t)x * ADAM7_DX[i];
+        ibp = iny * olinebits + inx * bpp;
+        obp = (8 * passstart[i]) + ((size_t)y * ilinebits + (size_t)x * bpp);
+        for(b = 0; b < bpp; ++b) {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          setBitOfReversedStream(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+/*
+Converts the pure 2D image `in` (w x h pixels in the color mode of info_png) into
+the filtered, padded and possibly interlaced data that gets compressed into IDAT.
+out: receives a newly allocated buffer with that data. It is NOT freed here, not
+     even when an error is returned.
+outsize: receives the size of that buffer in bytes
+settings: passed on to filter()
+return value is error: 0 on success, 83 on allocation failure, or the error
+reported by filter()
+*/
+static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                    unsigned w, unsigned h,
+                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) {
+  /*
+  This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps:
+  *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter
+  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  unsigned error = 0;
+
+  if(info_png->interlace_method == 0) {
+    /*sizes are computed in size_t: w * bpp and h * linebytes do not necessarily fit in unsigned*/
+    size_t rawlinebits = (size_t)w * (size_t)bpp; /*bits of pixel data per scanline*/
+    size_t linebytes = (rawlinebits + 7u) / 8u; /*bytes per scanline including padding bits*/
+
+    *outsize = (size_t)h + (size_t)h * linebytes; /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
+
+    if(!error) {
+      /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
+      if(bpp < 8 && rawlinebits != linebytes * 8u) {
+        size_t paddedsize = (size_t)h * linebytes;
+        unsigned char* padded = (unsigned char*)lodepng_malloc(paddedsize);
+        /*a zero-size request may legally yield a null pointer, which is not a failure*/
+        if(!padded && paddedsize) error = 83; /*alloc fail*/
+        if(!error) {
+          addPaddingBits(padded, in, linebytes * 8u, rawlinebits, h);
+          error = filter(*out, padded, w, h, &info_png->color, settings);
+        }
+        lodepng_free(padded);
+      } else {
+        /*we can immediately filter into the out buffer, no other steps needed*/
+        error = filter(*out, in, w, h, &info_png->color, settings);
+      }
+    }
+  } else /*interlace_method is 1 (Adam7)*/ {
+    unsigned passw[7], passh[7];
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned char* adam7;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out)) error = 83; /*alloc fail*/
+
+    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
+    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
+
+    if(!error) {
+      unsigned i;
+
+      Adam7_interlace(adam7, in, w, h, bpp);
+      for(i = 0; i != 7; ++i) {
+        if(bpp < 8) {
+          size_t passlinebits = (size_t)passw[i] * (size_t)bpp;
+          unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]);
+          /*a pass without rows never touches the buffer, so a null result (which a
+          zero-size request may legally give) is only a failure when rows exist*/
+          if(!padded && passh[i] != 0) ERROR_BREAK(83); /*alloc fail*/
+          addPaddingBits(padded, &adam7[passstart[i]],
+                         ((passlinebits + 7u) / 8u) * 8u, passlinebits, passh[i]);
+          error = filter(&(*out)[filter_passstart[i]], padded,
+                         passw[i], passh[i], &info_png->color, settings);
+          lodepng_free(padded);
+        } else {
+          error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]],
+                         passw[i], passh[i], &info_png->color, settings);
+        }
+
+        if(error) break;
+      }
+    }
+
+    lodepng_free(adam7);
+  }
+
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/* Appends every chunk stored back to back in data[0..datasize) to `out`, in
+   order. Returns 0, or the first error reported by lodepng_chunk_append. */
+static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) {
+  unsigned char* const end = data + datasize;
+  unsigned char* cursor;
+
+  for(cursor = data; (size_t)(cursor - data) < datasize; cursor = lodepng_chunk_next(cursor, end)) {
+    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, cursor));
+    out->allocsize = out->size; /*fix the allocsize again*/
+  }
+
+  return 0;
+}
+
+/*
+Returns 1 if bytes 16-19 of the ICC profile are "GRAY", 0 otherwise (including
+when the profile is shorter than 20 bytes).
+It is a gray profile if bytes 16-19 are "GRAY", rgb profile if bytes 16-19
+are "RGB ". No other parsing or validation of the profile is done. This check is
+needed only because the PNG specification requires using a non-gray color model
+if there is an ICC profile with "RGB " (sadly limiting compression opportunities
+if the input data is grayscale RGB data), and requires using a gray color model
+if it is "GRAY".
+*/
+static unsigned isGrayICCProfile(const unsigned char* profile, unsigned size) {
+  static const char tag[4] = {'G', 'R', 'A', 'Y'};
+  unsigned k;
+
+  if(size < 20) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(profile[16 + k] != tag[k]) return 0;
+  }
+  return 1;
+}
+
+/* Returns 1 if bytes 16-19 of the ICC profile are "RGB " (with trailing space),
+   0 otherwise (including when the profile is shorter than 20 bytes). No other
+   parsing or validation of the profile is done. */
+static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) {
+  static const char tag[4] = {'R', 'G', 'B', ' '};
+  unsigned k;
+
+  if(size < 20) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(profile[16 + k] != tag[k]) return 0;
+  }
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+ const unsigned char* image, unsigned w, unsigned h,
+ LodePNGState* state) {
+ unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
+ size_t datasize = 0;
+ ucvector outv = ucvector_init(NULL, 0);
+ LodePNGInfo info;
+ const LodePNGInfo* info_png = &state->info_png;
+
+ lodepng_info_init(&info);
+
+ /*provide some proper output values if error will happen*/
+ *out = 0;
+ *outsize = 0;
+ state->error = 0;
+
+ /*check input values validity*/
+ if((info_png->color.colortype == LCT_PALETTE || state->encoder.force_palette)
+ && (info_png->color.palettesize == 0 || info_png->color.palettesize > 256)) {
+ state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
+ goto cleanup;
+ }
+ if(state->encoder.zlibsettings.btype > 2) {
+ state->error = 61; /*error: invalid btype*/
+ goto cleanup;
+ }
+ if(info_png->interlace_method > 1) {
+ state->error = 71; /*error: invalid interlace mode*/
+ goto cleanup;
+ }
+ state->error = checkColorValidity(info_png->color.colortype, info_png->color.bitdepth);
+ if(state->error) goto cleanup; /*error: invalid color type given*/
+ state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
+ if(state->error) goto cleanup; /*error: invalid color type given*/
+
+ /* color convert and compute scanline filter types */
+ lodepng_info_copy(&info, &state->info_png);
+ if(state->encoder.auto_convert) {
+ LodePNGColorStats stats;
+ lodepng_color_stats_init(&stats);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ if(info_png->iccp_defined &&
+ isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) {
+ /*the PNG specification does not allow to use palette with a GRAY ICC profile, even
+ if the palette has only gray colors, so disallow it.*/
+ stats.allow_palette = 0;
+ }
+ if(info_png->iccp_defined &&
+ isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) {
+ /*the PNG specification does not allow to use grayscale color with RGB ICC profile, so disallow gray.*/
+ stats.allow_greyscale = 0;
+ }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+ state->error = lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw);
+ if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ if(info_png->background_defined) {
+ /*the background chunk's color must be taken into account as well*/
+ unsigned r = 0, g = 0, b = 0;
+ LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16);
+ lodepng_convert_rgb(&r, &g, &b, info_png->background_r, info_png->background_g, info_png->background_b, &mode16, &info_png->color);
+ state->error = lodepng_color_stats_add(&stats, r, g, b, 65535);
+ if(state->error) goto cleanup;
+ }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+ state->error = auto_choose_color(&info.color, &state->info_raw, &stats);
+ if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*also convert the background chunk*/
+ if(info_png->background_defined) {
+ if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b,
+ info_png->background_r, info_png->background_g, info_png->background_b, &info.color, &info_png->color)) {
+ state->error = 104;
+ goto cleanup;
+ }
+ }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+ }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ if(info_png->iccp_defined) {
+ unsigned gray_icc = isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size);
+ unsigned rgb_icc = isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size);
+ unsigned gray_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA;
+ if(!gray_icc && !rgb_icc) {
+ state->error = 100; /* Disallowed profile color type for PNG */
+ goto cleanup;
+ }
+ if(gray_icc != gray_png) {
+ /*Not allowed to use RGB/RGBA/palette with GRAY ICC profile or vice versa,
+ or in case of auto_convert, it wasn't possible to find appropriate model*/
+ state->error = state->encoder.auto_convert ? 102 : 101;
+ goto cleanup;
+ }
+ }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) {
+ unsigned char* converted;
+ size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u;
+
+ converted = (unsigned char*)lodepng_malloc(size);
+ if(!converted && size) state->error = 83; /*alloc fail*/
+ if(!state->error) {
+ state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
+ }
+ if(!state->error) {
+ state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+ }
+ lodepng_free(converted);
+ if(state->error) goto cleanup;
+ } else {
+ state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
+ if(state->error) goto cleanup;
+ }
+
+ /* output all PNG chunks */ {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ size_t i;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ /*write signature and chunks*/
+ state->error = writeSignature(&outv);
+ if(state->error) goto cleanup;
+ /*IHDR*/
+ state->error = addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+ if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*unknown chunks between IHDR and PLTE*/
+ if(info.unknown_chunks_data[0]) {
+ state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
+ if(state->error) goto cleanup;
+ }
+ /*color profile chunks must come before PLTE */
+ if(info.iccp_defined) {
+ state->error = addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings);
+ if(state->error) goto cleanup;
+ }
+ if(info.srgb_defined) {
+ state->error = addChunk_sRGB(&outv, &info);
+ if(state->error) goto cleanup;
+ }
+ if(info.gama_defined) {
+ state->error = addChunk_gAMA(&outv, &info);
+ if(state->error) goto cleanup;
+ }
+ if(info.chrm_defined) {
+ state->error = addChunk_cHRM(&outv, &info);
+ if(state->error) goto cleanup;
+ }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ /*PLTE*/
+ if(info.color.colortype == LCT_PALETTE) {
+ state->error = addChunk_PLTE(&outv, &info.color);
+ if(state->error) goto cleanup;
+ }
+ if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) {
+ /*force_palette means: write suggested palette for truecolor in PLTE chunk*/
+ state->error = addChunk_PLTE(&outv, &info.color);
+ if(state->error) goto cleanup;
+ }
+ /*tRNS (this will only be added when necessary) */
+ state->error = addChunk_tRNS(&outv, &info.color);
+ if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*bKGD (must come between PLTE and the IDAT chunks)*/
+ if(info.background_defined) {
+ state->error = addChunk_bKGD(&outv, &info);
+ if(state->error) goto cleanup;
+ }
+ /*pHYs (must come before the IDAT chunks)*/
+ if(info.phys_defined) {
+ state->error = addChunk_pHYs(&outv, &info);
+ if(state->error) goto cleanup;
+ }
+
+ /*unknown chunks between PLTE and IDAT*/
+ if(info.unknown_chunks_data[1]) {
+ state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
+ if(state->error) goto cleanup;
+ }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ /*IDAT (multiple IDAT chunks must be consecutive)*/
+ state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
+ if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*tIME*/
+ if(info.time_defined) {
+ state->error = addChunk_tIME(&outv, &info.time);
+ if(state->error) goto cleanup;
+ }
+ /*tEXt and/or zTXt*/
+ for(i = 0; i != info.text_num; ++i) {
+ if(lodepng_strlen(info.text_keys[i]) > 79) {
+ state->error = 66; /*text chunk too large*/
+ goto cleanup;
+ }
+ if(lodepng_strlen(info.text_keys[i]) < 1) {
+ state->error = 67; /*text chunk too small*/
+ goto cleanup;
+ }
+ if(state->encoder.text_compression) {
+ state->error = addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+ if(state->error) goto cleanup;
+ } else {
+ state->error = addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+ if(state->error) goto cleanup;
+ }
+ }
+ /*LodePNG version id in text chunk*/
+ if(state->encoder.add_id) {
+ unsigned already_added_id_text = 0;
+ for(i = 0; i != info.text_num; ++i) {
+ const char* k = info.text_keys[i];
+ /* Could use strcmp, but we're not calling or reimplementing this C library function for this use only */
+ if(k[0] == 'L' && k[1] == 'o' && k[2] == 'd' && k[3] == 'e' &&
+ k[4] == 'P' && k[5] == 'N' && k[6] == 'G' && k[7] == '\0') {
+ already_added_id_text = 1;
+ break;
+ }
+ }
+ if(already_added_id_text == 0) {
+ state->error = addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+ if(state->error) goto cleanup;
+ }
+ }
+ /*iTXt*/
+ for(i = 0; i != info.itext_num; ++i) {
+ if(lodepng_strlen(info.itext_keys[i]) > 79) {
+ state->error = 66; /*text chunk too large*/
+ goto cleanup;
+ }
+ if(lodepng_strlen(info.itext_keys[i]) < 1) {
+ state->error = 67; /*text chunk too small*/
+ goto cleanup;
+ }
+ state->error = addChunk_iTXt(
+ &outv, state->encoder.text_compression,
+ info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
+ &state->encoder.zlibsettings);
+ if(state->error) goto cleanup;
+ }
+
+ /*unknown chunks between IDAT and IEND*/
+ if(info.unknown_chunks_data[2]) {
+ state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
+ if(state->error) goto cleanup;
+ }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+ state->error = addChunk_IEND(&outv);
+ if(state->error) goto cleanup;
+ }
+
+cleanup:
+ lodepng_info_cleanup(&info);
+ lodepng_free(data);
+
+ /*instead of cleaning the vector up, give it to the output*/
+ *out = outv.data;
+ *outsize = outv.size;
+
+ return state->error;
+}
+
+/*
+Encodes raw pixels into a PNG held in memory, using a temporary LodePNGState.
+The caller's colortype and bitdepth are stored both as the raw input format
+(info_raw) and as the PNG color mode (info_png.color) of that state.
+Returns the error code left in the state by lodepng_encode (0 means no error).
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
+                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState state;
+  unsigned result;
+
+  lodepng_state_init(&state);
+
+  /*raw input format and PNG color mode both take the caller's values*/
+  state.info_png.color.colortype = state.info_raw.colortype = colortype;
+  state.info_png.color.bitdepth = state.info_raw.bitdepth = bitdepth;
+
+  lodepng_encode(out, outsize, image, w, h, &state);
+
+  /*read the error before the state is cleaned up*/
+  result = state.error;
+  lodepng_state_cleanup(&state);
+  return result;
+}
+
+/*Convenience wrapper: encodes an RGBA image with 8 bits per channel through lodepng_encode_memory.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_encode_memory(out, outsize, image, w, h, colortype, bitdepth);
+}
+
+/*Convenience wrapper: encodes an RGB image with 8 bits per channel through lodepng_encode_memory.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_encode_memory(out, outsize, image, w, h, colortype, bitdepth);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Encodes raw pixels to a PNG in memory and, if that succeeds, writes it to the
+file with the given name through lodepng_save_file.
+filename: path of the output file.
+image, w, h, colortype, bitdepth: passed on unchanged to lodepng_encode_memory.
+Return value: LodePNG error code of the encoding step or, if encoding succeeded,
+of the save step (0 means no error).
+*/
+unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  /*start from a defined state: the buffer is freed unconditionally below, so it
+  must never hold an indeterminate pointer, whatever path the encoder takes*/
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth);
+  if(!error) error = lodepng_save_file(buffer, buffersize, filename);
+  /*the encoder may hand back a buffer even when it reports an error, so always free*/
+  lodepng_free(buffer);
+  return error;
+}
+
+/*Convenience wrapper: writes an RGBA image with 8 bits per channel to a file through lodepng_encode_file.*/
+unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_encode_file(filename, image, w, h, colortype, bitdepth);
+}
+
+/*Convenience wrapper: writes an RGB image with 8 bits per channel to a file through lodepng_encode_file.*/
+unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_encode_file(filename, image, w, h, colortype, bitdepth);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*Fills the given encoder settings with their default values.*/
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) {
+  /*the embedded zlib settings are initialized by their own function*/
+  lodepng_compress_settings_init(&settings->zlibsettings);
+
+  /*color mode selection: automatic conversion on, no forced palette*/
+  settings->auto_convert = 1;
+  settings->force_palette = 0;
+
+  /*filter related fields*/
+  settings->filter_palette_zero = 1;
+  settings->filter_strategy = LFS_MINSUM;
+  settings->predefined_filters = 0;
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*text chunks: compression on, no LodePNG id text*/
+  settings->text_compression = 1;
+  settings->add_id = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*
+This returns the description of a numerical error code in English. This is also
+the documentation of all the error codes.
+code: the numerical error code to describe.
+Return value: pointer to a string literal, so it must not be modified or freed.
+Codes that have no case below give "unknown error code".
+*/
+const char* lodepng_error_text(unsigned code) {
+ switch(code) {
+ case 0: return "no error, everything went ok";
+ case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
+ case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
+ case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
+ /*codes 13, 14 and 15 share the same description*/
+ case 13: return "problem while processing dynamic deflate block";
+ case 14: return "problem while processing dynamic deflate block";
+ case 15: return "problem while processing dynamic deflate block";
+ /*this error could happen if there are only 0 or 1 symbols present in the huffman code:*/
+ case 16: return "invalid code while processing dynamic deflate block";
+ case 17: return "end of out buffer memory reached while inflating";
+ case 18: return "invalid distance code while inflating";
+ case 19: return "end of out buffer memory reached while inflating";
+ case 20: return "invalid deflate block BTYPE encountered while decoding";
+ case 21: return "NLEN is not ones complement of LEN in a deflate block";
+
+ /*end of out buffer memory reached while inflating:
+ This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
+ all the pixels of the image, given the color depth and image dimensions. Something that doesn't
+ happen in a normal, well encoded, PNG image.*/
+ case 22: return "end of out buffer memory reached while inflating";
+ case 23: return "end of in buffer memory reached while inflating";
+ case 24: return "invalid FCHECK in zlib header";
+ case 25: return "invalid compression method in zlib header";
+ case 26: return "FDICT encountered in zlib header while it's not used for PNG";
+ case 27: return "PNG file is smaller than a PNG header";
+ /*Checks the magic file header, the first 8 bytes of the PNG file*/
+ case 28: return "incorrect PNG signature, it's no PNG or corrupted";
+ case 29: return "first chunk is not the header chunk";
+ case 30: return "chunk length too large, chunk broken off at end of file";
+ case 31: return "illegal PNG color type or bpp";
+ case 32: return "illegal PNG compression method";
+ case 33: return "illegal PNG filter method";
+ case 34: return "illegal PNG interlace method";
+ case 35: return "chunk length of a chunk is too large or the chunk too small";
+ case 36: return "illegal PNG filter type encountered";
+ case 37: return "illegal bit depth for this color type given";
+ case 38: return "the palette is too small or too big"; /*0, or more than 256 colors*/
+ case 39: return "tRNS chunk before PLTE or has more entries than palette size";
+ case 40: return "tRNS chunk has wrong size for grayscale image";
+ case 41: return "tRNS chunk has wrong size for RGB image";
+ case 42: return "tRNS chunk appeared while it was not allowed for this color type";
+ case 43: return "bKGD chunk has wrong size for palette image";
+ case 44: return "bKGD chunk has wrong size for grayscale image";
+ case 45: return "bKGD chunk has wrong size for RGB image";
+ case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
+ case 49: return "jumped past memory while generating dynamic huffman tree";
+ case 50: return "jumped past memory while generating dynamic huffman tree";
+ case 51: return "jumped past memory while inflating huffman block";
+ case 52: return "jumped past memory while inflating";
+ case 53: return "size of zlib data too small";
+ case 54: return "repeat symbol in tree while there was no value symbol yet";
+ /*jumped past tree while generating huffman tree, this could be when the
+ tree will have more leaves than symbols after generating it out of the
+ given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
+ case 55: return "jumped past tree while generating huffman tree";
+ case 56: return "given output image colortype or bitdepth not supported for color conversion";
+ case 57: return "invalid CRC encountered (checking CRC can be disabled)";
+ case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
+ case 59: return "requested color conversion not supported";
+ case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
+ case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
+ /*LodePNG leaves the choice of RGB to grayscale conversion formula to the user.*/
+ case 62: return "conversion from color to grayscale not supported";
+ /*(2^31-1)*/
+ case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk";
+ /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
+ case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
+ case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
+ case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
+ case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
+ case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
+ case 71: return "invalid interlace mode given to encoder (must be 0 or 1)";
+ case 72: return "while decoding, invalid compression method encountering in zTXt or iTXt chunk (it must be 0)";
+ case 73: return "invalid tIME chunk size";
+ case 74: return "invalid pHYs chunk size";
+ /*length could be wrong, or data chopped off*/
+ case 75: return "no null termination char found while decoding text chunk";
+ case 76: return "iTXt chunk too short to contain required bytes";
+ case 77: return "integer overflow in buffer size";
+ case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
+ case 79: return "failed to open file for writing";
+ case 80: return "tried creating a tree of 0 symbols";
+ case 81: return "lazy matching at pos 0 is impossible";
+ case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds";
+ case 83: return "memory allocation failed";
+ case 84: return "given image too small to contain all pixels to be encoded";
+ case 86: return "impossible offset in lz77 encoding (internal bug)";
+ case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
+ case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
+ case 89: return "text chunk keyword too short or long: must have size 1-79";
+ /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
+ case 90: return "windowsize must be a power of two";
+ case 91: return "invalid decompressed idat size";
+ case 92: return "integer overflow due to too many pixels";
+ case 93: return "zero width or height is invalid";
+ case 94: return "header chunk must have a size of 13 bytes";
+ case 95: return "integer overflow with combined idat chunk size";
+ case 96: return "invalid gAMA chunk size";
+ case 97: return "invalid cHRM chunk size";
+ case 98: return "invalid sRGB chunk size";
+ case 99: return "invalid sRGB rendering intent";
+ case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY";
+ case 101: return "PNG specification does not allow RGB ICC profile on gray color types and vice versa";
+ case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification";
+ case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?";
+ case 104: return "invalid bKGD color while encoding (e.g. palette index out of range)";
+ case 105: return "integer overflow of bitsize";
+ case 106: return "PNG file must have PLTE chunk if color type is palette";
+ case 107: return "color convert from palette mode requested without setting the palette data in it";
+ case 108: return "tried to add more than 256 values to a palette";
+ }
+ /*reached for every code that has no case in the switch above*/
+ return "unknown error code";
+}
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // C++ Wrapper // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Loads the whole file with the given name into buffer, resizing buffer to the file size.
+Returns 78 when the file size cannot be determined, 0 for an empty file, and
+otherwise the result of lodepng_buffer_file.
+*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename) {
+  const char* path = filename.c_str();
+  long size = lodepng_filesize(path);
+  if(size < 0) return 78;
+  buffer.resize((size_t)size);
+  /*nothing to read for an empty file; this also avoids indexing an empty vector*/
+  if(size == 0) return 0;
+  return lodepng_buffer_file(&buffer[0], (size_t)size, path);
+}
+
+/*Writes the buffer to the file, replacing the file's content rather than appending to it.*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename) {
+  /*an empty vector has no first element to take the address of, so pass a null pointer*/
+  const unsigned char* data = buffer.empty() ? 0 : &buffer[0];
+  return lodepng_save_file(data, buffer.size(), filename.c_str());
+}
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Runs zlib_decompress on the input bytes and appends whatever output it produced to out.
+The output is appended even when an error code is returned. Returns that error code.
+*/
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings) {
+  unsigned char* inflated = 0;
+  size_t inflatedsize = 0;
+  unsigned error = zlib_decompress(&inflated, &inflatedsize, 0, in, insize, &settings);
+  if(!inflated) return error;
+  out.insert(out.end(), inflated, inflated + inflatedsize);
+  lodepng_free(inflated);
+  return error;
+}
+
+/*Vector overload: forwards the content of in (null pointer when empty) to the pointer based decompress.*/
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings) {
+  const unsigned char* data = in.empty() ? 0 : &in[0];
+  return decompress(out, data, in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Runs zlib_compress on the input bytes and appends whatever output it produced to out.
+The output is appended even when an error code is returned. Returns that error code.
+*/
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings) {
+  unsigned char* deflated = 0;
+  size_t deflatedsize = 0;
+  unsigned error = zlib_compress(&deflated, &deflatedsize, in, insize, &settings);
+  if(!deflated) return error;
+  out.insert(out.end(), deflated, deflated + deflatedsize);
+  lodepng_free(deflated);
+  return error;
+}
+
+/*Vector overload: forwards the content of in (null pointer when empty) to the pointer based compress.*/
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings) {
+  const unsigned char* data = in.empty() ? 0 : &in[0];
+  return compress(out, data, in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/*Default constructor: initializes this object through lodepng_state_init.*/
+State::State()
+{
+  lodepng_state_init(this);
+}
+
+/*Copy constructor: initializes this object first, then copies other into it with lodepng_state_copy.*/
+State::State(const State& other)
+{
+  const State* source = &other;
+  lodepng_state_init(this);
+  lodepng_state_copy(this, source);
+}
+
+/*Destructor: hands this object to lodepng_state_cleanup.*/
+State::~State()
+{
+  lodepng_state_cleanup(this);
+}
+
+/*
+Copy assignment: replaces the content of this state with a copy of other.
+Self-assignment is a no-op. NOTE(review): lodepng_state_copy is not visible in
+this part of the file; it presumably releases the destination before copying,
+in which case assigning a state to itself would discard its own data. The guard
+makes "s = s" safe either way.
+Returns a reference to this object.
+*/
+State& State::operator=(const State& other) {
+  if(this != &other) lodepng_state_copy(this, &other);
+  return *this;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decodes the PNG in memory with lodepng_decode_memory and, on success, appends the
+raw pixels to out. w and h receive the image dimensions. Returns the error code.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
+                size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* pixels = 0;
+  unsigned error = lodepng_decode_memory(&pixels, &w, &h, in, insize, colortype, bitdepth);
+  if(pixels && !error) {
+    /*a temporary State only serves to describe the raw format for the size computation*/
+    State rawformat;
+    rawformat.info_raw.colortype = colortype;
+    rawformat.info_raw.bitdepth = bitdepth;
+    size_t numbytes = lodepng_get_raw_size(w, h, &rawformat.info_raw);
+    out.insert(out.end(), pixels, pixels + numbytes);
+  }
+  lodepng_free(pixels);
+  return error;
+}
+
+/*
+Vector overload: forwards the content of in (null pointer when empty) to the
+pointer based decode. The size is passed as size_t without narrowing, so inputs
+larger than UINT_MAX bytes are not truncated.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth) {
+  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth);
+}
+
+/*
+Decodes the PNG in memory with lodepng_decode using the given state and, on
+success, appends the raw pixels (sized according to state.info_raw) to out.
+w and h receive the image dimensions. Returns the error code.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize) {
+  unsigned char* pixels = 0;
+  unsigned error = lodepng_decode(&pixels, &w, &h, &state, in, insize);
+  if(pixels && !error) {
+    size_t numbytes = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), pixels, pixels + numbytes);
+  }
+  lodepng_free(pixels);
+  return error;
+}
+
+/*Vector overload: forwards the content of in (null pointer when empty) to the pointer based, State using decode.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in) {
+  const unsigned char* data = in.empty() ? 0 : &in[0];
+  return decode(out, w, h, state, data, in.size());
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Loads the file with the given name and decodes it with the vector based decode.
+w and h are set to 0 up front so they hold defined values if loading fails.
+Returns the error code of the load step, or else of the decode step.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> filedata;
+  w = 0;
+  h = 0;
+  unsigned error = load_file(filedata, filename);
+  if(!error) error = decode(out, w, h, filedata, colortype, bitdepth);
+  return error;
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Encodes raw pixels with lodepng_encode_memory and appends the bytes it produced to out.
+in, w, h, colortype, bitdepth: passed on unchanged to lodepng_encode_memory.
+Whatever buffer the encoder hands back is appended and freed, also when an
+error code is returned. Returns that error code (0 means no error).
+*/
+unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  /*defined start values: both are read below, whatever path the encoder takes*/
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*
+Vector overload: returns 84 when in holds fewer bytes than a w x h image of the
+given format needs, otherwise forwards to the pointer based encode.
+*/
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  size_t needed = lodepng_get_raw_size_lct(w, h, colortype, bitdepth);
+  if(needed > in.size()) return 84;
+  const unsigned char* pixels = in.empty() ? 0 : &in[0];
+  return encode(out, pixels, w, h, colortype, bitdepth);
+}
+
+/*
+Encodes raw pixels with lodepng_encode using the given state and appends the
+bytes it produced to out.
+in, w, h: passed on unchanged to lodepng_encode.
+Whatever buffer the encoder hands back is appended and freed, also when an
+error code is returned. Returns that error code (0 means no error).
+*/
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state) {
+  /*defined start values: both are read below, whatever path the encoder takes*/
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*
+Vector overload: returns 84 when in holds fewer bytes than a w x h image in the
+state's raw format needs, otherwise forwards to the pointer based, State using encode.
+*/
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state) {
+  size_t needed = lodepng_get_raw_size(w, h, &state.info_raw);
+  if(needed > in.size()) return 84;
+  const unsigned char* pixels = in.empty() ? 0 : &in[0];
+  return encode(out, pixels, w, h, state);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Encodes raw pixels into a temporary vector and, if that succeeds, saves it to
+the file with the given name. Returns the error code of the failing step, or 0.
+*/
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> png;
+  unsigned error = encode(png, in, w, h, colortype, bitdepth);
+  if(error) return error;
+  return save_file(png, filename);
+}
+
+/*
+Vector overload: returns 84 when in holds fewer bytes than a w x h image of the
+given format needs, otherwise forwards to the pointer based, file writing encode.
+*/
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  size_t needed = lodepng_get_raw_size_lct(w, h, colortype, bitdepth);
+  if(needed > in.size()) return 84;
+  const unsigned char* pixels = in.empty() ? 0 : &in[0];
+  return encode(filename, pixels, w, h, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_PNG */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/lodepng.h b/lodepng.h
new file mode 100644
index 0000000..a386459
--- /dev/null
+++ b/lodepng.h
@@ -0,0 +1,1945 @@
+/*
+LodePNG version 20200306
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+
+#ifndef LODEPNG_H
+#define LODEPNG_H
+
+#include <string.h> /*for size_t*/
+
+extern const char* LODEPNG_VERSION_STRING;
+
+/*
+The following #defines are used to create code sections. They can be disabled
+to disable code sections, which can give faster compile time and smaller binary.
+The "NO_COMPILE" defines are designed to be used to pass as defines to the
+compiler command to disable them without modifying this header, e.g.
+-DLODEPNG_NO_COMPILE_ZLIB for gcc.
+In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
+allow implementing a custom lodepng_crc32.
+*/
+/*deflate & zlib. If disabled, you must specify alternative zlib functions in
+the custom_zlib field of the compress and decompress settings*/
+#ifndef LODEPNG_NO_COMPILE_ZLIB
+#define LODEPNG_COMPILE_ZLIB
+#endif
+
+/*png encoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_PNG
+#define LODEPNG_COMPILE_PNG
+#endif
+
+/*deflate&zlib decoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_DECODER
+#define LODEPNG_COMPILE_DECODER
+#endif
+
+/*deflate&zlib encoder and png encoder*/
+#ifndef LODEPNG_NO_COMPILE_ENCODER
+#define LODEPNG_COMPILE_ENCODER
+#endif
+
+/*the optional built in harddisk file loading and saving functions*/
+#ifndef LODEPNG_NO_COMPILE_DISK
+#define LODEPNG_COMPILE_DISK
+#endif
+
+/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
+#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
+#endif
+
+/*ability to convert error numerical codes to English text string*/
+#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
+#define LODEPNG_COMPILE_ERROR_TEXT
+#endif
+
+/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
+you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
+source files with custom allocators.*/
+#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
+#define LODEPNG_COMPILE_ALLOCATORS
+#endif
+
+/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
+#ifdef __cplusplus
+#ifndef LODEPNG_NO_COMPILE_CPP
+#define LODEPNG_COMPILE_CPP
+#endif
+#endif
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <vector>
+#include <string>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*The PNG color types (also used for raw image).
+Each entry lists the bit depths that go with that color type. The numeric values
+are presumably the color type codes as stored in a PNG file header (the note on
+LCT_MAX_OCTET_VALUE below refers to that header byte) - confirm against the PNG
+specification.*/
+typedef enum LodePNGColorType {
+ LCT_GREY = 0, /*grayscale: 1,2,4,8,16 bit*/
+ LCT_RGB = 2, /*RGB: 8,16 bit*/
+ LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
+ LCT_GREY_ALPHA = 4, /*grayscale with alpha: 8,16 bit*/
+ LCT_RGBA = 6, /*RGB with alpha: 8,16 bit*/
+ /*LCT_MAX_OCTET_VALUE lets the compiler allow this enum to represent any invalid
+ byte value from 0 to 255 that could be present in an invalid PNG file header. Do
+ not use, compare with or set the name LCT_MAX_OCTET_VALUE, instead either use
+ the valid color type names above, or numeric values like 1 or 7 when checking for
+ particular disallowed color type byte values, or cast to integer to print it.*/
+ LCT_MAX_OCTET_VALUE = 255
+} LodePNGColorType;
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Converts PNG data in memory to raw pixel data.
+out: Output parameter. Pointer to buffer that will contain the raw pixel data.
+ After decoding, its size is w * h * (bytes per pixel) bytes larger than
+ initially. Bytes per pixel depends on colortype and bitdepth.
+ Must be freed after usage with free(*out).
+ Note: for 16-bit per channel colors, uses big endian format like PNG does.
+w: Output parameter. Pointer to width of pixel data.
+h: Output parameter. Pointer to height of pixel data.
+in: Memory buffer with the PNG file.
+insize: size of the in buffer.
+colortype: the desired color type for the raw output image. See explanation on PNG color types.
+bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
+ const unsigned char* in, size_t insize,
+ LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
+ const unsigned char* in, size_t insize);
+
+/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
+ const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load PNG from disk, from file with given name.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
+ const char* filename,
+ LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
+ const char* filename);
+
+/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
+ const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
+ of the output PNG image cannot be chosen, they are automatically determined
+ by the colortype, bitdepth and content of the input pixel data.
+ Note: for 16-bit per channel colors, needs big endian format like PNG does.
+out: Output parameter. Pointer to buffer that will contain the PNG image data.
+ Must be freed after usage with free(*out).
+outsize: Output parameter. Pointer to the size in bytes of the out buffer.
+image: The raw pixel data to encode. The size of this buffer should be
+ w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
+w: width of the raw pixel data in pixels.
+h: height of the raw pixel data in pixels.
+colortype: the color type of the raw input image. See explanation on PNG color types.
+bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
+ const unsigned char* image, unsigned w, unsigned h,
+ LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
+ const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
+ const unsigned char* image, unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned lodepng_encode_file(const char* filename,
+ const unsigned char* image, unsigned w, unsigned h,
+ LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32_file(const char* filename,
+ const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24_file(const char* filename,
+ const unsigned char* image, unsigned w, unsigned h);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_DECODER
+/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
+is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+ const unsigned char* in, size_t insize,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+ const std::vector<unsigned char>& in,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts PNG file from disk to raw pixel data in memory.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+ const std::string& filename,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
+is that of the raw input data. The output PNG color type will be auto chosen.*/
+unsigned encode(std::vector<unsigned char>& out,
+ const unsigned char* in, unsigned w, unsigned h,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(std::vector<unsigned char>& out,
+ const std::vector<unsigned char>& in, unsigned w, unsigned h,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data (32-bit RGBA by default) into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned encode(const std::string& filename,
+ const unsigned char* in, unsigned w, unsigned h,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(const std::string& filename,
+ const std::vector<unsigned char>& in, unsigned w, unsigned h,
+ LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*Returns an English description of the numerical error code.*/
+const char* lodepng_error_text(unsigned code);
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Settings for zlib decompression*/
+typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
+struct LodePNGDecompressSettings {
+ /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */
+ unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
+ unsigned ignore_nlen; /*ignore complement of len checksum in uncompressed blocks*/
+
+ /*use custom zlib decoder instead of built in one (default: null)*/
+ unsigned (*custom_zlib)(unsigned char**, size_t*,
+ const unsigned char*, size_t,
+ const LodePNGDecompressSettings*);
+ /*use custom deflate decoder instead of built in one (default: null)
+ if custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate)*/
+ unsigned (*custom_inflate)(unsigned char**, size_t*,
+ const unsigned char*, size_t,
+ const LodePNGDecompressSettings*);
+
+ const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Settings for zlib compression. Tweaking these settings tweaks the balance
+between speed and compression ratio.
+*/
+typedef struct LodePNGCompressSettings LodePNGCompressSettings;
+struct LodePNGCompressSettings /*deflate = compress*/ {
+ /*LZ77 related settings*/
+ unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/
+ unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
+ unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
+ unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
+ unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
+ unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
+
+ /*use custom zlib encoder instead of built in one (default: null)*/
+ unsigned (*custom_zlib)(unsigned char**, size_t*,
+ const unsigned char*, size_t,
+ const LodePNGCompressSettings*);
+ /*use custom deflate encoder instead of built in one (default: null)
+ if custom_zlib is used, custom_deflate is ignored since only the built in
+ zlib function will call custom_deflate*/
+ unsigned (*custom_deflate)(unsigned char**, size_t*,
+ const unsigned char*, size_t,
+ const LodePNGCompressSettings*);
+
+ const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGCompressSettings lodepng_default_compress_settings;
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*
+Color mode of an image. Contains all information required to decode the pixel
+bits to RGBA colors. This information is the same as used in the PNG file
+format, and is used both for PNG and raw image data in LodePNG.
+*/
+typedef struct LodePNGColorMode {
+ /*header (IHDR)*/
+ LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
+ unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/
+
+ /*
+ palette (PLTE and tRNS)
+
+ Dynamically allocated with the colors of the palette, including alpha.
+ This field may not be allocated directly, use lodepng_color_mode_init first,
+ then lodepng_palette_add per color to correctly initialize it (to ensure size
+ of exactly 1024 bytes).
+
+ The alpha channels must be set as well, set them to 255 for opaque images.
+
+ When decoding, by default you can ignore this palette, since LodePNG already
+ fills the palette colors in the pixels of the raw RGBA output.
+
+ The palette is only supported for color type 3.
+ */
+ unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or when allocated must have 1024 bytes*/
+ size_t palettesize; /*palette size in number of colors (amount of used bytes is 4 * palettesize)*/
+
+ /*
+ transparent color key (tRNS)
+
+ This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
+ For grayscale PNGs, r, g and b will all 3 be set to the same.
+
+ When decoding, by default you can ignore this information, since LodePNG sets
+ pixels with this key to transparent already in the raw RGBA output.
+
+ The color key is only supported for color types 0 and 2.
+ */
+ unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
+ unsigned key_r; /*red/grayscale component of color key*/
+ unsigned key_g; /*green component of color key*/
+ unsigned key_b; /*blue component of color key*/
+} LodePNGColorMode;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_color_mode_init(LodePNGColorMode* info);
+void lodepng_color_mode_cleanup(LodePNGColorMode* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
+/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth);
+
+void lodepng_palette_clear(LodePNGColorMode* info);
+/*add 1 color to the palette*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+ unsigned char r, unsigned char g, unsigned char b, unsigned char a);
+
+/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info);
+/*get the amount of color channels used, based on colortype in the struct.
+If a palette is used, it counts as 1 channel.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info);
+/*is it a grayscale type? (only colortype 0 or 4)*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
+/*has it got an alpha channel? (only colortype 4 or 6)*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
+/*has it got a palette? (only colortype 3)*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
+/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
+Loops through the palette to check this.*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
+/*
+Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
+Returns true if the image can have translucent or invisible pixels (it may still be opaque if it doesn't use such pixels).
+Returns false if the image can only have opaque pixels.
+In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
+or if "key_defined" is true.
+*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
+/*Returns the byte size of a raw image buffer with given width, height and color mode*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*The information of a Time chunk in PNG.*/
+typedef struct LodePNGTime {
+ unsigned year; /*2 bytes used (0-65535)*/
+ unsigned month; /*1-12*/
+ unsigned day; /*1-31*/
+ unsigned hour; /*0-23*/
+ unsigned minute; /*0-59*/
+ unsigned second; /*0-60 (to allow for leap seconds)*/
+} LodePNGTime;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Information about the PNG image, except pixels, width and height.*/
+typedef struct LodePNGInfo {
+ /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
+ unsigned compression_method;/*compression method of the original file. Always 0.*/
+ unsigned filter_method; /*filter method of the original file*/
+ unsigned interlace_method; /*interlace method of the original file: 0=none, 1=Adam7*/
+ LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*
+ Suggested background color chunk (bKGD)
+
+ This uses the same color mode and bit depth as the PNG (except no alpha channel),
+ with values truncated to the bit depth in the unsigned integer.
+
+ For grayscale and palette PNGs, the value is stored in background_r. The values
+ in background_g and background_b are then unused.
+
+ So when decoding, you may get these in a different color mode than the one you requested
+ for the raw pixels.
+
+ When encoding with auto_convert, you must use the color model defined in info_png.color for
+ these values. The encoder normally ignores info_png.color when auto_convert is on, but will
+ use it to interpret these values (and convert copies of them to its chosen color model).
+
+ When encoding, avoid setting this to an expensive color, such as a non-gray value
+ when the image is gray, or the compression will be worse since it will be forced to
+ write the PNG with a more expensive color mode (when auto_convert is on).
+
+ The decoder does not use this background color to edit the color of pixels. This is a
+ completely optional metadata feature.
+ */
+ unsigned background_defined; /*is a suggested background color given?*/
+ unsigned background_r; /*red/gray/palette component of suggested background color*/
+ unsigned background_g; /*green component of suggested background color*/
+ unsigned background_b; /*blue component of suggested background color*/
+
+ /*
+ non-international text chunks (tEXt and zTXt)
+
+ The char** arrays each contain num strings. The actual messages are in
+ text_strings, while text_keys are keywords that give a short description what
+ the actual text represents, e.g. Title, Author, Description, or anything else.
+
+ All the string fields below including keys, names and language tags are null terminated.
+ The PNG specification uses null characters for the keys, names and tags, and forbids null
+ characters to appear in the main text which is why we can use null termination everywhere here.
+
+ A keyword is minimum 1 character and maximum 79 characters long. It's
+ discouraged to use a single line length longer than 79 characters for texts.
+
+ Don't allocate these text buffers yourself. Use the init/cleanup functions
+ correctly and use lodepng_add_text and lodepng_clear_text.
+ */
+ size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
+ char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
+ char** text_strings; /*the actual text*/
+
+ /*
+ international text chunks (iTXt)
+ Similar to the non-international text chunks, but with additional strings
+ "langtags" and "transkeys".
+ */
+ size_t itext_num; /*the amount of international texts in this PNG*/
+ char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
+ char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
+ char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
+ char** itext_strings; /*the actual international text - UTF-8 string*/
+
+ /*time chunk (tIME)*/
+ unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
+ LodePNGTime time;
+
+ /*phys chunk (pHYs)*/
+ unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/
+ unsigned phys_x; /*pixels per unit in x direction*/
+ unsigned phys_y; /*pixels per unit in y direction*/
+ unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
+
+ /*
+ Color profile related chunks: gAMA, cHRM, sRGB, iCCP
+
+ LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color
+ profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please
+ use these values with a color management library.
+
+ See the PNG, ICC and sRGB specifications for more information about the meaning of these values.
+ */
+
+ /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */
+ unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). */
+ unsigned gama_gamma; /* Gamma exponent times 100000 */
+
+ /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */
+ unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */
+ unsigned chrm_white_x; /* White Point x times 100000 */
+ unsigned chrm_white_y; /* White Point y times 100000 */
+ unsigned chrm_red_x; /* Red x times 100000 */
+ unsigned chrm_red_y; /* Red y times 100000 */
+ unsigned chrm_green_x; /* Green x times 100000 */
+ unsigned chrm_green_y; /* Green y times 100000 */
+ unsigned chrm_blue_x; /* Blue x times 100000 */
+ unsigned chrm_blue_y; /* Blue y times 100000 */
+
+ /*
+ sRGB chunk: optional. May not appear at the same time as iCCP.
+ If gAMA is also present gAMA must contain value 45455.
+ If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000.
+ */
+ unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */
+ unsigned srgb_intent; /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */
+
+ /*
+ iCCP chunk: optional. May not appear at the same time as sRGB.
+
+ LodePNG does not parse or use the ICC profile (except its color space header field for an edge case), a
+ separate library to handle the ICC data (not included in LodePNG) format is needed to use it for color
+ management and conversions.
+
+ For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC
+ profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and
+ enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile.
+
+ For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-gray
+ PNG color types and a "GRAY" profile for gray PNG color types. If you disable auto_convert, you must ensure
+ the ICC profile type matches your requested color type, else the encoder gives an error. If auto_convert is
+ enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder
+ error if the pixel data has non-gray pixels for a GRAY profile, or a silent less-optimal compression of the pixel
+ data if the pixels could be encoded as grayscale but the ICC profile is RGB.
+
+ To avoid this do not set an ICC profile in the image unless there is a good reason for it, and when doing so
+ make sure you compute it carefully to avoid the above problems.
+ */
+ unsigned iccp_defined; /* Whether an iCCP chunk is present (0 = not present, 1 = present). */
+ char* iccp_name; /* Null terminated string with profile name, 1-79 bytes */
+ /*
+ The ICC profile in iccp_profile_size bytes.
+ Don't allocate this buffer yourself. Use the init/cleanup functions
+ correctly and use lodepng_set_icc and lodepng_clear_icc.
+ */
+ unsigned char* iccp_profile;
+ unsigned iccp_profile_size; /* The size of iccp_profile in bytes */
+
+ /* End of color profile related chunks */
+
+
+ /*
+ unknown chunks: chunks not known by LodePNG, passed on byte for byte.
+
+ There are 3 buffers, one for each position in the PNG where unknown chunks can appear.
+ Each buffer contains all unknown chunks for that position consecutively.
+ The 3 positions are:
+ 0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND.
+
+ For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag
+ above in here, since the encoder will blindly follow this and could then encode an invalid PNG file
+ (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use
+ this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST),
+ or any non-standard PNG chunk.
+
+ Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
+ later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
+ */
+ unsigned char* unknown_chunks_data[3];
+ size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGInfo;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_info_init(LodePNGInfo* info);
+void lodepng_info_cleanup(LodePNGInfo* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
+void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
+
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+ const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
+void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
+
+/*replaces if exists*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size);
+void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the ICC profile again after you set it*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Converts raw buffer from one color type to another color type, based on
+LodePNGColorMode structs to describe the input and output color type.
+See the reference manual at the end of this header file to see which color conversions are supported.
+return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
+The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
+of the output color type (lodepng_get_bpp).
+For < 8 bpp images, there should not be padding bits at the end of scanlines.
+For 16-bit per channel colors, uses big endian format like PNG does.
+Return value is LodePNG error code
+*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+ const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+ unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Settings for the decoder. This contains settings for the PNG and the Zlib
+decoder, but not the Info settings from the Info structs.
+*/
+typedef struct LodePNGDecoderSettings {
+ LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
+
+ /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */
+ unsigned ignore_crc; /*ignore CRC checksums*/
+ unsigned ignore_critical; /*ignore unknown critical chunks*/
+ unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/
+ /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable
+ errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some
+ strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters
+ in string keys, etc... */
+
+ unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
+ /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
+ unsigned remember_unknown_chunks;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGDecoderSettings;
+
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Strategy the encoder uses to choose the PNG filter type of each scanline (see filter_strategy in LodePNGEncoderSettings).*/
+typedef enum LodePNGFilterStrategy {
+ /*every filter at zero*/
+ LFS_ZERO = 0,
+ /*every filter at 1, 2, 3 or 4 (paeth); unlike LFS_ZERO not a good choice, only meant for testing*/
+ LFS_ONE = 1,
+ LFS_TWO = 2,
+ LFS_THREE = 3,
+ LFS_FOUR = 4,
+ /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
+ LFS_MINSUM,
+ /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
+ on the image, this is better or worse than minsum.*/
+ LFS_ENTROPY,
+ /*
+ Brute-force-search PNG filters by compressing each filter for each scanline.
+ Experimental, very slow, and only rarely gives better compression than MINSUM.
+ */
+ LFS_BRUTE_FORCE,
+ /*use predefined_filters buffer: you specify the filter type for each scanline*/
+ LFS_PREDEFINED
+} LodePNGFilterStrategy;
+
+/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...),
+which helps decide which color model to use for encoding.
+Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
+typedef struct LodePNGColorStats {
+ unsigned colored; /*not grayscale*/
+ unsigned key; /*image is not opaque and color key is possible instead of full alpha*/
+ unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 65535 means 255*/
+ unsigned short key_g;
+ unsigned short key_b;
+ unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/
+ unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16 or allow_palette is disabled.*/
+ unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order, only valid when numcolors is valid*/
+ unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for grayscale only. 16 if 16-bit per channel required.*/
+ size_t numpixels;
+
+ /*user settings for computing/using the stats*/
+ unsigned allow_palette; /*default 1. if 0, disallow choosing palette colortype in auto_choose_color, and don't count numcolors*/
+ unsigned allow_greyscale; /*default 1. if 0, choose RGB or RGBA even if the image only has gray colors*/
+} LodePNGColorStats;
+
+void lodepng_color_stats_init(LodePNGColorStats* stats);
+
+/*Get a LodePNGColorStats of the image. The stats must already have been inited.
+Returns error code (e.g. alloc fail) or 0 if ok.*/
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+ const unsigned char* image, unsigned w, unsigned h,
+ const LodePNGColorMode* mode_in);
+
+/*Settings for the encoder.*/
+typedef struct LodePNGEncoderSettings {
+ LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
+
+ unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
+
+ /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
+ 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
+ completely follow the official PNG heuristic, filter_palette_zero must be true and
+ filter_strategy must be LFS_MINSUM*/
+ unsigned filter_palette_zero;
+ /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
+ Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
+ LodePNGFilterStrategy filter_strategy;
+ /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
+ the same length as the amount of scanlines in the image, and each value must be <= 5. You
+ have to clean up this buffer yourself, LodePNG will never free it. Don't forget that filter_palette_zero
+ must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
+ const unsigned char* predefined_filters;
+
+ /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
+ If colortype is 3, PLTE is _always_ created.*/
+ unsigned force_palette;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+ /*add LodePNG identifier and version as a text chunk, for debugging*/
+ unsigned add_id;
+ /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
+ unsigned text_compression;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGEncoderSettings;
+
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+/*The settings, state and information for extended encoding and decoding.*/
+typedef struct LodePNGState {
+#ifdef LODEPNG_COMPILE_DECODER
+ LodePNGDecoderSettings decoder; /*the decoding settings*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+ LodePNGEncoderSettings encoder; /*the encoding settings*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+ LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
+ LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
+ unsigned error;
+} LodePNGState;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_state_init(LodePNGState* state);
+void lodepng_state_cleanup(LodePNGState* state);
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
+getting much more information about the PNG image and color mode.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+ LodePNGState* state,
+ const unsigned char* in, size_t insize);
+
+/*
+Read the PNG header, but not the actual data. This returns only the information
+that is in the IHDR chunk of the PNG, such as width, height and color type. The
+information is placed in the info_png field of the LodePNGState.
+*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h,
+ LodePNGState* state,
+ const unsigned char* in, size_t insize);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*
+Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it
+read in the state. Returns error code on failure.
+Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const
+to find the desired chunk type, and if non null use lodepng_inspect_chunk (with
+chunk_pointer - start_of_file as pos).
+Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...).
+Ignores unsupported, unknown, non-metadata or IHDR chunks (without error).
+Requirements: &in[pos] must point to start of a chunk, must use regular
+lodepng_inspect first since format of most other chunks depends on IHDR, and if
+there is a PLTE chunk, that one must be inspected before tRNS or bKGD.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+ const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+ const unsigned char* image, unsigned w, unsigned h,
+ LodePNGState* state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+The lodepng_chunk functions are normally not needed, except to traverse the
+unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
+It also allows traversing the chunks of an encoded PNG file yourself.
+
+The chunk pointer always points to the beginning of the chunk itself, that is
+the first byte of the 4 length bytes.
+
+In the PNG file format, chunks have the following format:
+-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer)
+-4 bytes chunk type (ASCII a-z,A-Z only, see below)
+-length bytes of data (may be 0 bytes if length was 0)
+-4 bytes of CRC, computed on chunk name + data
+
+The first chunk starts at the 8th byte of the PNG file; the entire rest of the file
+consists of concatenated chunks with the above format.
+
+PNG standard chunk ASCII naming conventions:
+-First byte: uppercase = critical, lowercase = ancillary
+-Second byte: uppercase = public, lowercase = private
+-Third byte: must be uppercase
+-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
+*/
+
+/*
+Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
+There must be at least 4 bytes to read from. If the result value is too large,
+it may be corrupt data.
+*/
+unsigned lodepng_chunk_length(const unsigned char* chunk);
+
+/*puts the 4-byte type in null terminated string*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk);
+
+/*check if the type is the given type*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
+
+/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
+
+/*0: public, 1: private (see PNG standard)*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk);
+
+/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
+
+/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk);
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
+
+/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
+
+/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
+void lodepng_chunk_generate_crc(unsigned char* chunk);
+
+/*
+Iterate to next chunks, allows iterating through all chunks of the PNG file.
+Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call,
+or the 8th byte of a PNG file which always has the first chunk), or alternatively may
+point to the first byte of the PNG file (which is not a chunk but the magic header, the
+function will then skip over it and return the first real chunk).
+Will output pointer to the start of the next chunk, or at or beyond end of the file if there
+is no more chunk after this or possibly if the chunk is corrupt.
+Start this process at the 8th byte of the PNG file.
+In a non-corrupt PNG file, the last chunk should have name "IEND".
+*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end);
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end);
+
+/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]);
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]);
+
+/*
+Appends chunk to the data in out. The given chunk should already have its chunk header.
+The out variable and outsize are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk);
+
+/*
+Appends new chunk to out. The chunk to append is given by giving its length, type
+and data separately. The type is a 4-letter string.
+The out variable and outsize are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length,
+ const char* type, const unsigned char* data);
+
+
+/*Calculate CRC32 of buffer*/
+unsigned lodepng_crc32(const unsigned char* buf, size_t len);
+#endif /*LODEPNG_COMPILE_PNG*/
+
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*
+This zlib part can be used independently to zlib compress and decompress a
+buffer. It cannot be used to create gzip files however, and it only supports the
+part of zlib that is required for PNG, it does not support dictionaries.
+*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+ const unsigned char* in, size_t insize,
+ const LodePNGDecompressSettings* settings);
+
+/*
+Decompresses Zlib data. Reallocates the out buffer and appends the data. The
+data must be according to the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
+ const unsigned char* in, size_t insize,
+ const LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Compresses data with Zlib. Reallocates the out buffer and appends the data.
+Zlib adds a small header and trailer around the deflate data.
+The data is output in the format of the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
+ const unsigned char* in, size_t insize,
+ const LodePNGCompressSettings* settings);
+
+/*
+Find length-limited Huffman code for given frequencies. This function is in the
+public interface only for tests, it's used internally by lodepng_deflate.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+ size_t numcodes, unsigned maxbitlen);
+
+/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+ const unsigned char* in, size_t insize,
+ const LodePNGCompressSettings* settings);
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into buffer. The function allocates the out buffer, and
+after usage you should free it.
+out: output parameter, contains pointer to loaded buffer.
+outsize: output parameter, size of the allocated out buffer
+filename: the path to the file to load
+return value: error code (0 means ok)
+*/
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
+
+/*
+Save a file from buffer to disk. Warning, if it exists, this function overwrites
+the file without warning!
+buffer: the buffer to write
+buffersize: size of the buffer to write
+filename: the path to the file to save to
+return value: error code (0 means ok)
+*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+
+#ifdef LODEPNG_COMPILE_CPP
+/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_PNG
+class State : public LodePNGState { /* LodePNGState subclass declaring a default constructor, copy constructor, destructor and copy assignment */
+ public:
+ State();
+ State(const State& other);
+ ~State();
+ State& operator=(const State& other);
+};
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Same as other lodepng::decode, but using a State for more settings and information.
+The PNG input is passed either as a pointer plus size (in, insize) or as an
+std::vector<unsigned char>.
+NOTE(review): presumably out receives the raw pixels, w and h receive the image
+dimensions, and the return value is an error code (0 means ok) - confirm against
+the implementation.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+ State& state,
+ const unsigned char* in, size_t insize);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+ State& state,
+ const std::vector<unsigned char>& in);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Same as other lodepng::encode, but using a State for more settings and information.
+The raw image is passed either as a pointer (in) or as an std::vector<unsigned char>,
+together with its width w and height h.
+NOTE(review): presumably out receives the encoded PNG bytes and the return value is
+an error code (0 means ok) - confirm against the implementation.
+*/
+unsigned encode(std::vector<unsigned char>& out,
+ const unsigned char* in, unsigned w, unsigned h,
+ State& state);
+unsigned encode(std::vector<unsigned char>& out,
+ const std::vector<unsigned char>& in, unsigned w, unsigned h,
+ State& state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into an std::vector.
+buffer: the std::vector the file is loaded into
+filename: the path to the file to load
+return value: error code (0 means ok)
+*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
+
+/*
+Save the binary data in an std::vector to a file on disk. The file is overwritten
+without warning.
+buffer: the data to write
+filename: the path to the file to save to
+NOTE(review): return value is presumably an error code (0 means ok), as for
+load_file - confirm against the implementation.
+*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_PNG */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Zlib-decompress an unsigned char buffer (in, insize).
+settings defaults to lodepng_default_decompress_settings when omitted.
+*/
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+ const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+
+/*
+Zlib-decompress an std::vector.
+settings defaults to lodepng_default_decompress_settings when omitted.
+*/
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+ const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Zlib-compress an unsigned char buffer (in, insize).
+settings defaults to lodepng_default_compress_settings when omitted.
+*/
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+ const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+
+/*
+Zlib-compress an std::vector.
+settings defaults to lodepng_default_compress_settings when omitted.
+*/
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+ const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+
+/*
+TODO:
+[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
+[.] check compatibility with various compilers - done but needs to be redone for every newer version
+[X] converting color to 16-bit per channel types
+[X] support color profile chunk types (but never let them touch RGB values by default)
+[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST)
+[ ] make sure encoder generates no chunks with size > (2^31)-1
+[ ] partial decoding (stream processing)
+[X] let the "isFullyOpaque" function check color keys and transparent palettes too
+[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
+[ ] allow treating some errors like warnings, when image is recoverable (e.g. 69, 57, 58)
+[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ...
+[ ] error messages with line numbers (and version)
+[ ] errors in state instead of as return code?
+[ ] new errors/warnings like suspiciously big decompressed ztxt or iccp chunk
+[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
+[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
+[ ] allow user to give data (void*) to custom allocator
+[X] provide alternatives for C library functions not present on some platforms (memcpy, ...)
+*/
+
+#endif /*LODEPNG_H inclusion guard*/
+
+/*
+LodePNG Documentation
+---------------------
+
+0. table of contents
+--------------------
+
+ 1. about
+ 1.1. supported features
+ 1.2. features not supported
+ 2. C and C++ version
+ 3. security
+ 4. decoding
+ 5. encoding
+ 6. color conversions
+ 6.1. PNG color types
+ 6.2. color conversions
+ 6.3. padding bits
+ 6.4. A note about 16-bits per channel and endianness
+ 7. error values
+ 8. chunks and PNG editing
+ 9. compiler support
+ 10. examples
+ 10.1. decoder C++ example
+ 10.2. decoder C example
+ 11. state settings reference
+ 12. changes
+ 13. contact information
+
+
+1. about
+--------
+
+PNG is a file format to store raster images losslessly with good compression,
+supporting different color types and alpha channel.
+
+LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
+Specification (Second Edition) - W3C Recommendation 10 November 2003.
+
+The specifications used are:
+
+*) Portable Network Graphics (PNG) Specification (Second Edition):
+ http://www.w3.org/TR/2003/REC-PNG-20031110
+*) RFC 1950 ZLIB Compressed Data Format version 3.3:
+ http://www.gzip.org/zlib/rfc-zlib.html
+*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
+ http://www.gzip.org/zlib/rfc-deflate.html
+
+The most recent version of LodePNG can currently be found at
+http://lodev.org/lodepng/
+
+LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
+extra functionality.
+
+LodePNG consists of two files:
+-lodepng.h: the header file for both C and C++
+-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
+
+If you want to start using LodePNG right away without reading this doc, get the
+examples from the LodePNG website to see how to use it in code, or check the
+smaller examples in chapter 10 here.
+
+LodePNG is simple but only supports the basic requirements. To achieve
+simplicity, the following design choices were made: There are no dependencies
+on any external library. There are functions to decode and encode a PNG with
+a single function call, and extended versions of these functions taking a
+LodePNGState struct allowing to specify or get more information. By default
+the colors of the raw image are always RGB or RGBA, no matter what color type
+the PNG file uses. To read and write files, there are simple functions to
+convert the files to/from buffers in memory.
+
+This all makes LodePNG suitable for loading textures in games, demos and small
+programs, ... It's less suitable for full fledged image editors, loading PNGs
+over network (it requires all the image data to be available before decoding can
+begin), life-critical systems, ...
+
+1.1. supported features
+-----------------------
+
+The following features are supported by LodePNG:
+
+*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
+ or the same color type as the PNG
+*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
+*) Adam7 interlace and deinterlace for any color type
+*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
+*) support for alpha channels, including RGBA color model, translucent palettes and color keying
+*) zlib decompression (inflate)
+*) zlib compression (deflate)
+*) CRC32 and ADLER32 checksums
+*) colorimetric color profile conversions: currently experimentally available in lodepng_util.cpp only,
+ plus alternatively ability to pass on chroma/gamma/ICC profile information to other color management system.
+*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
+*) the following chunks are supported by both encoder and decoder:
+ IHDR: header information
+ PLTE: color palette
+ IDAT: pixel data
+ IEND: the final chunk
+ tRNS: transparency for palettized images
+ tEXt: textual information
+ zTXt: compressed textual information
+ iTXt: international textual information
+ bKGD: suggested background color
+ pHYs: physical dimensions
+ tIME: modification time
+ cHRM: RGB chromaticities
+ gAMA: RGB gamma correction
+ iCCP: ICC color profile
+ sRGB: rendering intent
+
+1.2. features not supported
+---------------------------
+
+The following features are _not_ supported:
+
+*) some features needed to make a conformant PNG-Editor might be still missing.
+*) partial loading/stream processing. All data must be available and is processed in one call.
+*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG:
+ sBIT
+ hIST
+ sPLT
+
+
+2. C and C++ version
+--------------------
+
+The C version uses buffers allocated with malloc that you need to free()
+yourself. You need to use init and cleanup functions for each struct whenever
+using a struct from the C version to avoid exploits and memory leaks.
+
+The C++ version has extra functions with std::vectors in the interface and the
+lodepng::State class which is a LodePNGState with constructor and destructor.
+
+These files work without modification for both C and C++ compilers because all
+the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
+ignore it, and the C code is made to compile both with strict ISO C90 and C++.
+
+To use the C++ version, you need to rename the source file to lodepng.cpp
+(instead of lodepng.c), and compile it with a C++ compiler.
+
+To use the C version, you need to rename the source file to lodepng.c (instead
+of lodepng.cpp), and compile it with a C compiler.
+
+
+3. Security
+-----------
+
+Even if carefully designed, it's always possible that LodePNG contains possible
+exploits. If you discover one, please let me know, and it will be fixed.
+
+When using LodePNG, care has to be taken with the C version of LodePNG, as well
+as the C-style structs when working with C++. The following conventions are used
+for all C-style structs:
+
+-if a struct has a corresponding init function, always call the init function when making a new one
+-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
+-if a struct has a corresponding copy function, use the copy function instead of "=".
+ The destination must also be inited already.
+
+
+4. Decoding
+-----------
+
+Decoding converts a PNG compressed image to a raw pixel buffer.
+
+Most documentation on using the decoder is at its declarations in the header
+above. For C, simple decoding can be done with functions such as
+lodepng_decode32, and more advanced decoding can be done with the struct
+LodePNGState and lodepng_decode. For C++, all decoding can be done with the
+various lodepng::decode functions, and lodepng::State can be used for advanced
+features.
+
+When using the LodePNGState, it uses the following fields for decoding:
+*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
+*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
+*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
+
+LodePNGInfo info_png
+--------------------
+
+After decoding, this contains extra information of the PNG image, except the actual
+pixels, width and height because these are already gotten directly from the decoder
+functions.
+
+It contains for example the original color type of the PNG image, text comments,
+suggested background color, etc... More details about the LodePNGInfo struct are
+at its declaration documentation.
+
+LodePNGColorMode info_raw
+-------------------------
+
+When decoding, here you can specify which color type you want
+the resulting raw image to be. If this is different from the colortype of the
+PNG, then the decoder will automatically convert the result. This conversion
+always works, except if you want it to convert a color PNG to grayscale or to
+a palette with missing colors.
+
+By default, 32-bit color is used for the result.
+
+LodePNGDecoderSettings decoder
+------------------------------
+
+The settings can be used to ignore the errors created by invalid CRC and Adler32
+checksums, and to disable the decoding of tEXt chunks.
+
+There's also a setting color_convert, true by default. If false, no conversion
+is done, the resulting data will be as it was in the PNG (after decompression)
+and you'll have to puzzle the colors of the pixels together yourself using the
+color type information in the LodePNGInfo.
+
+
+5. Encoding
+-----------
+
+Encoding converts a raw pixel buffer to a PNG compressed image.
+
+Most documentation on using the encoder is at its declarations in the header
+above. For C, simple encoding can be done with functions such as
+lodepng_encode32, and more advanced encoding can be done with the struct
+LodePNGState and lodepng_encode. For C++, all encoding can be done with the
+various lodepng::encode functions, and lodepng::State can be used for advanced
+features.
+
+Like the decoder, the encoder can also give errors. However, it gives fewer errors
+since the encoder input is trusted, whereas the decoder input (a PNG image that
+could be forged by anyone) is not trusted.
+
+When using the LodePNGState, it uses the following fields for encoding:
+*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
+*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
+*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
+
+LodePNGInfo info_png
+--------------------
+
+When encoding, you use this the opposite way as when decoding: for encoding,
+you fill in the values you want the PNG to have before encoding. By default it's
+not needed to specify a color type for the PNG since it's automatically chosen,
+but it's possible to choose it yourself given the right settings.
+
+The encoder will not always exactly match the LodePNGInfo struct you give,
+it tries as close as possible. Some things are ignored by the encoder. The
+encoder uses, for example, the following settings from it when applicable:
+colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
+background color, the interlace method, unknown chunks, ...
+
+When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
+If the palette contains any colors for which the alpha channel is not 255 (so
+there are translucent colors in the palette), it'll add a tRNS chunk.
+
+LodePNGColorMode info_raw
+-------------------------
+
+You specify the color type of the raw image that you give to the input here,
+including a possible transparent color key and palette you happen to be using in
+your raw image data.
+
+By default, 32-bit color is assumed, meaning your input has to be in RGBA
+format with 4 bytes (unsigned chars) per pixel.
+
+LodePNGEncoderSettings encoder
+------------------------------
+
+The following settings are supported (some are in sub-structs):
+*) auto_convert: when this option is enabled, the encoder will
+automatically choose the smallest possible color mode (including color key) that
+can encode the colors of all pixels without information loss.
+*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
+ 2 = dynamic huffman tree (best compression). Should be 2 for proper
+ compression.
+*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
+ true for proper compression.
+*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
+ 2048 by default, but can be set to 32768 for better, but slow, compression.
+*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
+ chunk if force_palette is true. This can be used as a suggested palette to convert
+ to by viewers that don't support more than 256 colors (if those still exist)
+*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
+*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
+ zTXt chunks use zlib compression on the text. This gives a smaller result on
+ large texts but a larger result on small texts (such as a single program name).
+ It's all tEXt or all zTXt though, there's no separate setting per text yet.
+
+
+6. color conversions
+--------------------
+
+An important thing to note about LodePNG, is that the color type of the PNG, and
+the color type of the raw image, are completely independent. By default, when
+you decode a PNG, you get the result as a raw image in the color type you want,
+no matter whether the PNG was encoded with a palette, grayscale or RGBA color.
+And if you encode an image, by default LodePNG will automatically choose the PNG
+color type that gives good compression based on the values of colors and amount
+of colors in the image. It can be configured to let you control it instead as
+well, though.
+
+To be able to do this, LodePNG does conversions from one color mode to another.
+It can convert from almost any color type to any other color type, except the
+following conversions: RGB to grayscale is not supported, and converting to a
+palette when the palette doesn't have a required color is not supported. This is
+not supported on purpose: this is information loss which requires a color
+reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to gray
+is easy, but there are multiple ways if you want to give some channels more
+weight).
+
+By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
+color, no matter what color type the PNG has. And by default when encoding,
+LodePNG automatically picks the best color model for the output PNG, and expects
+the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
+the color format of the images yourself, you can skip this chapter.
+
+6.1. PNG color types
+--------------------
+
+A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
+as well as palettized color modes. After the zlib decompression and unfiltering
+in the PNG image is done, the raw pixel data will have that color type and thus
+a certain amount of bits per pixel. If you want the output raw image after
+decoding to have another color type, a conversion is done by LodePNG.
+
+The PNG specification gives the following color types:
+
+0: grayscale, bit depths 1, 2, 4, 8, 16
+2: RGB, bit depths 8 and 16
+3: palette, bit depths 1, 2, 4 and 8
+4: grayscale with alpha, bit depths 8 and 16
+6: RGBA, bit depths 8 and 16
+
+Bit depth is the amount of bits per pixel per color channel. So the total amount
+of bits per pixel is: amount of channels * bitdepth.
+
+6.2. color conversions
+----------------------
+
+As explained in the sections about the encoder and decoder, you can specify
+color types and bit depths in info_png and info_raw to change the default
+behaviour.
+
+If, when decoding, you want the raw image to be something else than the default,
+you need to set the color type and bit depth you want in the LodePNGColorMode,
+or the parameters colortype and bitdepth of the simple decoding function.
+
+If, when encoding, you use another color type than the default in the raw input
+image, you need to specify its color type and bit depth in the LodePNGColorMode
+of the raw image, or use the parameters colortype and bitdepth of the simple
+encoding function.
+
+If, when encoding, you don't want LodePNG to choose the output PNG color type
+but control it yourself, you need to set auto_convert in the encoder settings
+to false, and specify the color type you want in the LodePNGInfo of the
+encoder (including palette: it can generate a palette if auto_convert is true,
+otherwise not).
+
+If the input and output color type differ (whether user chosen or auto chosen),
+LodePNG will do a color conversion, which follows the rules below, and may
+sometimes result in an error.
+
+To avoid some confusion:
+-the decoder converts from PNG to raw image
+-the encoder converts from raw image to PNG
+-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
+-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
+-when encoding, the color type in LodePNGInfo is ignored if auto_convert
+ is enabled, it is automatically generated instead
+-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
+ PNG image, but it can be ignored since the raw image has the color type you requested instead
+-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
+ between the color types is done if the color types are supported. If it is not
+ supported, an error is returned. If the types are the same, no conversion is done.
+-even though some conversions aren't supported, LodePNG supports loading PNGs from any
+ colortype and saving PNGs to any colortype, sometimes it just requires preparing
+ the raw image correctly before encoding.
+-both encoder and decoder use the same color converter.
+
+The function lodepng_convert does the color conversion. It is available in the
+interface but normally isn't needed since the encoder and decoder already call
+it.
+
+Non supported color conversions:
+-color to grayscale when non-gray pixels are present: no error is thrown, but
+the result will look ugly because only the red channel is taken (it assumes all
+three channels are the same in this case so ignores green and blue). The reason
+no error is given is to allow converting from three-channel grayscale images to
+one-channel even if there are numerical imprecisions.
+-anything to palette when the palette does not have an exact match for a from-color
+in it: in this case an error is thrown
+
+Supported color conversions:
+-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
+-any gray or gray+alpha, to gray or gray+alpha
+-anything to a palette, as long as the palette has the requested colors in it
+-removing alpha channel
+-higher to smaller bitdepth, and vice versa
+
+If you want no color conversion to be done (e.g. for speed or control):
+-In the encoder, you can make it save a PNG with any color type by giving the
+raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
+false.
+-In the decoder, you can make it store the pixel data in the same color type
+as the PNG has, by setting the color_convert setting to false. Settings in
+info_raw are then ignored.
+
+6.3. padding bits
+-----------------
+
+In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
+have a bit amount that isn't a multiple of 8, then padding bits are used so that each
+scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
+The raw input image you give to the encoder, and the raw output image you get from the decoder
+will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
+of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
+not the first bit of a new byte.
+
+6.4. A note about 16-bits per channel and endianness
+----------------------------------------------------
+
+LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
+for any other color format. The 16-bit values are stored in big endian (most
+significant byte first) in these arrays. This is the opposite order of the
+little endian used by x86 CPU's.
+
+LodePNG always uses big endian because the PNG file format does so internally.
+Conversions to formats other than those PNG uses internally are not supported by
+LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
+colors, the order in which you store R, G, B and A, and so on. Supporting and
+converting to/from all that is outside the scope of LodePNG.
+
+This may mean that, depending on your use case, you may want to convert the big
+endian output of LodePNG to little endian with a for loop. This is certainly not
+always needed, many applications and libraries support big endian 16-bit colors
+anyway, but it means you cannot simply cast the unsigned char* buffer to an
+unsigned short* buffer on x86 CPUs.
+
+
+7. error values
+---------------
+
+All functions in LodePNG that return an error code, return 0 if everything went
+OK, or a non-zero code if there was an error.
+
+The meaning of the LodePNG error values can be retrieved with the function
+lodepng_error_text: given the numerical error code, it returns a description
+of the error in English as a string.
+
+Check the implementation of lodepng_error_text to see the meaning of each code.
+
+
+8. chunks and PNG editing
+-------------------------
+
+If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
+editor that should follow the rules about handling of unknown chunks, or if your
+program is able to read other types of chunks than the ones handled by LodePNG,
+then that's possible with the chunk functions of LodePNG.
+
+A PNG chunk has the following layout:
+
+4 bytes length
+4 bytes type name
+length bytes data
+4 bytes CRC
+
+8.1. iterating through chunks
+-----------------------------
+
+If you have a buffer containing the PNG image data, then the first chunk (the
+IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
+signature of the PNG and are not part of a chunk. But if you start at byte 8
+then you have a chunk, and can check the following things of it.
+
+NOTE: none of these functions check for memory buffer boundaries. To avoid
+exploits, always make sure the buffer contains all the data of the chunks.
+When using lodepng_chunk_next, make sure the returned value is within the
+allocated memory.
+
+unsigned lodepng_chunk_length(const unsigned char* chunk):
+
+Get the length of the chunk's data. The total chunk length is this length + 12.
+
+void lodepng_chunk_type(char type[5], const unsigned char* chunk):
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
+
+Get the type of the chunk or compare if it's a certain type
+
+unsigned char lodepng_chunk_critical(const unsigned char* chunk):
+unsigned char lodepng_chunk_private(const unsigned char* chunk):
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
+
+Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
+Check if the chunk is private (public chunks are part of the standard, private ones not).
+Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
+chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
+program doesn't handle that type of unknown chunk.
+
+unsigned char* lodepng_chunk_data(unsigned char* chunk):
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
+
+Get a pointer to the start of the data of the chunk.
+
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
+void lodepng_chunk_generate_crc(unsigned char* chunk):
+
+Check if the crc is correct or generate a correct one.
+
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end):
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end):
+
+Iterate to the next chunk. This works if you have a buffer with consecutive chunks. The 'end' argument
+points to the end of that buffer and allows the functions to do overflow checks; still make sure the
+value they return is within the allocated memory before you read a chunk from it.
+
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk):
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length,
+ const char* type, const unsigned char* data):
+
+These functions are used to create new chunks that are appended to the data in *out that has
+length *outsize. The append function appends an existing chunk to the new data. The create
+function creates a new chunk with the given parameters and appends it. Type is the 4-letter
+name of the chunk.
+
+8.2. chunks in info_png
+-----------------------
+
+The LodePNGInfo struct contains fields that hold unknown chunks. It has 3
+buffers (each with size) to contain 3 types of unknown chunks:
+the ones that come before the PLTE chunk, the ones that come between the PLTE
+and the IDAT chunks, and the ones that come after the IDAT chunks.
+It's necessary to make the distinction between these 3 cases because the PNG
+standard requires unknown chunks to keep their ordering relative to the critical
+chunks, but does not impose any other ordering rules.
+
+info_png.unknown_chunks_data[0] is the chunks before PLTE
+info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
+info_png.unknown_chunks_data[2] is the chunks after IDAT
+
+The chunks in these 3 buffers can be iterated through and read by using the same
+way described in the previous subchapter.
+
+When using the decoder to decode a PNG, you can make it store all unknown chunks
+if you set the option settings.remember_unknown_chunks to 1. By default, this
+option is off (0).
+
+The encoder will always encode unknown chunks that are stored in the info_png.
+If you need it to add a particular chunk that isn't known by LodePNG, you can
+use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in
+info_png.unknown_chunks_data[x].
+
+Chunks that are known by LodePNG should not be added in that way. E.g. to make
+LodePNG add a bKGD chunk, set background_defined to true and add the correct
+parameters there instead.
+
+
+9. compiler support
+-------------------
+
+No libraries other than the current standard C library are needed to compile
+LodePNG. For the C++ version, only the standard C++ library is needed on top.
+Add the files lodepng.c(pp) and lodepng.h to your project, include
+lodepng.h where needed, and your program can read/write PNG files.
+
+It is compatible with C90 and up, and C++03 and up.
+
+If performance is important, use optimization when compiling! For both the
+encoder and decoder, this makes a large difference.
+
+Make sure that LodePNG is compiled with the same compiler of the same version
+and with the same settings as the rest of the program, or the interfaces with
+std::vectors and std::strings in C++ can be incompatible.
+
+CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
+
+*) gcc and g++
+
+LodePNG is developed in gcc so this compiler is natively supported. It gives no
+warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
+version 4.7.1 on Linux, 32-bit and 64-bit.
+
+*) Clang
+
+Fully supported and warning-free.
+
+*) Mingw
+
+The Mingw compiler (a port of gcc for Windows) should be fully supported by
+LodePNG.
+
+*) Visual Studio and Visual C++ Express Edition
+
+LodePNG should be warning-free with warning level W4. Two warnings were disabled
+with pragmas though: warning 4244 about implicit conversions, and warning 4996
+where it wants to use a non-standard function fopen_s instead of the standard C
+fopen.
+
+Visual Studio may want "stdafx.h" files to be included in each source file and
+give an error "unexpected end of file while looking for precompiled header".
+This is not standard C++ and will not be added to the stock LodePNG. You can
+disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
+Precompiled Headers, and set it to Not Using Precompiled Headers there.
+
+NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
+VS6, are not guaranteed to work.
+
+*) Compilers on Macintosh
+
+LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
+C and C++.
+
+*) Other Compilers
+
+If you encounter problems on any compilers, feel free to let me know and I may
+try to fix it if the compiler is modern and standards compliant.
+
+
+10. examples
+------------
+
+This decoder example shows the most basic usage of LodePNG. More complex
+examples can be found on the LodePNG website.
+
+10.1. decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+ const char* filename = argc > 1 ? argv[1] : "test.png";
+
+ //load and decode
+ std::vector<unsigned char> image;
+ unsigned width, height;
+ unsigned error = lodepng::decode(image, width, height, filename);
+
+ //if there's an error, display it
+ if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+ //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+ unsigned error;
+ unsigned char* image;
+ unsigned width, height;
+ const char* filename = argc > 1 ? argv[1] : "test.png";
+
+ error = lodepng_decode32_file(&image, &width, &height, filename);
+
+ if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+ / * use image here * /
+
+ free(image);
+ return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.ignore_critical: ignore unknown critical chunks
+state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12. changes
+-----------
+
+The version number of LodePNG is the date of the change given in the format
+yyyymmdd.
+
+Some changes aren't backwards compatible. Those are indicated with a (!)
+symbol.
+
+Not all changes are listed here, the commit history in github lists more:
+https://github.com/lvandeve/lodepng
+
+*) 06 mar 2020: simplified some of the dynamic memory allocations.
+*) 12 jan 2020: (!) added 'end' argument to lodepng_chunk_next to allow correct
+ overflow checks.
+*) 14 aug 2019: around 25% faster decoding thanks to huffman lookup tables.
+*) 15 jun 2019: (!) auto_choose_color API changed (for bugfix: don't use palette
+ if gray ICC profile) and non-ICC LodePNGColorProfile renamed to
+ LodePNGColorStats.
+*) 30 dec 2018: code style changes only: removed newlines before opening braces.
+*) 10 sep 2018: added way to inspect metadata chunks without full decoding.
+*) 19 aug 2018: (!) fixed color mode bKGD is encoded with and made it use
+ palette index in case of palette.
+*) 10 aug 2018: (!) added support for gAMA, cHRM, sRGB and iCCP chunks. This
+ change is backwards compatible unless you relied on unknown_chunks for those.
+*) 11 jun 2018: less restrictive check for pixel size integer overflow
+*) 14 jan 2018: allow optionally ignoring a few more recoverable errors
+*) 17 sep 2017: fix memory leak for some encoder input error cases
+*) 27 nov 2016: grey+alpha auto color model detection bugfix
+*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
+*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
+ the limits of pure C90).
+*) 08 dec 2015: Made load_file function return error if file can't be opened.
+*) 24 okt 2015: Bugfix with decoding to palette output.
+*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
+*) 24 aug 2014: Moved to github
+*) 23 aug 2014: Reduced needless memory usage of decoder.
+*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
+ simplicity. Made ColorProfile public.
+*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
+*) 22 dec 2013: Power of two windowsize required for optimization.
+*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
+*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
+*) 11 mar 2013: (!) Bugfix with custom free. Changed from "my" to "lodepng_"
+ prefix for the custom allocators and made it possible with a new #define to
+ use custom ones in your project without needing to change lodepng's code.
+*) 28 jan 2013: Bugfix with color key.
+*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
+*) 8 okt 2012: (!) Added new filter strategy (entropy) and new auto color mode.
+ (no palette). Better deflate tree encoding. New compression tweak settings.
+ Faster color conversions while decoding. Some internal cleanups.
+*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
+*) 1 sep 2012: (!) Removed #define's for giving custom (de)compression functions
+ and made it work with function pointers instead.
+*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
+ and free functions and toggle #defines from compiler flags. Small fixes.
+*) 6 may 2012: (!) Made plugging in custom zlib/deflate functions more flexible.
+*) 22 apr 2012: (!) Made interface more consistent, renaming a lot. Removed
+ redundant C++ codec classes. Reduced amount of structs. Everything changed,
+ but it is cleaner now imho and functionality remains the same. Also fixed
+ several bugs and shrunk the implementation code. Made new samples.
+*) 6 nov 2011: (!) By default, the encoder now automatically chooses the best
+ PNG color model and bit depth, based on the amount and type of colors of the
+ raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
+*) 9 okt 2011: simpler hash chain implementation for the encoder.
+*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
+*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
+ A bug with the PNG filtertype heuristic was fixed, so that it chooses much
+ better ones (it's quite significant). A setting to do an experimental, slow,
+ brute force search for PNG filter types is added.
+*) 17 aug 2011: (!) changed some C zlib related function names.
+*) 16 aug 2011: made the code less wide (max 120 characters per line).
+*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
+*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
+*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
+ to optimize long sequences of zeros.
+*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
+ LodePNG_InfoColor_canHaveAlpha functions for convenience.
+*) 7 nov 2010: added LodePNG_error_text function to get error code description.
+*) 30 okt 2010: made decoding slightly faster
+*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
+ Reorganized the documentation and the declaration order in the header.
+*) 08 aug 2010: only changed some comments and external samples.
+*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
+*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
+*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
+ read by ignoring the problem but windows apps couldn't.
+*) 06 jun 2008: added more error checks for out of memory cases.
+*) 26 apr 2008: added a few more checks here and there to ensure more safety.
+*) 06 mar 2008: crash with encoding of strings fixed
+*) 02 feb 2008: support for international text chunks added (iTXt)
+*) 23 jan 2008: small cleanups, and #defines to divide code in sections
+*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
+*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
+*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
+ Also various fixes, such as in the deflate and the padding bits code.
+*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
+ filtering code of encoder.
+*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
+ C++ wrapper around this provides an interface almost identical to before.
+ Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
+ are together in these files but it works both for C and C++ compilers.
+*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
+*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
+*) 09 aug 2007: some VS2005 warnings removed again
+*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
+*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
+*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
+ invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
+*) 02 jun 2007: made the encoder add a tag with version by default
+*) 27 may 2007: zlib and png code separated (but still in the same file),
+ simple encoder/decoder functions added for more simple usage cases
+*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
+ moved some examples from here to lodepng_examples.cpp
+*) 12 may 2007: palette decoding bug fixed
+*) 24 apr 2007: changed the license from BSD to the zlib license
+*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
+*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
+ palettized PNG images. Plus little interface change with palette and texts.
+*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
+ Fixed a bug where the end code of a block had length 0 in the Huffman tree.
+*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
+ and supported by the encoder, resulting in smaller PNGs at the output.
+*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
+*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
+ greyscale type to 8-bit greyscale with or without alpha.
+*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
+ to convert to and is more uniform. See the manual for how it works now.
+*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
+ encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
+ at last made the decoder give errors for incorrect Adler32 or Crc.
+*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
+*) 29 dec 2006: Added support for encoding images without alpha channel, and
+ cleaned out code as well as making certain parts faster.
+*) 28 dec 2006: Added "Settings" to the encoder.
+*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
+ Removed some code duplication in the decoder. Fixed little bug in an example.
+*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
+ Fixed a bug of the decoder with 16-bit per color.
+*) 15 okt 2006: Changed documentation structure
+*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
+ given image buffer, however for now it's not compressed.
+*) 08 sep 2006: (!) Changed to interface with a Decoder class
+*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different
+ way. Renamed decodePNG to decodePNGGeneric.
+*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
+ struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
+*) 28 jul 2006: Cleaned the code and added new error checks.
+ Corrected terminology "deflate" into "inflate".
+*) 23 jun 2006: Added SDL example in the documentation in the header, this
+ example allows easy debugging by displaying the PNG and its transparency.
+*) 22 jun 2006: (!) Changed way to obtain error value. Added
+ loadFile function for convenience. Made decodePNG32 faster.
+*) 21 jun 2006: (!) Changed type of info vector to unsigned.
+ Changed position of palette in info vector. Fixed an important bug that
+ happened on PNGs with an uncompressed block.
+*) 16 jun 2006: Internally changed unsigned into unsigned where
+ needed, and performed some optimizations.
+*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
+ in LodePNG namespace. Changed the order of the parameters. Rewrote the
+ documentation in the header. Renamed files to lodepng.cpp and lodepng.h
+*) 22 apr 2006: Optimized and improved some code
+*) 07 sep 2005: (!) Changed to std::vector interface
+*) 12 aug 2005: Initial release (C++, decoder only)
+
+
+13. contact information
+-----------------------
+
+Feel free to contact me with suggestions, problems, comments, ... concerning
+LodePNG. If you encounter a PNG image that doesn't work properly with this
+decoder, feel free to send it and I'll use it to find and fix the problem.
+
+My email address is (puzzle the account and domain together with an @ symbol):
+Domain: gmail dot com.
+Account: lode dot vandevenne.
+
+
+Copyright (c) 2005-2020 Lode Vandevenne
+*/
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..6911348
--- /dev/null
+++ b/makefile
@@ -0,0 +1,20 @@
+all: jrrtilevq jrrtilevq.exe
+# Native build with the host gcc. The obj/ directory must already exist.
+obj/lodepng.o: lodepng.c lodepng.h
+	gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o obj/lodepng.o -c lodepng.c
+
+obj/jrrtilevq-cli.o: jrrtilevq-cli.c jrrtilevq.h lodepng.h xxhash.h
+	gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o obj/jrrtilevq-cli.o -c jrrtilevq-cli.c
+
+jrrtilevq: obj/lodepng.o obj/jrrtilevq-cli.o
+	gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o jrrtilevq obj/lodepng.o obj/jrrtilevq-cli.o
+
+# Windows cross-build with the mingw-w64 toolchain; mirrors the native rules above.
+obj/lodepng.mingw32.o: lodepng.c lodepng.h
+	x86_64-w64-mingw32-gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o obj/lodepng.mingw32.o -c lodepng.c
+
+obj/jrrtilevq-cli.mingw32.o: jrrtilevq-cli.c jrrtilevq.h lodepng.h xxhash.h
+	x86_64-w64-mingw32-gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o obj/jrrtilevq-cli.mingw32.o -c jrrtilevq-cli.c
+
+jrrtilevq.exe: obj/lodepng.mingw32.o obj/jrrtilevq-cli.mingw32.o
+	x86_64-w64-mingw32-gcc -static -O2 -std=c99 -Wall -Wextra -Wpedantic -o jrrtilevq.exe obj/lodepng.mingw32.o obj/jrrtilevq-cli.mingw32.o
diff --git a/xxhash.h b/xxhash.h
new file mode 100644
index 0000000..763b965
--- /dev/null
+++ b/xxhash.h
@@ -0,0 +1,5451 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (C) 2012-2020 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://www.xxhash.com
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+/*!
+ * @mainpage xxHash
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
+/* TODO: update */
+/* Notice extracted from xxHash homepage:
+
+xxHash is an extremely fast hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MurmurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+Note: SMHasher's CRC32 implementation is not the fastest one.
+Other speed-oriented implementations can be faster,
+especially in combination with PCLMUL instruction:
+https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#if defined (__cplusplus)
+extern "C" { /* give the declarations below C linkage when compiled as C++ */
+#endif /* __cplusplus */
+
+/* ****************************
+ * INLINE mode
+ ******************************/
+/*!
+ * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * Use these build macros to inline xxhash into the target unit.
+ * Inlining improves performance on small inputs, especially when the length is
+ * expressed as a compile-time constant:
+ *
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *
+ * It also keeps xxHash symbols private to the unit, so they are not exported.
+ *
+ * Usage:
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ *
+ * Do not compile and link xxhash.o as a separate object, as it is not useful.
+ */
+#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
+ && !defined(XXH_INLINE_ALL_31684351384)
+ /* Inline-mode setup; XXH_INLINE_ALL_31684351384 guards it so this section is traversed only once */
+# define XXH_INLINE_ALL_31684351384
+ /* give access to the advanced API, required to compile implementations */
+# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
+# define XXH_STATIC_LINKING_ONLY
+ /* make all functions private: XXH_PUBLIC_API becomes `static`, plus an inline keyword where one is known */
+# undef XXH_PUBLIC_API
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+ /* note: this version may generate warnings for unused static functions */
+# define XXH_PUBLIC_API static
+# endif /* compiler-specific XXH_PUBLIC_API */
+
+ /*
+ * This part deals with the special case where a unit wants to inline xxHash,
+ * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such
+ * as part of some previously included *.h header file.
+ * Without further action, the new include would just be ignored,
+ * and functions would effectively _not_ be inlined (silent failure).
+ * The following macros solve this situation by prefixing all inlined names,
+ * avoiding naming collision with previous inclusions.
+ */
+# ifdef XXH_NAMESPACE
+# error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported"
+ /*
+ * Note: Alternative: #undef all symbols (it's a pretty large list).
+ * Without #error: it compiles, but functions are actually not inlined.
+ */
+# endif /* XXH_NAMESPACE */
+# define XXH_NAMESPACE XXH_INLINE_ /* prefix used for all inlined names */
+ /*
+ * Some identifiers (enums, type names) are not symbols, but they must
+ * still be renamed to avoid redeclaration.
+ * Alternative solution: do not redeclare them.
+ * However, this requires some #ifdefs, and is a more dispersed action.
+ * Meanwhile, renaming can be achieved in a single block
+ */
+# define XXH_IPREF(Id) XXH_INLINE_ ## Id /* token-paste the XXH_INLINE_ prefix onto Id */
+# define XXH_OK XXH_IPREF(XXH_OK)
+# define XXH_ERROR XXH_IPREF(XXH_ERROR)
+# define XXH_errorcode XXH_IPREF(XXH_errorcode)
+# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
+# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
+# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
+# define XXH32_state_s XXH_IPREF(XXH32_state_s)
+# define XXH32_state_t XXH_IPREF(XXH32_state_t)
+# define XXH64_state_s XXH_IPREF(XXH64_state_s)
+# define XXH64_state_t XXH_IPREF(XXH64_state_t)
+# define XXH3_state_s XXH_IPREF(XXH3_state_s)
+# define XXH3_state_t XXH_IPREF(XXH3_state_t)
+# define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
+ /* Ensure the header is parsed again, even if it was previously included, by undefining its include guard(s) */
+# undef XXHASH_H_5627135585666179
+# undef XXHASH_H_STATIC_13879238742
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+
+
+/* ****************************************************************
+ * Stable API
+ *****************************************************************/
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+
+/*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+/* Windows DLL declaration modes: export with XXH_EXPORT, import with XXH_IMPORT, otherwise XXH_PUBLIC_API is empty. Skipped in inline mode. */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# elif defined(XXH_IMPORT) /* test definedness: a bare `#elif XXH_IMPORT` fails when the macro is defined empty or as 0 */
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif /* !XXH_INLINE_ALL && !XXH_PRIVATE_API */
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
+ * every public symbol of the xxhash library with the value of XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+# define XXH_NAMESPACE /* YOUR NAME HERE */
+# undef XXH_NAMESPACE /* removed again at once; presumably defined only so the doc comment above has a macro to attach to */
+#endif /* XXH_DOXYGEN */
+
+#ifdef XXH_NAMESPACE /* rename every public symbol below to <XXH_NAMESPACE><symbol> */
+# define XXH_CAT(A,B) A##B /* pastes its two arguments exactly as written */
+# define XXH_NAME2(A,B) XXH_CAT(A,B) /* extra macro level so XXH_NAMESPACE is expanded before it is pasted */
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+/* XXH32 */
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+/* XXH64 */
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+/* XXH3_64bits */
+# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
+# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
+# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
+# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
+# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
+# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+/* XXH3_128bits */
+# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif /* XXH_NAMESPACE */
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 0
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is only useful when xxHash is compiled as a shared library, as it is
+ * independent of the version defined in the header.
+ *
+ * @return `XXH_VERSION_NUMBER` as of when the function was compiled.
+ */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ *
+ * Outside of documentation builds the type is chosen as follows:
+ * - C++ or C99 and later (except on VMS): `uint32_t` from `<stdint.h>`;
+ * - otherwise `unsigned int` when `UINT_MAX` is 0xFFFFFFFF;
+ * - otherwise `unsigned long` when `ULONG_MAX` is 0xFFFFFFFF;
+ * - otherwise compilation stops with an `#error`.
+ */
+typedef uint32_t XXH32_hash_t;
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h> /* fixed-width types are available here */
+ typedef uint32_t XXH32_hash_t;
+#else /* VMS, or C older than C99: pick a 32-bit type using <limits.h> */
+# include <limits.h>
+# if UINT_MAX == 0xFFFFFFFFUL /* unsigned int tops out at 2^32 - 1 */
+ typedef unsigned int XXH32_hash_t;
+# else
+# if ULONG_MAX == 0xFFFFFFFFUL /* unsigned long tops out at 2^32 - 1 */
+ typedef unsigned long XXH32_hash_t;
+# else
+# error "unsupported platform: need a 32-bit type"
+# endif
+# endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ * XXH32 is considered rather weak by today's standards.
+ * The @ref xxh3_family provides competitive speed for both 32-bit and 64-bit
+ * systems, and offers true 64/128 bit hash results. It provides a superior
+ * level of dispersion, and greatly reduces the risks of collisions.
+ *
+ * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
+ * @see @ref xxh32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ * Example code for incrementally hashing a file:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <xxhash.h>
+ * #define BUFFER_SIZE 256
+ *
+ * // Note: XXH64 and XXH3 use the same interface.
+ * XXH32_hash_t
+ * hashFile(FILE* stream)
+ * {
+ * XXH32_state_t* state;
+ * unsigned char buf[BUFFER_SIZE];
+ * size_t amt;
+ * XXH32_hash_t hash;
+ *
+ * state = XXH32_createState(); // Create a state
+ * assert(state != NULL); // Error check here
+ * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
+ * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
+ * XXH32_update(state, buf, amt); // Hash the file in chunks
+ * }
+ * hash = XXH32_digest(state); // Finalize the hash
+ * XXH32_freeState(state); // Clean up
+ * return hash;
+ * }
+ * @endcode
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/******* Canonical representation *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ */
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+ unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ *
+ * Outside of documentation builds the type is chosen as follows:
+ * - C++ or C99 and later (except on VMS): `uint64_t` from `<stdint.h>`;
+ * - otherwise `unsigned long` when `__LP64__` is defined and `ULONG_MAX` is
+ *   0xFFFFFFFFFFFFFFFF;
+ * - otherwise `unsigned long long`. This last fallback is not verified by the
+ *   preprocessor: no `#error` is raised if the type is not 64 bits wide.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h> /* fixed-width types are available here */
+ typedef uint64_t XXH64_hash_t;
+#else /* VMS, or C older than C99: pick a 64-bit type using <limits.h> */
+# include <limits.h>
+# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+ /* LP64 ABI says uint64_t is unsigned long */
+ typedef unsigned long XXH64_hash_t;
+# else
+ /* the following type must have a width of 64-bit (assumed here, not checked) */
+ typedef unsigned long long XXH64_hash_t;
+# endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ * and offers true 64/128 bit hash results. It provides a superior level of
+ * dispersion, and greatly reduces the risks of collisions.
+ */
+
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
+ *
+ * This function usually runs faster on 64-bit systems, but slower on 32-bit
+ * systems (see benchmark).
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit hash.
+ *
+ * @see
+ * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+
+/******* Streaming *******/
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * The declarations that follow have the same shape as the XXH32 streaming
+ * functions (XXH32_createState(), XXH32_freeState(), XXH32_copyState(),
+ * XXH32_reset(), XXH32_update(), XXH32_digest()), with @ref XXH64_state_t as
+ * the state and @ref XXH64_hash_t as the seed and result type. The general
+ * `XXH*_createState()` / `XXH*_reset()` / `XXH*_update()` / `XXH*_digest()` /
+ * `XXH*_freeState()` usage described for the streaming API applies to them.
+ *
+ * @see XXH64_state_s for details.
+ */
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); /* 64-bit counterpart of XXH32_createState() */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); /* 64-bit counterpart of XXH32_freeState() */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); /* 64-bit counterpart of XXH32_copyState() */
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); /* takes a 64-bit seed */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); /* 64-bit counterpart of XXH32_update() */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); /* returns a 64-bit hash */
+
+/******* Canonical representation *******
+ * 64-bit counterpart of XXH32_canonical_t and its two conversion functions:
+ * `digest` holds sizeof(XXH64_hash_t) bytes, and the canonical representation
+ * is defined as big endian.
+ */
+typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); /* native hash -> canonical bytes */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); /* canonical bytes -> native hash */
+
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup xxh3_family XXH3 family
+ * @ingroup public
+ * @{
+ *
+ * XXH3 is a more recent hash algorithm featuring:
+ * - Improved speed for both small and large inputs
+ * - True 64-bit and 128-bit outputs
+ * - SIMD acceleration
+ * - Improved 32-bit viability
+ *
+ * Speed analysis methodology is explained here:
+ *
+ * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ *
+ * Compared to XXH64, expect XXH3 to run approximately
+ * ~2x faster on large inputs and >3x faster on small ones,
+ * exact differences vary depending on platform.
+ *
+ * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
+ * but does not require it.
+ * Any 32-bit and 64-bit targets that can run XXH32 smoothly
+ * can run XXH3 at competitive speeds, even without vector support.
+ * Further details are explained in the implementation.
+ *
+ * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
+ * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ *
+ * XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ *
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ */
+
+/*-**********************************************************************
+* XXH3 64-bit variant
+************************************************************************/
+
+/* XXH3_64bits():
+ * default 64-bit variant, using default secret and default seed of 0.
+ * It's the fastest variant. */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+
+/*
+ * XXH3_64bits_withSeed():
+ * This variant generates a custom secret on the fly
+ * based on default secret altered using the `seed` value.
+ * While this operation is decently fast, note that it's not completely free.
+ * Note: seed==0 produces the same results as XXH3_64bits().
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+
+/*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+#define XXH3_SECRET_SIZE_MIN 136
+
+/*
+ * XXH3_64bits_withSecret():
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * However, the quality of produced hash values depends on secret's entropy.
+ * Technically, the secret must look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever unsure about the "randomness" of the blob of bytes,
+ * consider relabelling it as a "custom seed" instead,
+ * and employ "XXH3_generateSecret()" (see below)
+ * to generate a high entropy secret derived from the custom seed.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+
+
+/******* Streaming *******/
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ */
+
+/*!
+ * @brief The state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
+typedef struct XXH3_state_s XXH3_state_t;
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
+XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
+
+/*
+ * XXH3_64bits_reset():
+ * Initialize with default parameters.
+ * digest will be equivalent to `XXH3_64bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
+/*
+ * XXH3_64bits_reset_withSeed():
+ * Generate a custom secret from `seed`, and store it into `statePtr`.
+ * digest will be equivalent to `XXH3_64bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*
+ * XXH3_64bits_reset_withSecret():
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
+
+/* note : canonical representation of XXH3 is the same as XXH64
+ * since they both produce XXH64_hash_t values */
+
+
+/*-**********************************************************************
+* XXH3 128-bit variant
+************************************************************************/
+
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * Stored in little endian order, although the fields themselves are in native
+ * endianness.
+ */
+typedef struct {
+ XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
+ XXH64_hash_t high64; /*!< `value >> 64` */
+} XXH128_hash_t;
+
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+
+/******* Streaming *******/
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ */
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+
+/* Following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * XXH128_isEqual():
+ * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * XXH128_cmp():
+ *
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * return: >0 if *h128_1 > *h128_2
+ * =0 if *h128_1 == *h128_2
+ * <0 if *h128_1 < *h128_2
+ */
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+
+
+/******* Canonical representation *******/
+typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
+XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+
+
+#endif /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
+#define XXHASH_H_STATIC_13879238742
+/* ****************************************************************************
+ * This section contains declarations which are not guaranteed to remain stable.
+ * They may change in future versions, becoming incompatible with a different
+ * version of the library.
+ * These declarations should only be used with static linking.
+ * Never use them in association with dynamic linking!
+ ***************************************************************************** */
+
+/*
+ * These definitions are only present to allow static allocation
+ * of XXH states, on stack or in a struct, for example.
+ * Never **ever** access their members directly.
+ */
+
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Every member is declared as @ref XXH32_hash_t, including the input buffer
+ * @ref mem32, which is declared as four 32-bit words but used as raw bytes.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
+struct XXH32_state_s {
+ XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+ XXH32_hash_t large_len; /*!< Whether the total length hashed is >= 16 (handles @ref total_len_32 overflow) */
+ XXH32_hash_t v1; /*!< First accumulator lane */
+ XXH32_hash_t v2; /*!< Second accumulator lane */
+ XXH32_hash_t v3; /*!< Third accumulator lane */
+ XXH32_hash_t v4; /*!< Fourth accumulator lane */
+ XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+ XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
+ XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it, it may be removed. */
+}; /* typedef'd to XXH32_state_t */
+
+
+#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
+
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * The accumulators, total length and input buffer are 64-bit
+ * (@ref XXH64_hash_t); only @ref memsize and @ref reserved32 are 32-bit.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
+struct XXH64_state_s {
+ XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
+ XXH64_hash_t v1; /*!< First accumulator lane */
+ XXH64_hash_t v2; /*!< Second accumulator lane */
+ XXH64_hash_t v3; /*!< Third accumulator lane */
+ XXH64_hash_t v4; /*!< Fourth accumulator lane */
+ XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+ XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
+ XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyway. */
+ XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it, it may be removed. */
+}; /* typedef'd to XXH64_state_t */
+
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+: XXH_ALIGN(n) is the standard alignas(n) */
+# include <stdalign.h> /* provides the alignas spelling */
+# define XXH_ALIGN(n) alignas(n)
+#elif defined(__GNUC__) /* not C11: GNU-compatible compilers use the aligned attribute */
+# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER) /* not C11, not GNU: MSVC uses the align declspec */
+# define XXH_ALIGN(n) __declspec(align(n))
+#else
+# define XXH_ALIGN(n) /* disabled: no alignment syntax selected, XXH_ALIGN(n) expands to nothing */
+#endif
+
+/* XXH_ALIGN_MEMBER(align, type): declares a struct member with the requested
+ * alignment; `type` is the whole member declaration (type and declarator).
+ *
+ * Old GCC versions only accept the attribute after the type in structures,
+ * so when the compiler is GNU-compatible and not in C11 mode the expansion is
+ * `type XXH_ALIGN(align)`; in every other case it is `XXH_ALIGN(align) type`.
+ */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ && defined(__GNUC__)
+# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * @note **This structure has a strict alignment requirement of 64 bytes.** Do
+ * not allocate this with `malloc()` or `new`, it will not be sufficiently
+ * aligned. Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack
+ * allocation.
+ *
+ * The three leading array members (@ref acc, @ref customSecret and
+ * @ref buffer) are each declared through XXH_ALIGN_MEMBER with an alignment of
+ * 64; the remaining members use their natural alignment.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Do not access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+ XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+ /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+ XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+ /*!< Used to store a custom secret generated from a seed. */
+ XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+ /*!< The internal buffer. @see XXH32_state_s::mem32 */
+ XXH32_hash_t bufferedSize;
+ /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+ XXH32_hash_t reserved32;
+ /*!< Reserved field. Needed for padding on 64-bit. */
+ size_t nbStripesSoFar;
+ /*!< Number of stripes processed. */
+ XXH64_hash_t totalLen;
+ /*!< Total length hashed. 64-bit even on 32-bit targets. */
+ size_t nbStripesPerBlock;
+ /*!< Number of stripes per block. */
+ size_t secretLimit;
+ /*!< Size of @ref customSecret or @ref extSecret.
+ * NOTE(review): the name suggests a limit derived from the secret size
+ * rather than the size itself; confirm against the reset functions. */
+ XXH64_hash_t seed;
+ /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+ XXH64_hash_t reserved64;
+ /*!< Reserved field. */
+ const unsigned char* extSecret;
+ /*!< Reference to an external secret for the _withSecret variants, NULL
+ * for other variants. */
+ /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely emplaced on stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ *
+ * The macro only sets the `seed` member to 0; no other member is touched.
+ * It expands to a braced block, not an expression, so it has no value and
+ * must be used as a statement of its own. Beware that writing
+ * `if (c) XXH3_INITSTATE(p); else ...` does not compile, because the `;`
+ * after the block ends the `if` statement before the `else`.
+ *
+ * @param XXH3_state_ptr Pointer to the state to initialize; evaluated once.
+ */
+#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
+
+
+/* === Experimental API === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*
+ * XXH3_generateSecret():
+ *
+ * Derive a high-entropy secret from any user-defined content, named customSeed.
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
+ * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length XXH3_SECRET_DEFAULT_SIZE
+ * into an already allocated buffer secretBuffer.
+ * The generated secret is _always_ XXH3_SECRET_DEFAULT_SIZE bytes long.
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
+ * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * are part of this list. They all accept a `secret` parameter
+ * which must be very long for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so
+ * this function can be used to generate a secret of proper quality.
+ *
+ * customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even stupidly "low entropy" source such as a bunch of zeroes.
+ * The resulting `secret` will nonetheless provide all expected qualities.
+ *
+ * Supplying NULL as the customSeed copies the default secret into `secretBuffer`.
+ * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize);
+
+
+/* simple short-cut to pre-selected XXH3_128bits variant */
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+
+
+#endif /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) /* both modes need the function bodies, not just prototypes */
+# define XXH_IMPLEMENTATION
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ *
+ * However, inlining requires implementation to be visible to the compiler,
+ * hence be included alongside the header.
+ * Previously, implementation was hosted inside xxhash.c,
+ * which was then #included when inlining was activated.
+ * This construction created issues with a few build and install systems,
+ * as it required xxhash.c to be stored in /include directory.
+ *
+ * xxHash implementation is now directly integrated within xxhash.h.
+ * As a consequence, xxhash.c is no longer needed in /include.
+ *
+ * xxhash.c is still available and is still useful.
+ * In a "normal" setup, when xxhash is not inlined,
+ * xxhash.h only exposes the prototypes and public symbols,
+ * while xxhash.c can be built into an object file xxhash.o
+ * which can then be linked into the final binary.
+ ************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+ || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+# define XXH_IMPLEM_13a8737387
+
+/* *************************************
+* Tuning parameters
+***************************************/
+
+/*!
+ * @defgroup tuning Tuning parameters
+ * @{
+ *
+ * Various macros to control xxHash's behavior.
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ */
+# define XXH_NO_LONG_LONG
+# undef XXH_NO_LONG_LONG /* don't actually */
+/*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated assembly
+ * is sub-optimal.
+ *
+ * The switch below allows selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ * @par
+ * Use `memcpy()`. Safe and portable. Note that most modern compilers will
+ * eliminate the function call and treat it as an unaligned access.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ * @par
+ * Depends on compiler extensions and is therefore not portable.
+ * This method is safe if your compiler supports it, and *generally* as
+ * fast or faster than `memcpy`.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ * @par
+ * Casts directly and dereferences. This method doesn't depend on the
+ * compiler, but it violates the C standard as it directly dereferences an
+ * unaligned pointer. It can generate buggy code on targets which do not
+ * support unaligned memory accesses, but in some circumstances, it's the
+ * only known way to get the most performance (example: GCC + ARMv6).
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ * @par
+ * Also portable. This can generate the best code on old compilers which don't
+ * inline small `memcpy()` calls, and it might also be faster on big-endian
+ * systems which lack a native byteswap instruction. However, some compilers
+ * will emit literal byteshifts even if the target supports unaligned access.
+ *
+ * .
+ *
+ * @warning
+ * Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ * care, as what works on one compiler/platform/optimization level may cause
+ * another to read garbage data or even crash.
+ *
+ * See https://stackoverflow.com/a/32095106/646947 for details.
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+# define XXH_FORCE_MEMORY_ACCESS 0
+/*!
+ * @def XXH_ACCEPT_NULL_INPUT_POINTER
+ * @brief Whether to add explicit `NULL` checks.
+ *
+ * If the input pointer is `NULL` and the length is non-zero, xxHash's default
+ * behavior is to dereference it, triggering a segfault.
+ *
+ * When this macro is enabled, xxHash actively checks the input for a null pointer.
+ * If it is, the result for null input pointers is the same as a zero-length input.
+ */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+/*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
+ * and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally negligible,
+ * but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro to 0.
+ * Then the code will always use unaligned memory access.
+ * Align check is automatically disabled on x86, x64 & arm64,
+ * which are platforms known to offer good unaligned memory accesses performance.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+# define XXH_FORCE_ALIGN_CHECK 0
+
+/*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all internal
+ * functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary which
+ * might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to performance,
+ * depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control on whether to inline or not.
+ *
+ * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
+ * -fno-inline with GCC or Clang, this will automatically be defined.
+ */
+# define XXH_NO_INLINE_HINTS 0
+
+/*!
+ * @def XXH_REROLL
+ * @brief Whether to reroll `XXH32_finalize` and `XXH64_finalize`.
+ *
+ * For performance, `XXH32_finalize` and `XXH64_finalize` use an unrolled loop
+ * in the form of a switch statement.
+ *
+ * This is not always desirable, as it generates larger code, and depending on
+ * the architecture, may even be slower
+ *
+ * This is automatically defined with `-Os`/`-Oz` on GCC and Clang.
+ */
+# define XXH_REROLL 0
+
+/*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use this.
+ */
+# define XXH_OLD_NAMES
+# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+#endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
+# define XXH_FORCE_MEMORY_ACCESS 2 /* GCC on ARMv6 with unaligned access: direct cast */
+# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
+# define XXH_FORCE_MEMORY_ACCESS 1 /* ICC (non-Windows) or GCC on ARMv7+: packed attribute */
+# endif
+#endif /* XXH_FORCE_MEMORY_ACCESS: if still undefined here, the memcpy() read path is selected */
+
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0 /* default: no explicit NULL checks */
+#endif /* XXH_ACCEPT_NULL_INPUT_POINTER */
+
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
+ || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */
+# define XXH_FORCE_ALIGN_CHECK 0 /* x86, x64, arm64: skip the alignment fast path */
+# else
+# define XXH_FORCE_ALIGN_CHECK 1 /* everything else: test alignment once per hash */
+# endif
+#endif /* XXH_FORCE_ALIGN_CHECK */
+
+#ifndef XXH_NO_INLINE_HINTS
+# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
+ || defined(__NO_INLINE__) /* -O0, -fno-inline */
+# define XXH_NO_INLINE_HINTS 1 /* leave inlining decisions to the compiler */
+# else
+# define XXH_NO_INLINE_HINTS 0 /* keep the forced-inline hints */
+# endif
+#endif /* XXH_NO_INLINE_HINTS */
+
+#ifndef XXH_REROLL
+# if defined(__OPTIMIZE_SIZE__)
+# define XXH_REROLL 1 /* size-optimized build: use the compact finalize loops */
+# else
+# define XXH_REROLL 0 /* default: use the unrolled switch in finalize */
+# endif
+#endif /* XXH_REROLL */
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Allocation hook; edit this wrapper to use a routine other than malloc().
+ *
+ * Called by XXH32_createState() to obtain storage for a streaming state.
+ *
+ * @param s Number of bytes requested.
+ * @return Whatever malloc() returns for @p s.
+ */
+static void* XXH_malloc(size_t s)
+{
+    void* const block = malloc(s);
+    return block;
+}
+
+/*!
+ * @internal
+ * @brief Release hook; edit this wrapper to use a routine other than free().
+ *
+ * @param p Pointer to release; it is forwarded to free() unchanged.
+ */
+static void XXH_free(void* p)
+{
+    free(p);
+    return;
+}
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Copy hook; edit this wrapper to use a routine other than memcpy().
+ *
+ * @param dest Destination buffer.
+ * @param src  Source buffer.
+ * @param size Number of bytes to copy.
+ * @return The value returned by memcpy(), i.e. @p dest.
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    void* const written = memcpy(dest, src, size);
+    return written;
+}
+
+#include <limits.h> /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS /* disable inlining hints */
+# if defined(__GNUC__)
+# define XXH_FORCE_INLINE static __attribute__((unused)) /* plain static; 'unused' avoids warnings for helpers that end up uncalled */
+# else
+# define XXH_FORCE_INLINE static
+# endif
+# define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(_MSC_VER) /* Visual Studio */
+# define XXH_FORCE_INLINE static __forceinline
+# define XXH_NO_INLINE static __declspec(noinline)
+#elif defined(__GNUC__) /* GCC-compatible compilers */
+# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+# define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
+# define XXH_FORCE_INLINE static inline /* standard 'inline' is only a hint */
+# define XXH_NO_INLINE static
+#else /* no inline keyword assumed: fall back to plain static */
+# define XXH_FORCE_INLINE static
+# define XXH_NO_INLINE static
+#endif /* XXH_NO_INLINE_HINTS / compiler selection */
+
+
+
+/* *************************************
+* Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
+ */
+#ifndef XXH_DEBUGLEVEL
+# ifdef DEBUGLEVEL /* backwards compat */
+# define XXH_DEBUGLEVEL DEBUGLEVEL
+# else
+# define XXH_DEBUGLEVEL 0 /* default: assertions compiled out */
+# endif
+#endif /* XXH_DEBUGLEVEL */
+
+#if (XXH_DEBUGLEVEL>=1)
+# include <assert.h> /* note: can still be disabled with NDEBUG */
+# define XXH_ASSERT(c) assert(c)
+#else
+# define XXH_ASSERT(c) ((void)0) /* no-op: the condition c is not evaluated */
+#endif /* XXH_DEBUGLEVEL>=1 */
+
+/*
+ * Compile-time assertion. When c is false the enumerator initializer becomes
+ * 1/0, which is not a valid constant expression, so compilation fails.
+ * note: expands to a statement, so use after variable declarations
+ */
+#define XXH_STATIC_ASSERT(c) do { enum { XXH_sa = 1/(int)(!!(c)) }; } while (0)
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t xxh_u8; /* byte type taken from <stdint.h> */
+#else
+ typedef unsigned char xxh_u8; /* fallback when the <stdint.h> path is not taken (VMS, or neither C++ nor C99) */
+#endif
+typedef XXH32_hash_t xxh_u32; /* internal alias of the public 32-bit hash type */
+
+#ifdef XXH_OLD_NAMES /* legacy, un-namespaced aliases */
+# define BYTE xxh_u8
+# define U8 xxh_u8
+# define U32 xxh_u32
+#endif /* XXH_OLD_NAMES */
+
+/* *** Memory access *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ * aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE32 and XXH_readBE32.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==2: read through a plain pointer cast.
+ * Only works on CPUs which support unaligned memory access in hardware.
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    const xxh_u32* const word = (const xxh_u32*) memPtr;
+    return *word;
+}
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __pack instructions are safer but compiler specific, hence potentially
+ * problematic for some compilers.
+ *
+ * Currently only defined for GCC and ICC.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+#endif
+static xxh_u32 XXH_read32(const void* ptr)
+{
+    /* XXH_FORCE_MEMORY_ACCESS==1: read the value through a packed union. */
+    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
+    const xxh_unalign* const packedPtr = (const xxh_unalign*)ptr;
+    return packedPtr->u32;
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: https://stackoverflow.com/a/32095106/646947
+ *
+ * Default XXH_read32(): copies sizeof(xxh_u32) bytes from memPtr into a local
+ * and returns it in native byte order. The copy goes through XXH_memcpy() so
+ * that replacing that single hook also covers this read path.
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 val;
+    XXH_memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* *** Endianness *** */
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/*!
+ * @ingroup tuning
+ * @def XXH_CPU_LITTLE_ENDIAN
+ * @brief Whether the target is little endian.
+ *
+ * Defined to 1 if the target is little endian, or 0 if it is big endian.
+ * It can be defined externally, for example on the compiler command line.
+ *
+ * If it is not defined, a runtime check (which is usually constant folded)
+ * is used instead.
+ *
+ * @note
+ * This is not necessarily defined to an integer constant.
+ *
+ * @see XXH_isLittleEndian() for the runtime check.
+ */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+/*
+ * Try to detect endianness automatically, to avoid the nonstandard behavior
+ * in `XXH_isLittleEndian()`
+ */
+# if defined(_WIN32) /* Windows is always little endian */ \
+ || defined(__LITTLE_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+# define XXH_CPU_LITTLE_ENDIAN 1
+# elif defined(__BIG_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+# define XXH_CPU_LITTLE_ENDIAN 0
+# else
+/*!
+ * @internal
+ * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
+ *
+ * Initializes the 32-bit member of a union to 1 and inspects its first byte.
+ * Most compilers will constant fold this.
+ *
+ * @return Non-zero if the first byte of the 32-bit value 1 is 1, else 0.
+ */
+static int XXH_isLittleEndian(void)
+{
+    typedef union { xxh_u32 word; xxh_u8 bytes[4]; } xxh_endian_probe;
+    /*
+     * Portable and well-defined behavior.
+     * Deliberately not static: it is detrimental to performance.
+     */
+    const xxh_endian_probe probe = { 1 };
+    return probe.bytes[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+# endif
+#endif
+
+
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* major*100 + minor, e.g. GCC 4.3 -> 403 */
+
+#ifdef __has_builtin
+# define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define XXH_HAS_BUILTIN(x) 0 /* no __has_builtin: report every builtin as unavailable */
+#endif /* __has_builtin */
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ * @p r > 0 && @p r < 32
+ * @note
+ * @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+ && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+# define XXH_rotl32 __builtin_rotateleft32
+# define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER) /* MSVC rotate intrinsics */
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else /* portable shift/or form: needs 0 < r < width, since r == 0 would shift by the full width */
+# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif /* rotate selection */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    /* Move each source byte to its mirrored position, then merge. */
+    const xxh_u32 lowToTop    = (x << 24) & 0xff000000;
+    const xxh_u32 midLowUp    = (x <<  8) & 0x00ff0000;
+    const xxh_u32 midHighDown = (x >>  8) & 0x0000ff00;
+    const xxh_u32 topToLow    = (x >> 24) & 0x000000ff;
+    return lowToTop | midLowUp | midHighDown | topToLow;
+}
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+
+/*!
+ * @internal
+ * @brief Enum to indicate whether a pointer is aligned.
+ *
+ * Passed to XXH_readLE32_align(), which uses the generic XXH_readLE32() for
+ * XXH_unaligned and a direct 32-bit load otherwise.
+ */
+typedef enum {
+ XXH_aligned, /*!< Aligned */
+ XXH_unaligned /*!< Possibly unaligned */
+} XXH_alignment;
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
+ *
+ * This is ideal for older compilers which don't inline memcpy.
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
+{
+    /* Byteshift load: memPtr[0] is the least significant byte. */
+    const xxh_u8* const b = (const xxh_u8 *)memPtr;
+    xxh_u32 value = b[0];
+    value |= (xxh_u32)b[1] << 8;
+    value |= (xxh_u32)b[2] << 16;
+    value |= (xxh_u32)b[3] << 24;
+    return value;
+}
+
+XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
+{
+    /* Byteshift load: memPtr[0] is the most significant byte. */
+    const xxh_u8* const b = (const xxh_u8 *)memPtr;
+    xxh_u32 value = b[3];
+    value |= (xxh_u32)b[2] << 8;
+    value |= (xxh_u32)b[1] << 16;
+    value |= (xxh_u32)b[0] << 24;
+    return value;
+}
+
+#else
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{
+    /* Native read, byteswapped only when the target is not little endian. */
+    const xxh_u32 native = XXH_read32(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return native;
+    }
+    return XXH_swap32(native);
+}
+
+static xxh_u32 XXH_readBE32(const void* ptr)
+{
+    /* Native read, byteswapped only when the target is little endian. */
+    const xxh_u32 native = XXH_read32(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return XXH_swap32(native);
+    }
+    return native;
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    /* Possibly unaligned input goes through the generic reader. */
+    if (align==XXH_unaligned) {
+        return XXH_readLE32(ptr);
+    }
+
+    /* Aligned input: load the word directly, swapping on big-endian targets. */
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        return *(const xxh_u32*)ptr;
+    }
+    return XXH_swap32(*(const xxh_u32*)ptr);
+}
+
+
+/* *************************************
+* Misc
+***************************************/
+/*!
+ * @ingroup public
+ * @brief Returns the value of XXH_VERSION_NUMBER this code was compiled with.
+ */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void)
+{
+    const unsigned version = XXH_VERSION_NUMBER;
+    return version;
+}
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @defgroup xxh32_impl XXH32 implementation
+ * @ingroup impl
+ * @{
+ */
+static const xxh_u32 XXH_PRIME32_1 = 0x9E3779B1U; /*!< 0b10011110001101110111100110110001 */
+static const xxh_u32 XXH_PRIME32_2 = 0x85EBCA77U; /*!< 0b10000101111010111100101001110111 */
+static const xxh_u32 XXH_PRIME32_3 = 0xC2B2AE3DU; /*!< 0b11000010101100101010111000111101 */
+static const xxh_u32 XXH_PRIME32_4 = 0x27D4EB2FU; /*!< 0b00100111110101001110101100101111 */
+static const xxh_u32 XXH_PRIME32_5 = 0x165667B1U; /*!< 0b00010110010101100110011110110001 */
+
+#ifdef XXH_OLD_NAMES /* legacy, un-namespaced aliases for the constants above */
+# define PRIME32_1 XXH_PRIME32_1
+# define PRIME32_2 XXH_PRIME32_2
+# define PRIME32_3 XXH_PRIME32_3
+# define PRIME32_4 XXH_PRIME32_4
+# define PRIME32_5 XXH_PRIME32_5
+#endif /* XXH_OLD_NAMES */
+
+/*!
+ * @internal
+ * @brief Normal stripe processing routine.
+ *
+ * This shuffles the bits so that any bit from @p input impacts several bits in
+ * @p acc. The steps are: add input * XXH_PRIME32_2, rotate left by 13, then
+ * multiply by XXH_PRIME32_1.
+ *
+ * @param acc The accumulator lane.
+ * @param input The stripe of input to mix.
+ * @return The mixed accumulator lane.
+ */
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+ acc += input * XXH_PRIME32_2;
+ acc = XXH_rotl32(acc, 13);
+ acc *= XXH_PRIME32_1;
+#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+ /*
+ * UGLY HACK:
+ * This inline assembly hack forces acc into a normal register. This is the
+ * only thing that prevents GCC and Clang from autovectorizing the XXH32
+ * loop (pragmas and attributes don't work for some reason) without globally
+ * disabling SSE4.1.
+ *
+ * The reason we want to avoid vectorization is because despite working on
+ * 4 integers at a time, there are multiple factors slowing XXH32 down on
+ * SSE4:
+ * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
+ * newer chips!) making it slightly slower to multiply four integers at
+ * once compared to four integers independently. Even when pmulld was
+ * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
+ * just to multiply unless doing a long operation.
+ *
+ * - Four instructions are required to rotate,
+ * movdqa tmp, v // not required with VEX encoding
+ * pslld tmp, 13 // tmp <<= 13
+ * psrld v, 19 // x >>= 19
+ * por v, tmp // x |= tmp
+ * compared to one for scalar:
+ * roll v, 13 // reliably fast across the board
+ * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
+ *
+ * - Instruction level parallelism is actually more beneficial here because
+ * the SIMD actually serializes this operation: While v1 is rotating, v2
+ * can load data, while v3 can multiply. SSE forces them to operate
+ * together.
+ *
+ * How this hack works:
+ * __asm__("" // Declare an assembly block but don't declare any instructions
+ * : // However, as an Input/Output Operand,
+ * "+r" // constrain a read/write operand (+) as a general purpose register (r).
+ * (acc) // and set acc as the operand
+ * );
+ *
+ * Because of the 'r', the compiler has promised that acc will be in a
+ * general purpose register and the '+' says that it will be 'read/write',
+ * so it has to assume it has changed. It is like volatile without all the
+ * loads and stores.
+ *
+ * Since the argument has to be in a normal register (not an SSE register),
+ * each time XXH32_round is called, it is impossible to vectorize.
+ */
+ __asm__("" : "+r" (acc));
+#endif /* GCC-compatible + SSE4.1, unless XXH_ENABLE_AUTOVECTORIZE is defined */
+ return acc;
+}
+
+/*!
+ * @internal
+ * @brief Final bit-mixing step of XXH32.
+ *
+ * Alternates xor-with-right-shift (by 15, 13, then 16) with multiplication by
+ * XXH_PRIME32_2 and XXH_PRIME32_3, so that all input bits have a chance to
+ * impact any bit in the output digest.
+ *
+ * @param h32 The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+{
+    xxh_u32 mixed = h32;
+    mixed = (mixed ^ (mixed >> 15)) * XXH_PRIME32_2;
+    mixed = (mixed ^ (mixed >> 13)) * XXH_PRIME32_3;
+    mixed = mixed ^ (mixed >> 16);
+    return mixed;
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, align) /* expects a variable named 'align' in scope at the point of use */
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * Remaining bytes are consumed four at a time (XXH_PROCESS4, a little-endian
+ * 32-bit read) and then one at a time (XXH_PROCESS1); the result is passed
+ * through XXH32_avalanche(). Two equivalent code paths exist: a compact loop
+ * when XXH_REROLL is non-zero, and an unrolled switch otherwise.
+ *
+ * XXH_PROCESS1 and XXH_PROCESS4 are defined inside this function and modify
+ * the local @p h32 and @p ptr directly.
+ *
+ * @param h32 The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ */
+static xxh_u32
+XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+#define XXH_PROCESS1 do { \
+ h32 += (*ptr++) * XXH_PRIME32_5; \
+ h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \
+} while (0)
+
+#define XXH_PROCESS4 do { \
+ h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \
+ ptr += 4; \
+ h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \
+} while (0)
+
+ /* Compact rerolled version: whole 4-byte words first, then single bytes */
+ if (XXH_REROLL) {
+ len &= 15;
+ while (len >= 4) {
+ XXH_PROCESS4;
+ len -= 4;
+ }
+ while (len > 0) {
+ XXH_PROCESS1;
+ --len;
+ }
+ return XXH32_avalanche(h32);
+ } else {
+ switch(len&15) /* or switch(bEnd - p) */ {
+ case 12: XXH_PROCESS4;
+ /* fallthrough */
+ case 8: XXH_PROCESS4;
+ /* fallthrough */
+ case 4: XXH_PROCESS4;
+ return XXH32_avalanche(h32);
+
+ case 13: XXH_PROCESS4;
+ /* fallthrough */
+ case 9: XXH_PROCESS4;
+ /* fallthrough */
+ case 5: XXH_PROCESS4;
+ XXH_PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 14: XXH_PROCESS4;
+ /* fallthrough */
+ case 10: XXH_PROCESS4;
+ /* fallthrough */
+ case 6: XXH_PROCESS4;
+ XXH_PROCESS1;
+ XXH_PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 15: XXH_PROCESS4;
+ /* fallthrough */
+ case 11: XXH_PROCESS4;
+ /* fallthrough */
+ case 7: XXH_PROCESS4;
+ /* fallthrough */
+ case 3: XXH_PROCESS1;
+ /* fallthrough */
+ case 2: XXH_PROCESS1;
+ /* fallthrough */
+ case 1: XXH_PROCESS1;
+ /* fallthrough */
+ case 0: return XXH32_avalanche(h32);
+ }
+ XXH_ASSERT(0); /* every value of len&15 (0..15) has a case above */
+ return h32; /* reaching this point is deemed impossible */
+ }
+}
+
+#ifdef XXH_OLD_NAMES /* keep the helper macros and expose them under their legacy names */
+# define PROCESS1 XXH_PROCESS1
+# define PROCESS4 XXH_PROCESS4
+#else /* otherwise the helper macros are removed here */
+# undef XXH_PROCESS1
+# undef XXH_PROCESS4
+#endif /* XXH_OLD_NAMES */
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH32().
+ *
+ * Inputs of 16 bytes or more are consumed in 16-byte stripes spread over four
+ * accumulators; shorter inputs start from seed + XXH_PRIME32_5. The total
+ * length is then added and the trailing (len & 15) bytes are handed to
+ * XXH32_finalize().
+ *
+ * The end pointer is only computed inside the len >= 16 branch, i.e. after the
+ * optional NULL handling, so no pointer arithmetic is performed on a NULL
+ * @p input.
+ *
+ * @param input, len, seed Directly passed from @ref XXH32().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE xxh_u32
+XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
+{
+    xxh_u32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (input==NULL) {
+        /* Treat NULL as an empty input; the placeholder address is never read. */
+        len=0;
+        input=(const xxh_u8*)(size_t)16;
+    }
+#endif
+
+    if (len>=16) {
+        const xxh_u8* const bEnd = input + len;
+        const xxh_u8* const limit = bEnd - 15;
+        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+        xxh_u32 v2 = seed + XXH_PRIME32_2;
+        xxh_u32 v3 = seed + 0;
+        xxh_u32 v4 = seed - XXH_PRIME32_1;
+
+        /* One 16-byte stripe per iteration, 4 bytes per accumulator. */
+        do {
+            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
+            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
+            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
+            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
+        } while (input < limit);
+
+        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
+            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    } else {
+        h32  = seed + XXH_PRIME32_5;
+    }
+
+    h32 += (xxh_u32)len;
+
+    return XXH32_finalize(h32, input, len&15, align);
+}
+
+/*!
+ * @ingroup xxh32_family
+ * @brief One-shot 32-bit hash of @p len bytes at @p input, using @p seed.
+ *
+ * When XXH_FORCE_ALIGN_CHECK is non-zero and @p input is 4-byte aligned, the
+ * aligned variant of XXH32_endian_align() is used; otherwise the unaligned one.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, (const xxh_u8*)input, len);
+    return XXH32_digest(&state);
+#else
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+
+    /* Input is 4-bytes aligned, leverage the speed benefit */
+    if (XXH_FORCE_ALIGN_CHECK && ((((size_t)input) & 3) == 0)) {
+        return XXH32_endian_align(bytes, len, seed, XXH_aligned);
+    }
+
+    return XXH32_endian_align(bytes, len, seed, XXH_unaligned);
+#endif
+}
+
+
+
+/******* Hash streaming *******/
+/*!
+ * @ingroup xxh32_family
+ * @brief Allocates an XXH32 streaming state through XXH_malloc().
+ *
+ * The memory is returned as allocated; this function does not initialize it.
+ *
+ * @return The new state, or whatever XXH_malloc() returned on failure.
+ */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+    XXH32_state_t* const fresh = (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+    return fresh;
+}
+/*!
+ * @ingroup xxh32_family
+ * @brief Releases a state through XXH_free().
+ *
+ * @param statePtr The state to release.
+ * @return Always XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    const XXH_errorcode status = XXH_OK;
+    XXH_free(statePtr);
+    return status;
+}
+
+/*!
+ * @ingroup xxh32_family
+ * @brief Copies the whole of @p srcState into @p dstState.
+ *
+ * The copy goes through XXH_memcpy(), the file's replaceable copy hook,
+ * instead of calling memcpy() directly.
+ *
+ * @param dstState The state to overwrite.
+ * @param srcState The state to copy from.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+    XXH_memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+/*!
+ * @ingroup xxh32_family
+ * @brief Resets @p statePtr so that a new hash can be started with @p seed.
+ *
+ * A zeroed local state is given its four seeded accumulators and then copied
+ * over @p statePtr, leaving the trailing `reserved` member untouched. The copy
+ * goes through XXH_memcpy(), the file's replaceable copy hook.
+ *
+ * @param statePtr The state to reset.
+ * @param seed The 32-bit seed.
+ * @return Always XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
+{
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    state.v2 = seed + XXH_PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - XXH_PRIME32_1;
+    /* do not write into reserved, planned to be removed in a future version */
+    XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+    return XXH_OK;
+}
+
+
+/*!
+ * @ingroup xxh32_family
+ * @brief Feeds @p len bytes at @p input into the streaming state.
+ *
+ * Bytes are buffered in state->mem32 until 16 are available; complete 16-byte
+ * stripes are mixed into the four accumulators and any remainder is kept for
+ * the next call.
+ *
+ * The "at least one full stripe left" test compares the remaining byte count
+ * against 16 rather than evaluating `bEnd - 16`, which would point before the
+ * start of the input whenever fewer than 16 bytes remain.
+ *
+ * @param state The state to update.
+ * @param input The data to hash.
+ * @param len   The length of @p input, in bytes.
+ * @return XXH_OK on success. When @p input is NULL: XXH_OK if
+ *         XXH_ACCEPT_NULL_INPUT_POINTER >= 1, XXH_ERROR otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH32_update(XXH32_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len_32 += (XXH32_hash_t)len;
+        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
+
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (XXH32_hash_t)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
+            {   const xxh_u32* p32 = state->mem32;
+                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
+                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
+                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
+                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
+            }
+            p += 16-state->memsize;
+            state->memsize = 0;
+        }
+
+        /* p never exceeds bEnd here, so the difference is non-negative. */
+        if ((size_t)(bEnd - p) >= 16) {
+            const xxh_u8* const limit = bEnd - 16;   /* in range: at least 16 bytes remain */
+            xxh_u32 v1 = state->v1;
+            xxh_u32 v2 = state->v2;
+            xxh_u32 v3 = state->v3;
+            xxh_u32 v4 = state->v4;
+
+            do {
+                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
+                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
+                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
+                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {   /* keep the tail (fewer than 16 bytes) for the next call */
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*!
+ * @ingroup xxh32_family
+ * @brief Computes the hash of everything fed to @p state so far.
+ *
+ * The state is only read. The buffered tail in state->mem32 is finalized with
+ * XXH32_finalize().
+ *
+ * @param state The state to digest.
+ * @return The 32-bit hash value.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
+{
+    xxh_u32 acc;
+
+    if (!state->large_len) {
+        /* Fewer than 16 bytes seen in total: v3 still holds the seed. */
+        acc = state->v3 /* == seed */ + XXH_PRIME32_5;
+    } else {
+        /* Merge the four accumulators. */
+        acc = XXH_rotl32(state->v1, 1)
+            + XXH_rotl32(state->v2, 7)
+            + XXH_rotl32(state->v3, 12)
+            + XXH_rotl32(state->v4, 18);
+    }
+
+    acc += state->total_len_32;
+
+    return XXH32_finalize(acc, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
+}
+
+
+/******* Canonical representation *******/
+
+/*!
+ * @ingroup xxh32_family
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ *
+ * The canonical representation uses big endian convention, the same convention
+ * as human-readable numbers (large digits first).
+ *
+ * This way, hash values can be written into a file or buffer, remaining
+ * comparable across different systems.
+ *
+ * The following functions allow transformation of hash values to and from their
+ * canonical format.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    XXH32_hash_t bigEndian = hash;
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    /* canonical form is big endian: swap only on little-endian hosts */
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        bigEndian = XXH_swap32(bigEndian);
+    }
+    memcpy(dst, &bigEndian, sizeof(*dst));
+}
+/*!
+ * @ingroup xxh32_family
+ * Reads a hash value back from its canonical (big endian) form.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+    XXH32_hash_t const hash = XXH_readBE32(src);
+    return hash;
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @ingroup impl
+ * @{
+ */
+/******* Memory access *******/
+
+typedef XXH64_hash_t xxh_u64;
+
+#ifdef XXH_OLD_NAMES
+# define U64 xxh_u64
+#endif
+
+/*!
+ * XXH_REROLL_XXH64:
+ * Whether to reroll the XXH64_finalize() loop.
+ *
+ * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a
+ * performance gain on 64-bit hosts, as only one jump is required.
+ *
+ * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit
+ * registers, and 64-bit arithmetic needs to be simulated, it isn't beneficial
+ * to unroll. The code becomes ridiculously large (the largest function in the
+ * binary on i386!), and rerolling it saves anywhere from 3kB to 20kB. It is
+ * also slightly faster because it fits into cache better and is more likely
+ * to be inlined by the compiler.
+ *
+ * If XXH_REROLL is nonzero, the loop is rerolled regardless of this setting:
+ * XXH64_finalize() tests (XXH_REROLL || XXH_REROLL_XXH64).
+ *
+ * Default when not predefined: 1 (rerolled) if an ILP32 macro is set, if the
+ * target is none of the 64-bit architectures listed below, or if SIZE_MAX is
+ * undefined or smaller than ULLONG_MAX; 0 (unrolled) otherwise.
+ */
+#ifndef XXH_REROLL_XXH64
+# if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
+ || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
+ || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
+ || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
+ || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
+ || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
+# define XXH_REROLL_XXH64 1
+# else
+# define XXH_REROLL_XXH64 0
+# endif
+#endif /* !defined(XXH_REROLL_XXH64) */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE64 and XXH_readBE64.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    /* plain dereference: the hardware is trusted to cope with any alignment */
+    const xxh_u64* const wordPtr = (const xxh_u64*)memPtr;
+    return *wordPtr;
+}
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __pack instructions are safer, but compiler specific, hence potentially
+ * problematic for some compilers.
+ *
+ * Currently only defined for GCC and ICC.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
+#endif
+static xxh_u64 XXH_read64(const void* ptr)
+{
+    /* the packed union lets the compiler emit an alignment-safe load */
+    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
+    const xxh_unalign64* const packedPtr = (const xxh_unalign64*)ptr;
+    return packedPtr->u64;
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: https://stackoverflow.com/a/32095106/646947
+ */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    /* copy the 8 bytes out so that no alignment is assumed for memPtr */
+    xxh_u64 word;
+    memcpy(&word, memPtr, sizeof(xxh_u64));
+    return word;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+/* Portable fallback: reverses the order of the 8 bytes of x. */
+static xxh_u64 XXH_swap64(xxh_u64 x)
+{
+    /* swap neighbouring bytes ... */
+    x = ((x & 0x00ff00ff00ff00ffULL) << 8)  | ((x >> 8)  & 0x00ff00ff00ff00ffULL);
+    /* ... then neighbouring 16-bit pairs ... */
+    x = ((x & 0x0000ffff0000ffffULL) << 16) | ((x >> 16) & 0x0000ffff0000ffffULL);
+    /* ... then the two 32-bit halves */
+    return (x << 32) | (x >> 32);
+}
+#endif
+
+
+/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
+{
+    /* assemble the value byte by byte; byte 0 ends up least significant */
+    const xxh_u8* const bytes = (const xxh_u8 *)memPtr;
+    xxh_u64 value = 0;
+    int i;
+    for (i = 7; i >= 0; i--) {
+        value = (value << 8) | bytes[i];
+    }
+    return value;
+}
+
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
+{
+    /* assemble the value byte by byte; byte 0 ends up most significant */
+    const xxh_u8* const bytes = (const xxh_u8 *)memPtr;
+    xxh_u64 value = 0;
+    int i;
+    for (i = 0; i < 8; i++) {
+        value = (value << 8) | bytes[i];
+    }
+    return value;
+}
+
+#else
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    /* native load, byte-swapped only when the host is big endian */
+    xxh_u64 const raw = XXH_read64(ptr);
+    return XXH_CPU_LITTLE_ENDIAN ? raw : XXH_swap64(raw);
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    /* native load, byte-swapped only when the host is little endian */
+    xxh_u64 const raw = XXH_read64(ptr);
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(raw) : raw;
+}
+#endif
+
+/*
+ * Little-endian 64-bit load. With XXH_unaligned the read goes through
+ * XXH_readLE64(); any other value dereferences ptr directly as an xxh_u64.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align != XXH_unaligned) {
+        xxh_u64 const direct = *(const xxh_u64*)ptr;
+        return XXH_CPU_LITTLE_ENDIAN ? direct : XXH_swap64(direct);
+    }
+    return XXH_readLE64(ptr);
+}
+
+
+/******* xxh64 *******/
+/*!
+ * @}
+ * @defgroup xxh64_impl XXH64 implementation
+ * @ingroup impl
+ * @{
+ */
+static const xxh_u64 XXH_PRIME64_1 = 0x9E3779B185EBCA87ULL; /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const xxh_u64 XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const xxh_u64 XXH_PRIME64_3 = 0x165667B19E3779F9ULL; /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const xxh_u64 XXH_PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const xxh_u64 XXH_PRIME64_5 = 0x27D4EB2F165667C5ULL; /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+#ifdef XXH_OLD_NAMES
+# define PRIME64_1 XXH_PRIME64_1
+# define PRIME64_2 XXH_PRIME64_2
+# define PRIME64_3 XXH_PRIME64_3
+# define PRIME64_4 XXH_PRIME64_4
+# define PRIME64_5 XXH_PRIME64_5
+#endif
+
+/* One mixing round: adds a scaled input word to a lane, rotates, multiplies. */
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    xxh_u64 const mixed = acc + input * XXH_PRIME64_2;
+    xxh_u64 const rotated = XXH_rotl64(mixed, 31);
+    return rotated * XXH_PRIME64_1;
+}
+
+/* Merges one lane value into the running hash during convergence. */
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    xxh_u64 const lane = XXH64_round(0, val);
+    return (acc ^ lane) * XXH_PRIME64_1 + XXH_PRIME64_4;
+}
+
+/* Final bit mixing: three xor-shift steps with two multiplications in between. */
+static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+{
+    xxh_u64 h = h64;
+    h = (h ^ (h >> 33)) * XXH_PRIME64_2;
+    h = (h ^ (h >> 29)) * XXH_PRIME64_3;
+    return h ^ (h >> 32);
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+static xxh_u64
+XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{ /*
+ * Finalization step used by XXH64_endian_align() and XXH64_digest():
+ * folds the last (len & 31) bytes found at ptr into h64 and returns
+ * XXH64_avalanche(h64).
+ *
+ * Bytes are consumed as 8-byte words first (XXH_PROCESS8_64), then at most
+ * one 4-byte word (XXH_PROCESS4_64), then single bytes (XXH_PROCESS1_64).
+ * Only the low 5 bits of len are used, so a total length may be passed.
+ * align is not referenced by name below; XXH_get64bits() picks it up
+ * through its macro expansion (XXH_get32bits is defined elsewhere).
+ */
+#define XXH_PROCESS1_64 do { \
+ h64 ^= (*ptr++) * XXH_PRIME64_5; \
+ h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; \
+} while (0)
+
+#define XXH_PROCESS4_64 do { \
+ h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; \
+ ptr += 4; \
+ h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; \
+} while (0)
+
+#define XXH_PROCESS8_64 do { \
+ xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
+ ptr += 8; \
+ h64 ^= k1; \
+ h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; \
+} while (0)
+
+ /* Rerolled version for 32-bit targets is faster and much smaller.
+  * Both branches consume the same (len & 31) trailing bytes in the same
+  * order (8-byte words, one optional 4-byte word, single bytes); they
+  * differ only in code shape. */
+ if (XXH_REROLL || XXH_REROLL_XXH64) {
+ len &= 31;
+ while (len >= 8) {
+ XXH_PROCESS8_64;
+ len -= 8;
+ }
+ if (len >= 4) {
+ XXH_PROCESS4_64;
+ len -= 4;
+ }
+ while (len > 0) {
+ XXH_PROCESS1_64;
+ --len;
+ }
+ return XXH64_avalanche(h64);
+ } else {
+ switch(len & 31) {
+ case 24: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 16: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 8: XXH_PROCESS8_64;
+ return XXH64_avalanche(h64);
+
+ case 28: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 20: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 12: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 4: XXH_PROCESS4_64;
+ return XXH64_avalanche(h64);
+
+ case 25: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 17: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 9: XXH_PROCESS8_64;
+ XXH_PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 29: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 21: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 13: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 5: XXH_PROCESS4_64;
+ XXH_PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 26: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 18: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 10: XXH_PROCESS8_64;
+ XXH_PROCESS1_64;
+ XXH_PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 30: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 22: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 14: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 6: XXH_PROCESS4_64;
+ XXH_PROCESS1_64;
+ XXH_PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 27: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 19: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 11: XXH_PROCESS8_64;
+ XXH_PROCESS1_64;
+ XXH_PROCESS1_64;
+ XXH_PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 31: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 23: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 15: XXH_PROCESS8_64;
+ /* fallthrough */
+ case 7: XXH_PROCESS4_64;
+ /* fallthrough */
+ case 3: XXH_PROCESS1_64;
+ /* fallthrough */
+ case 2: XXH_PROCESS1_64;
+ /* fallthrough */
+ case 1: XXH_PROCESS1_64;
+ /* fallthrough */
+ case 0: return XXH64_avalanche(h64);
+ }
+ }
+ /* impossible to reach: the rerolled branch always returns, and the switch
+  * has a returning case for every value 0..31 of (len & 31) */
+ XXH_ASSERT(0);
+ return 0; /* unreachable, but some compilers complain without it */
+}
+
+#ifdef XXH_OLD_NAMES
+# define PROCESS1_64 XXH_PROCESS1_64
+# define PROCESS4_64 XXH_PROCESS4_64
+# define PROCESS8_64 XXH_PROCESS8_64
+#else
+# undef XXH_PROCESS1_64
+# undef XXH_PROCESS4_64
+# undef XXH_PROCESS8_64
+#endif
+
+/*
+ * One-shot XXH64 core. Inputs of 32 bytes or more are consumed in 32-byte
+ * stripes spread over four lanes; the remaining tail (and the whole input
+ * when it is shorter than 32 bytes) is handed to XXH64_finalize().
+ * XXH_get64bits() refers to the `align` parameter by name.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    const xxh_u8* cursor = input;
+    const xxh_u8* end;
+    xxh_u64 hash;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (cursor == NULL) {
+        /* treat NULL as an empty input located at a dummy non-null address */
+        len = 0;
+        cursor = (const xxh_u8*)(size_t)32;
+    }
+#endif
+    end = cursor + len;
+
+    if (len < 32) {
+        hash = seed + XXH_PRIME64_5;
+    } else {
+        const xxh_u8* const lastStripe = end - 32;
+        xxh_u64 lane1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 lane2 = seed + XXH_PRIME64_2;
+        xxh_u64 lane3 = seed + 0;
+        xxh_u64 lane4 = seed - XXH_PRIME64_1;
+
+        do {
+            lane1 = XXH64_round(lane1, XXH_get64bits(cursor));
+            lane2 = XXH64_round(lane2, XXH_get64bits(cursor + 8));
+            lane3 = XXH64_round(lane3, XXH_get64bits(cursor + 16));
+            lane4 = XXH64_round(lane4, XXH_get64bits(cursor + 24));
+            cursor += 32;
+        } while (cursor <= lastStripe);
+
+        hash = XXH_rotl64(lane1, 1) + XXH_rotl64(lane2, 7)
+             + XXH_rotl64(lane3, 12) + XXH_rotl64(lane4, 18);
+        hash = XXH64_mergeRound(hash, lane1);
+        hash = XXH64_mergeRound(hash, lane2);
+        hash = XXH64_mergeRound(hash, lane3);
+        hash = XXH64_mergeRound(hash, lane4);
+    }
+
+    hash += (xxh_u64) len;
+
+    return XXH64_finalize(hash, cursor, len, align);
+}
+
+
+/*!
+ * @ingroup xxh64_family
+ * One-shot 64-bit hash of @p len bytes at @p input, using @p seed.
+ *
+ * An equivalent formulation through XXH64_reset() / XXH64_update() /
+ * XXH64_digest() would be simpler to maintain, but is slow for small inputs,
+ * so the dedicated one-shot core is called instead.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+{
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+
+    /* when alignment checking is enabled, 8-byte aligned input takes the aligned path */
+    if (XXH_FORCE_ALIGN_CHECK && (((size_t)input) & 7) == 0) {
+        return XXH64_endian_align(bytes, len, seed, XXH_aligned);
+    }
+
+    return XXH64_endian_align(bytes, len, seed, XXH_unaligned);
+}
+
+/******* Hash Streaming *******/
+
+/*!
+ * @ingroup xxh64_family
+ * Obtains storage for one XXH64_state_t from XXH_malloc() and returns it
+ * as is; the contents are not initialized here.
+ */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    void* const storage = XXH_malloc(sizeof(XXH64_state_t));
+    return (XXH64_state_t*)storage;
+}
+/*!
+ * @ingroup xxh64_family
+ * Hands @p statePtr to XXH_free() and reports XXH_OK unconditionally.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_errorcode const status = XXH_OK;
+    XXH_free(statePtr);
+    return status;
+}
+
+/*!
+ * @ingroup xxh64_family
+ * Byte-for-byte copy of @p srcState into @p dstState.
+ */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+    size_t const stateBytes = sizeof(XXH64_state_t);
+    memcpy(dstState, srcState, stateBytes);
+}
+
+/*!
+ * @ingroup xxh64_family
+ * Re-initializes @p statePtr so that a new hash can be started with @p seed.
+ * Always returns XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+{
+    /* built in a local and copied over, to avoid strict-aliasing warnings */
+    XXH64_state_t fresh;
+    /* reserved64 is left untouched: it might be removed in a future version */
+    size_t const copied = sizeof(fresh) - sizeof(fresh.reserved64);
+
+    memset(&fresh, 0, sizeof(fresh));
+    fresh.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    fresh.v2 = seed + XXH_PRIME64_2;
+    fresh.v3 = seed + 0;
+    fresh.v4 = seed - XXH_PRIME64_1;
+
+    memcpy(statePtr, &fresh, copied);
+    return XXH_OK;
+}
+
+/*!
+ * @ingroup xxh64_family
+ * Feeds @p len bytes at @p input into the streaming state.
+ *
+ * Data is consumed in 32-byte stripes. Bytes that do not fill a stripe are
+ * kept in state->mem64 (state->memsize of them) until a later call completes
+ * the stripe or XXH64_digest() is called.
+ *
+ * @return XXH_OK on success. A NULL @p input yields XXH_OK when
+ *         XXH_ACCEPT_NULL_INPUT_POINTER >= 1 and XXH_ERROR otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len += len;
+
+        if (state->memsize + len < 32) {  /* not enough for a stripe: just buffer */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
+            state->memsize += (xxh_u32)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* complete the buffered stripe and consume it */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
+            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
+            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
+            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
+            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
+            p += 32 - state->memsize;
+            state->memsize = 0;
+        }
+
+        /*
+         * Compare the remaining byte count rather than forming p+32: when
+         * fewer than 32 bytes are left, p+32 would point more than one past
+         * the end of the input, which is undefined behavior. p <= bEnd holds
+         * here, so the subtraction is well defined.
+         */
+        if ((size_t)(bEnd - p) >= 32) {
+            const xxh_u8* const limit = bEnd - 32;
+            xxh_u64 v1 = state->v1;
+            xxh_u64 v2 = state->v2;
+            xxh_u64 v3 = state->v3;
+            xxh_u64 v4 = state->v4;
+
+            do {
+                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
+                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
+                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
+                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {   /* keep the incomplete tail for the next call */
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (xxh_u32)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*!
+ * @ingroup xxh64_family
+ * Computes the 64-bit hash of everything accumulated in @p state.
+ * The state is only read, so more data may be appended afterwards.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+{
+    const xxh_u8* const tail = (const xxh_u8*)state->mem64;
+    xxh_u64 acc;
+
+    if (state->total_len < 32) {
+        /* no stripe was consumed: v3 is used as the seed here */
+        acc = state->v3 + XXH_PRIME64_5;
+    } else {
+        /* fold the four lanes, then merge each of them once more */
+        acc = XXH_rotl64(state->v1, 1) + XXH_rotl64(state->v2, 7)
+            + XXH_rotl64(state->v3, 12) + XXH_rotl64(state->v4, 18);
+        acc = XXH64_mergeRound(acc, state->v1);
+        acc = XXH64_mergeRound(acc, state->v2);
+        acc = XXH64_mergeRound(acc, state->v3);
+        acc = XXH64_mergeRound(acc, state->v4);
+    }
+
+    acc += (xxh_u64) state->total_len;
+
+    /* XXH64_finalize() only looks at the low 5 bits of the length it is given */
+    return XXH64_finalize(acc, tail, (size_t)state->total_len, XXH_aligned);
+}
+
+
+/******* Canonical representation *******/
+
+/*!
+ * @ingroup xxh64_family
+ * Stores @p hash into @p dst in canonical (big endian) byte order.
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    XXH64_hash_t bigEndian = hash;
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    /* swap only on little-endian hosts */
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        bigEndian = XXH_swap64(bigEndian);
+    }
+    memcpy(dst, &bigEndian, sizeof(*dst));
+}
+
+/*!
+ * @ingroup xxh64_family
+ * Reads a hash value back from its canonical (big endian) form.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+    XXH64_hash_t const hash = XXH_readBE64(src);
+    return hash;
+}
+
+
+
+/* *********************************************************************
+* XXH3
+* New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+/*!
+ * @}
+ * @defgroup xxh3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
+ */
+
+/* === Compiler specifics === */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99: the restrict qualifier is available */
+# define XXH_RESTRICT restrict
+#else
+/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
+# define XXH_RESTRICT /* disable: expands to nothing */
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
+ || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+ || defined(__clang__)
+# define XXH_likely(x) __builtin_expect(x, 1) /* branch hint: x is expected to be 1 */
+# define XXH_unlikely(x) __builtin_expect(x, 0) /* branch hint: x is expected to be 0 */
+#else
+# define XXH_likely(x) (x) /* no hint available: plain expression */
+# define XXH_unlikely(x) (x) /* no hint available: plain expression */
+#endif
+
+#if defined(__GNUC__)
+# if defined(__AVX2__)
+# include <immintrin.h>
+# elif defined(__SSE2__)
+# include <emmintrin.h>
+# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+# define inline __inline__ /* circumvent a clang bug */
+# include <arm_neon.h>
+# undef inline
+# endif
+#elif defined(_MSC_VER)
+# include <intrin.h>
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ * xxh_u64 x;
+ * x ^= (x >> 47); // good
+ * x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ * x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ * // note: funnel shifts are not usually cheap.
+ * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ * x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ * - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ * 32 bits in the shift.
+ * - The shift result will always fit in the lower 32 bits, and therefore,
+ * we can ignore the upper 32 bits in the xor.
+ *
+ * Thanks to this optimization, XXH3 only requires these features to be efficient:
+ *
+ * - Usable unaligned access
+ * - A 32-bit or 64-bit ALU
+ * - If 32-bit, a decent ADC instruction
+ * - A 32 or 64-bit multiply with a 64-bit result
+ * - For the 128-bit variant, a decent byteswap helps short inputs.
+ *
+ * The first two are already required by XXH32, and almost all 32-bit and 64-bit
+ * platforms which can run XXH32 can run XXH3 efficiently.
+ *
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
+ * notable exception.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand allowing free
+ * shifts is helpful, too.
+ *
+ * Therefore, we do a quick sanity check.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we will
+ * emit a warning, as it is not a "sane" platform to compile for.
+ *
+ * Usually, if this happens, it is because of an accident and you probably need
+ * to specify -march, as you likely meant to compile for a newer architecture.
+ *
+ * Credit: large sections of the vectorial and asm source code paths
+ * have been contributed by @easyaspi314
+ */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+# warning "XXH3 is highly inefficient without ARM or Thumb-2."
+#endif
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @ingroup tuning
+ * @brief Overrides the vectorization implementation chosen for XXH3.
+ *
+ * Can be defined to 0 to disable SIMD or any of the values mentioned in
+ * @ref XXH_VECTOR_TYPE.
+ *
+ * If this is not defined, it uses predefined macros to determine the best
+ * implementation.
+ */
+# define XXH_VECTOR XXH_SCALAR
+/*!
+ * @ingroup tuning
+ * @brief Possible values for @ref XXH_VECTOR.
+ *
+ * Note that these are actually implemented as macros.
+ *
+ * If this is not defined, it is detected automatically.
+ * @ref XXH_X86DISPATCH overrides this.
+ */
+enum XXH_VECTOR_TYPE /* fake enum */ {
+ XXH_SCALAR = 0, /*!< Portable scalar version */
+ XXH_SSE2 = 1, /*!<
+ * SSE2 for Pentium 4, Opteron, all x86_64.
+ *
+ * @note SSE2 is also guaranteed on Windows 10, macOS, and
+ * Android x86.
+ */
+ XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
+ XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
+ XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */
+ XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+};
+/*!
+ * @ingroup tuning
+ * @brief Selects the minimum alignment for XXH3's accumulators.
+ *
+ * When using SIMD, this should match the alignment required for said vector
+ * type, so, for example, 32 for AVX2.
+ *
+ * Default: Auto detected.
+ */
+# define XXH_ACC_ALIGN 8
+#endif
+
+/* Actual definition */
+#ifndef XXH_DOXYGEN
+# define XXH_SCALAR 0
+# define XXH_SSE2 1
+# define XXH_AVX2 2
+# define XXH_AVX512 3
+# define XXH_NEON 4
+# define XXH_VSX 5
+#endif
+
+/*
+ * Automatic selection of the vector back end, unless XXH_VECTOR was already
+ * defined. The first matching entry wins: AVX512, AVX2, SSE2, little-endian
+ * NEON (GNU-compatible compilers only), VSX / ZVector (GNU-compatible
+ * compilers only), and finally the portable scalar code.
+ */
+#ifndef XXH_VECTOR    /* can be defined on command line */
+#  if defined(__AVX512F__)
+#    define XXH_VECTOR XXH_AVX512
+#  elif defined(__AVX2__)
+#    define XXH_VECTOR XXH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXH_VECTOR XXH_SSE2
+#  elif defined(__GNUC__) /* msvc support maybe later */ \
+     && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
+     && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
+         || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#    define XXH_VECTOR XXH_NEON
+   /* The outer parentheses matter: && binds tighter than ||, so without them
+    * the __GNUC__ requirement would only guard the s390x test. */
+#  elif ((defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+         || (defined(__s390x__) && defined(__VEC__))) \
+     && defined(__GNUC__) /* TODO: IBM XL */
+#    define XXH_VECTOR XXH_VSX
+#  else
+#    define XXH_VECTOR XXH_SCALAR
+#  endif
+#endif
+
+/*
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
+ *
+ * A predefined XXH_ACC_ALIGN is kept as is. Otherwise the value follows
+ * XXH_VECTOR: 8 for scalar, 16 for SSE2, NEON and VSX, 32 for AVX2, and 64
+ * for AVX512 or whenever XXH_X86DISPATCH is defined. No value is defined
+ * here if XXH_VECTOR matches none of these.
+ */
+#ifndef XXH_ACC_ALIGN
+# if defined(XXH_X86DISPATCH)
+# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
+# elif XXH_VECTOR == XXH_SCALAR /* scalar */
+# define XXH_ACC_ALIGN 8
+# elif XXH_VECTOR == XXH_SSE2 /* sse2 */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_AVX2 /* avx2 */
+# define XXH_ACC_ALIGN 32
+# elif XXH_VECTOR == XXH_NEON /* neon */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_VSX /* vsx */
+# define XXH_ACC_ALIGN 16
+# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
+# define XXH_ACC_ALIGN 64
+# endif
+#endif
+
+#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
+ || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
+# define XXH_SEC_ALIGN XXH_ACC_ALIGN /* x86 SIMD or dispatch builds: same value as XXH_ACC_ALIGN */
+#else
+# define XXH_SEC_ALIGN 8 /* every other configuration */
+#endif
+
+/*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling resulting
+ * in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
+ * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use either
+ * -O2 -mavx2 -march=haswell
+ * or
+ * -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
+ *
+ * Fortunately, we can control the first one with a pragma that forces GCC into
+ * -O2, but the other one we can't control without "failed to inline always
+ * inline function due to target mismatch" warnings.
+ */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+# pragma GCC push_options
+# pragma GCC optimize("-O2")
+#endif
+
+
+#if XXH_VECTOR == XXH_NEON
+/*
+ * NEON's setup for vmlal_u32 is a little more complicated than it is on
+ * SSE2, AVX2, and VSX.
+ *
+ * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
+ *
+ * To do the same operation, the 128-bit 'Q' register needs to be split into
+ * two 64-bit 'D' registers, performing this operation::
+ *
+ * [ a | b ]
+ * | '---------. .--------' |
+ * | x |
+ * | .---------' '--------. |
+ * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ]
+ *
+ * Due to significant changes in aarch64, the fastest method for aarch64 is
+ * completely different than the fastest method for ARMv7-A.
+ *
+ * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
+ * D11 will modify the high half of Q5. This is similar to how modifying AH
+ * will only affect bits 8-15 of AX on x86.
+ *
+ * VZIP takes two registers, and puts even lanes in one register and odd lanes
+ * in the other.
+ *
+ * On ARMv7-A, this strangely modifies both parameters in place instead of
+ * taking the usual 3-operand form.
+ *
+ * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
+ * lower and upper halves of the Q register to end up with the high and low
+ * halves where we want - all in one instruction.
+ *
+ * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
+ *
+ * Unfortunately we need inline assembly for this: instructions modifying two
+ * registers at once are not possible in GCC or Clang's IR, and they have to
+ * create a copy.
+ *
+ * aarch64 requires a different approach.
+ *
+ * In order to make it easier to write a decent compiler for aarch64, many
+ * quirks were removed, such as conditional execution.
+ *
+ * NEON was also affected by this.
+ *
+ * aarch64 cannot access the high bits of a Q-form register, and writes to a
+ * D-form register zero the high bits, similar to how writes to W-form scalar
+ * registers (or DWORD registers on x86_64) work.
+ *
+ * The formerly free vget_high intrinsics now require a vext (with a few
+ * exceptions)
+ *
+ * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
+ * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
+ * operand.
+ *
+ * The equivalent of the VZIP.32 on the lower and upper halves would be this
+ * mess:
+ *
+ * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
+ * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] }
+ * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] }
+ *
+ * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ *
+ * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32);
+ * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ *
+ * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ */
+
+/*!
+ * Function-like macro:
+ * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
+ * {
+ * outLo = (uint32x2_t)(in & 0xFFFFFFFF);
+ * outHi = (uint32x2_t)(in >> 32);
+ * in = UNDEFINED;
+ * }
+ */
+# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
+ && defined(__GNUC__) \
+ && !defined(__aarch64__) && !defined(__arm64__)
+# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
+ do { \
+ /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
+ /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \
+ /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
+ __asm__("vzip.32 %e0, %f0" : "+w" (in)); \
+ (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \
+ (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \
+ } while (0)
+# else /* intrinsics-only form: narrows and shifts without writing to `in` */
+# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
+ do { \
+ (outLo) = vmovn_u64 (in); \
+ (outHi) = vshrn_n_u64 ((in), 32); \
+ } while (0)
+# endif
+#endif /* XXH_VECTOR == XXH_NEON */
+
+/*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+#if XXH_VECTOR == XXH_VSX
+# if defined(__s390x__)
+# include <s390intrin.h>
+# else
+/* gcc's altivec.h can have the unwanted consequence of unconditionally
+ * #defining the bool, vector, and pixel keywords,
+ * with bad consequences for programs already using these keywords for other purposes.
+ * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
+ * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
+ * but it seems that, in some cases, it isn't.
+ * Force the build macro to be defined, so that keywords are not altered.
+ */
+# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
+# define __APPLE_ALTIVEC__
+# endif
+# include <altivec.h>
+# endif
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+# ifndef XXH_VSX_BE
+# if defined(__BIG_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+# define XXH_VSX_BE 1
+# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+# warning "-maltivec=be is not recommended. Please use native endianness."
+# define XXH_VSX_BE 1
+# else
+# define XXH_VSX_BE 0
+# endif
+# endif /* !defined(XXH_VSX_BE) */
+
+# if XXH_VSX_BE
+# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+# define XXH_vec_revb vec_revb
+# else
+/*!
+ * Stand-in for POWER9's vec_revb(), built on vec_perm(): reverses the byte
+ * order inside each of the two 64-bit lanes of @p val.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
+{
+    /* permutation indices: bytes 7..0 of the first lane, then 15..8 of the second */
+    xxh_u8x16 const reverseEachLane = {  7,  6,  5,  4,  3,  2, 1, 0,
+                                        15, 14, 13, 12, 11, 10, 9, 8 };
+    return vec_perm(val, val, reverseEachLane);
+}
+# endif
+# endif /* XXH_VSX_BE */
+
+/*!
+ * Loads 16 bytes from a possibly unaligned address into a vector; when
+ * XXH_VSX_BE is set, each 64-bit lane is byte swapped afterwards.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
+{
+    xxh_u64x2 loaded;
+    memcpy(&loaded, ptr, sizeof(loaded));
+# if XXH_VSX_BE
+    return XXH_vec_revb(loaded);
+# else
+    return loaded;
+# endif
+}
+
+/*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian dependent. Also, their meanings swap depending on version.
+ * */
+# if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+# define XXH_vec_mulo vec_mulo
+# define XXH_vec_mule vec_mule
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+# define XXH_vec_mulo __builtin_altivec_vmulouw
+# define XXH_vec_mule __builtin_altivec_vmuleuw
+# else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+/* Emits the vmulouw instruction directly through inline assembly. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 product;
+    __asm__("vmulouw %0, %1, %2" : "=v" (product) : "v" (a), "v" (b));
+    return product;
+}
+/* Emits the vmuleuw instruction directly through inline assembly. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 product;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (product) : "v" (a), "v" (b));
+    return product;
+}
+# endif /* XXH_vec_mulo, XXH_vec_mule */
+#endif /* XXH_VECTOR == XXH_VSX */
+
+
+/* XXH_PREFETCH(ptr): issues a read prefetch hint for the memory at ptr.
+ * Prefetching can be disabled by defining the XXH_NO_PREFETCH build macro;
+ * in that case, and on compilers not recognized below, the macro only evaluates ptr. */
+#if defined(XXH_NO_PREFETCH)
+# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
+#else
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) /* GCC 3.1 or newer */
+# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# else
+# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
+# endif
+#endif /* XXH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define XXH_SECRET_DEFAULT_SIZE 192 /* size in bytes of XXH3_kSecret; must be at least XXH3_SECRET_SIZE_MIN (checked just below) */
+
+#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
+# error "default keyset is not large enough"
+#endif
+
+/*! Pseudorandom secret taken directly from FARSH.
+ *  This is the default key material; the short/mid-size hashers read it at fixed byte offsets. */
+XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
+ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+ 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+ 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+ 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+ 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+ 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+ 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+ 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+ 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+ 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+ 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+};
+
+
+#ifdef XXH_OLD_NAMES /* keep the pre-rename identifier available */
+# define kSecret XXH3_kSecret
+#endif
+
+#ifdef XXH_DOXYGEN /* documentation-only definition; the other branches provide the macro */
+/*!
+ * @brief Calculates a 32-bit to 64-bit long multiply.
+ *
+ * Implemented as a macro.
+ *
+ * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
+ * need to (but it shouldn't need to anyways, it is about 7 instructions to do
+ * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
+ * use that instead of the normal method.
+ *
+ * If you are compiling for platforms like Thumb-1 and don't have a better option,
+ * you may also want to write your own long multiply routine here.
+ *
+ * @param x, y Numbers to be multiplied; only their low 32 bits are used.
+ * @return 64-bit product of the low 32 bits of @p x and @p y.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64(xxh_u64 x, xxh_u64 y)
+{
+ return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
+}
+#elif defined(_MSC_VER) && defined(_M_IX86) /* MSVC targeting 32-bit x86 */
+# include <intrin.h>
+# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
+#else /* every other compiler/target */
+/*
+ * Downcast + upcast is usually better than masking on older compilers like
+ * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
+ *
+ * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
+ * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
+ */
+# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
+#endif
+
+/*!
+ * @brief Multiplies two 64-bit integers into a full 128-bit product.
+ *
+ * One of three implementations is selected at compile time: `__uint128_t`,
+ * MSVC's `_umul128`, or a portable scalar routine.
+ *
+ * @param lhs, rhs The 64-bit factors.
+ * @return The 128-bit product, stored in the low64/high64 fields of an
+ *         @ref XXH128_hash_t.
+ */
+static XXH128_hash_t
+XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
+{
+    /*
+     * Path 1: GCC/Clang __uint128_t.
+     *
+     * On most 64-bit targets this maps onto a native widening multiply
+     * (MULQ on x86_64, MUL + UMULH on aarch64).
+     *
+     * wasm is excluded on purpose: Clang (and emscripten) define the type
+     * there without native arithmetic behind it, so the multiply becomes a
+     * slow compiler builtin call and the portable path is the better choice.
+     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
+     */
+#if (defined(__GNUC__) && !defined(__wasm__) \
+    && defined(__SIZEOF_INT128__)) \
+    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+
+    __uint128_t const wide = (__uint128_t)lhs * (__uint128_t)rhs;
+    XXH128_hash_t result;
+    result.high64 = (xxh_u64)(wide >> 64);
+    result.low64  = (xxh_u64)(wide);
+    return result;
+
+    /*
+     * Path 2: _umul128 on MSVC x64.
+     *
+     *   xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
+     *
+     * Compiles to a single-operand MUL on x64.
+     */
+#elif defined(_M_X64) || defined(_M_IA64)
+
+#ifndef _MSC_VER
+#   pragma intrinsic(_umul128)
+#endif
+    xxh_u64 hi;
+    xxh_u64 const lo = _umul128(lhs, rhs, &hi);
+    XXH128_hash_t result;
+    result.high64 = hi;
+    result.low64  = lo;
+    return result;
+
+#else
+    /*
+     * Path 3: portable scalar multiply, tuned for 32-bit and 64-bit ALUs.
+     *
+     * Grade-school long multiplication in base 2^32. The same scheme in
+     * base 10, for 93 x 75:
+     *
+     *   p00 = 3 * 5 = 15      p10 = 9 * 5 = 45
+     *   p01 = 3 * 7 = 21      p11 = 9 * 7 = 63
+     *   mid  = (15 / 10) + (45 % 10) + 21 = 27
+     *   high = (45 / 10) + (27 / 10) + 63 = 67
+     *   res  = (67 * 100) + ((27 % 10) * 10) + (15 % 10) = 6975
+     *
+     * Why the partial products are summed this way:
+     *   1. No explicit carry tracking is needed: just as
+     *      (9 * 9) + 9 + 9 = 99 in base 10, the sums cannot overflow 64 bits.
+     *   2. It hints for (and, on Clang, compiles to) ARM's UMAAL instruction
+     *      from the DSP extension of 32-bit ARMv6 and later, which computes
+     *      RdHi:RdLo = Rn * Rm + RdLo + RdHi in a single instruction, making
+     *      the whole multiply about 4 instructions.
+     *   3. Elsewhere it costs a couple of 32-bit ADD/ADC instructions.
+     */
+
+    /* The four 32x32->64 partial products. */
+    xxh_u64 const p00 = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
+    xxh_u64 const p10 = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
+    xxh_u64 const p01 = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
+    xxh_u64 const p11 = XXH_mult32to64(lhs >> 32,        rhs >> 32);
+
+    /* Combine them; none of these sums can overflow. */
+    xxh_u64 const mid  = (p00 >> 32) + (p10 & 0xFFFFFFFF) + p01;
+    xxh_u64 const high = (p10 >> 32) + (mid >> 32) + p11;
+    xxh_u64 const low  = (mid << 32) | (p00 & 0xFFFFFFFF);
+
+    XXH128_hash_t result;
+    result.high64 = high;
+    result.low64  = low;
+    return result;
+#endif
+}
+
+/*!
+ * @brief Performs a 64-bit to 128-bit multiply and XOR-folds the result to 64 bits.
+ *
+ * Kept as its own function so that the 128-bit struct does not have to be
+ * passed around by value; inlining of the multiply is hoped for, not forced.
+ *
+ * @param lhs, rhs The 64-bit integers to multiply
+ * @return low64 XOR high64 of the 128-bit product.
+ * @see XXH_mult64to128()
+ */
+static xxh_u64
+XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
+{
+    XXH128_hash_t const wide = XXH_mult64to128(lhs, rhs);
+    xxh_u64 const folded = wide.high64 ^ wide.low64;
+    return folded;
+}
+
+/*!
+ * XORs @p v64 with itself shifted right by @p shift bits (0 <= shift < 64).
+ * Exists as a helper because it seems to produce slightly better code on GCC.
+ */
+XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+{
+    xxh_u64 shifted;
+    XXH_ASSERT(0 <= shift && shift < 64);
+    shifted = v64 >> shift;
+    return v64 ^ shifted;
+}
+
+/*
+ * Fast avalanche stage: xorshift by 37, multiply, xorshift by 32.
+ * Suitable when the input bits are already partially mixed.
+ */
+static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
+{
+    xxh_u64 const scrambled = XXH_xorshift64(h64, 37) * 0x165667919E3779F9ULL;
+    return XXH_xorshift64(scrambled, 32);
+}
+
+/*
+ * Stronger avalanche, inspired by Pelle Evensen's rrmxmx.
+ * Preferable when the input has not been mixed beforehand; @p len is folded
+ * into the state between the two multiplications.
+ */
+static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
+{
+    xxh_u64 const multiplier = 0x9FB21C651E98DF25ULL;
+    xxh_u64 mixed = h64 ^ XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
+    mixed *= multiplier;
+    mixed ^= (mixed >> 35) + len;
+    mixed *= multiplier;
+    return XXH_xorshift64(mixed, 28);
+}
+
+
+/* ==========================================
+ * Short keys
+ * ==========================================
+ * One of the shortcomings of XXH32 and XXH64 was that their performance was
+ * sub-optimal on short lengths. They used an iterative algorithm which strongly
+ * favored lengths that were a multiple of 4 or 8.
+ *
+ * Instead of iterating over individual inputs, we use a set of single shot
+ * functions which piece together a range of lengths and operate in constant time.
+ *
+ * Additionally, the number of multiplies has been significantly reduced. This
+ * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
+ *
+ * Depending on the platform, this may or may not be faster than XXH32, but it
+ * is almost guaranteed to be faster than XXH64.
+ */
+
+/*
+ * At very short lengths, there isn't enough input to fully hide secrets, or use
+ * the entire secret.
+ *
+ * There is also only a limited amount of mixing we can do before significantly
+ * impacting performance.
+ *
+ * Therefore, we use different sections of the secret and always mix two secret
+ * samples with an XOR. This should have no effect on performance on the
+ * seedless or withSeed variants because everything _should_ be constant folded
+ * by modern compilers.
+ *
+ * The XOR mixing hides individual parts of the secret and increases entropy.
+ *
+ * This adds an extra layer of strength for custom secrets.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(1 <= len && len <= 3);
+    XXH_ASSERT(secret != NULL);
+    /*
+     * The 1 to 3 input bytes and the length are packed into one 32-bit word:
+     *   bits  0..7  : input[len - 1]
+     *   bits  8..15 : len
+     *   bits 16..23 : input[0]
+     *   bits 24..31 : input[len >> 1]
+     * (for len == 1 all three byte slots hold input[0]).
+     */
+    {   xxh_u32 const first  = input[0];
+        xxh_u32 const middle = input[len >> 1];
+        xxh_u32 const last   = input[len - 1];
+        xxh_u32 const packed = last | ((xxh_u32)len << 8) | (first << 16) | (middle << 24);
+        /* two 32-bit secret samples are XORed together, then offset by the seed */
+        xxh_u64 const flip   = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
+        return XXH64_avalanche((xxh_u64)packed ^ flip);
+    }
+}
+
+/*
+ * Hashes inputs of 4 to 8 bytes (both bounds inclusive).
+ *
+ * input  : the bytes to hash, must not be NULL
+ * len    : number of input bytes, 4 <= len <= 8
+ * secret : key material; bytes [8, 24) are read
+ * seed   : 64-bit seed
+ * Returns the 64-bit hash.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    /* len == 8 is a legal input: XXH3_len_0to16_64b() only diverts len > 8
+     * to the 9to16 routine, so 8-byte keys are handled here. */
+    XXH_ASSERT(4 <= len && len <= 8);
+    /* fold the byte-swapped low 32 bits of the seed into its upper half */
+    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
+    {   /* first and last 4 bytes of the input; the two reads overlap when len < 8 */
+        xxh_u32 const input1 = XXH_readLE32(input);
+        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
+        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
+        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
+        xxh_u64 const keyed = input64 ^ bitflip;
+        return XXH3_rrmxmx(keyed, len);
+    }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(8 <= len && len <= 16);
+    {   /* one seeded secret sample for each half of the input */
+        xxh_u64 const flipLo = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
+        xxh_u64 const flipHi = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
+        /* first and last 8 input bytes; the reads overlap when len < 16 */
+        xxh_u64 const lo = XXH_readLE64(input)           ^ flipLo;
+        xxh_u64 const hi = XXH_readLE64(input + len - 8) ^ flipHi;
+        xxh_u64 acc = len;
+        acc += XXH_swap64(lo);
+        acc += hi;
+        acc += XXH3_mul128_fold64(lo, hi);
+        return XXH3_avalanche(acc);
+    }
+}
+
+/* Dispatches inputs of at most 16 bytes to the matching fixed-range hasher. */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(len <= 16);
+    if (XXH_likely(len > 8)) {
+        return XXH3_len_9to16_64b(input, len, secret, seed);
+    }
+    if (XXH_likely(len >= 4)) {
+        return XXH3_len_4to8_64b(input, len, secret, seed);
+    }
+    if (len != 0) {
+        return XXH3_len_1to3_64b(input, len, secret, seed);
+    }
+    /* empty input: the hash depends only on the seed and two secret samples */
+    {   xxh_u64 const emptyFlip = XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64);
+        return XXH64_avalanche(seed ^ emptyFlip);
+    }
+}
+
+/*
+ * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
+ * multiplication by zero, affecting hashes of lengths 17 to 240.
+ *
+ * However, they are very unlikely.
+ *
+ * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
+ * unseeded non-cryptographic hashes, it does not attempt to defend itself
+ * against specially crafted inputs, only random inputs.
+ *
+ * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
+ * cancelling out the secret is taken an arbitrary number of times (addressed
+ * in XXH3_accumulate_512), this collision is very unlikely with random inputs
+ * and/or proper seeding:
+ *
+ * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
+ * function that is only called up to 16 times per hash with up to 240 bytes of
+ * input.
+ *
+ * This is not too bad for a non-cryptographic hash function, especially with
+ * only 64 bit outputs.
+ *
+ * The 128-bit variant (which trades some speed for strength) is NOT affected
+ * by this, although it is always a good idea to use a proper seed if you care
+ * about strength.
+ */
+/* Mixes 16 input bytes with 16 secret bytes and the seed into one 64-bit value. */
+XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
+                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+    && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE) /* define this macro to skip the hack, as with the XXH32 one */
+    /*
+     * UGLY HACK:
+     * GCC for x86 tends to autovectorize the 128-bit multiply, which yields
+     * slower code. Forcing seed64 through a register operand disrupts the
+     * cost model and makes GCC scalarize. See `XXH32_round()`.
+     *
+     * FIXME: Clang's output is still _much_ faster -- on an AMD Ryzen 3600,
+     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9 versus 3.3 GB/s
+     * with GCC 9.2, although both emit scalar code.
+     *
+     * GCC produces much better scalar code than Clang for the rest of XXH3,
+     * hence the interest in finding a better code path here.
+     */
+    __asm__ ("" : "+r" (seed64));
+#endif
+    {   xxh_u64 const lo    = XXH_readLE64(input);
+        xxh_u64 const hi    = XXH_readLE64(input+8);
+        xxh_u64 const keyLo = XXH_readLE64(secret)   + seed64;
+        xxh_u64 const keyHi = XXH_readLE64(secret+8) - seed64;
+        return XXH3_mul128_fold64(lo ^ keyLo, hi ^ keyHi);
+    }
+}
+
+/*
+ * Mid-range keys (17 to 128 bytes): XXH3 uses a Mum-hash variant.
+ * 16-byte chunks are taken in pairs, one from the front and one from the back
+ * of the input; longer inputs add more pairs.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                     XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+        if (len > 96) {
+            acc += XXH3_mix16B(input+48, secret+96, seed);
+            acc += XXH3_mix16B(input+len-64, secret+112, seed);
+        }
+        if (len > 64) {
+            acc += XXH3_mix16B(input+32, secret+64, seed);
+            acc += XXH3_mix16B(input+len-48, secret+80, seed);
+        }
+        if (len > 32) {
+            acc += XXH3_mix16B(input+16, secret+32, seed);
+            acc += XXH3_mix16B(input+len-32, secret+48, seed);
+        }
+        /* every length in range mixes the first and the last 16 bytes */
+        acc += XXH3_mix16B(input, secret, seed);
+        acc += XXH3_mix16B(input+len-16, secret+16, seed);
+
+        return XXH3_avalanche(acc);
+    }
+}
+
+#define XXH3_MIDSIZE_MAX 240
+
+/*
+ * Hashes inputs of 129 to XXH3_MIDSIZE_MAX bytes.
+ * The first 128 bytes are mixed in 8 rounds and avalanched; the remaining
+ * full 16-byte chunks and the last 16 bytes of the input are mixed on top.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    #define XXH3_MIDSIZE_STARTOFFSET 3
+    #define XXH3_MIDSIZE_LASTOFFSET 17
+
+    {   int const roundCount = (int)len / 16;
+        xxh_u64 acc = len * XXH_PRIME64_1;
+        int r;
+        for (r = 0; r < 8; r++) {
+            acc += XXH3_mix16B(input + (16*r), secret + (16*r), seed);
+        }
+        acc = XXH3_avalanche(acc);
+        XXH_ASSERT(roundCount >= 8);
+#if defined(__clang__) /* Clang */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE) /* define this macro to skip the hack */
+        /*
+         * UGLY HACK:
+         * Clang for ARMv7-A tries to vectorize this loop, much like GCC does
+         * on x86; everywhere else it emits scalar code.
+         *
+         * For 64->128-bit multiplies, even a 100% optimal NEON version would
+         * still be slower than UMAAL (see XXH_mult64to128).
+         *
+         * Clang does not handle the long multiplies properly: it converts
+         * them to the nonexistent "vmulq_u64" intrinsic, which is then
+         * scalarized into an ugly mess of VMOV.32 instructions.
+         *
+         * Avoiding this in general would require turning autovectorization
+         * off entirely, and the cases are usually minor or not worth fixing.
+         *
+         * This loop is the easy one: unlike in XXH32, the pragma _actually
+         * works_ here because this is a loop vectorization rather than an
+         * SLP vectorization.
+         */
+        #pragma clang loop vectorize(disable)
+#endif
+        for (r = 8; r < roundCount; r++) {
+            acc += XXH3_mix16B(input + (16*r), secret + (16*(r-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+        }
+        /* last 16 bytes of the input */
+        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+        return XXH3_avalanche(acc);
+    }
+}
+
+
+/* ======= Long Keys ======= */
+
+#define XXH_STRIPE_LEN 64
+#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
+#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+
+#ifdef XXH_OLD_NAMES
+# define STRIPE_LEN XXH_STRIPE_LEN
+# define ACC_NB XXH_ACC_NB
+#endif
+
+/* Stores v64 at dst in little-endian byte order; dst may be unaligned. */
+XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
+{
+    xxh_u64 const le = XXH_CPU_LITTLE_ENDIAN ? v64 : XXH_swap64(v64);
+    memcpy(dst, &le, sizeof(le));
+}
+
+/* Several intrinsic functions below are supposed to accept __int64 as argument,
+ * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
+ * However, several environments do not define the __int64 type,
+ * requiring a workaround: xxh_i64 is the signed 64-bit type used in its place.
+ */
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+ typedef int64_t xxh_i64; /* C++ or C99 and later, except on VMS */
+#else
+ /* the following type must have a width of 64-bit */
+ typedef long long xxh_i64;
+#endif
+
+/*
+ * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
+ *
+ * It is a hardened version of UMAC, based off of FARSH's implementation.
+ *
+ * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
+ * implementations, and it is ridiculously fast.
+ *
+ * We harden it by mixing the original input to the accumulators as well as the product.
+ *
+ * This means that in the (relatively likely) case of a multiply by zero, the
+ * original input is preserved.
+ *
+ * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
+ * cross-pollination, as otherwise the upper and lower halves would be
+ * essentially independent.
+ *
+ * This doesn't matter on 64-bit hashes since they all get merged together in
+ * the end, so we skip the extra step.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+
+#if (XXH_VECTOR == XXH_AVX512) \
+ || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
+
+#ifndef XXH_TARGET_AVX512
+# define XXH_TARGET_AVX512 /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ALIGN(64) __m512i* const xacc = (__m512i *) acc; /* acc must be 64-byte aligned (asserted below) */
+ XXH_ASSERT((((size_t)acc) & 63) == 0);
+ XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); /* one 512-bit register covers a whole stripe, hence no loop */
+
+ {
+ /* data_vec = input[0]; (unaligned load) */
+ __m512i const data_vec = _mm512_loadu_si512 (input);
+ /* key_vec = secret[0]; (unaligned load) */
+ __m512i const key_vec = _mm512_loadu_si512 (secret);
+ /* data_key = data_vec ^ key_vec; */
+ __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (the shuffle moves each lane's upper 32 bits into its lower 32 bits) */
+ __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
+ /* xacc[0] += swap(data_vec); (swap exchanges adjacent 64-bit lanes) */
+ __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
+ __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
+ /* xacc[0] += product; */
+ *xacc = _mm512_add_epi64(product, sum);
+ }
+}
+
+/*
+ * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
+ *
+ * Multiplication isn't perfect, as explained by Google in HighwayHash:
+ *
+ * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
+ * // varying degrees. In descending order of goodness, bytes
+ * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
+ * // As expected, the upper and lower bytes are much worse.
+ *
+ * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
+ *
+ * Since our algorithm uses a pseudorandom secret to add some variance into the
+ * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
+ *
+ * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
+ * extraction.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 63) == 0); /* acc must be 64-byte aligned */
+ XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
+ { XXH_ALIGN(64) __m512i* const xacc = (__m512i*) acc;
+ const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
+
+ /* xacc[0] ^= (xacc[0] >> 47) */
+ __m512i const acc_vec = *xacc;
+ __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
+ __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted);
+ /* xacc[0] ^= secret; */
+ __m512i const key_vec = _mm512_loadu_si512 (secret);
+ __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
+
+ /* xacc[0] *= XXH_PRIME32_1; built from two 32x32->64 multiplies: low half, plus high half shifted left by 32 */
+ __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+ __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
+ __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
+ *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
+ }
+}
+
+/*
+ * Derives a seeded secret from XXH3_kSecret (AVX512 version).
+ *
+ * Treating both buffers as arrays of 64-bit words, even words get
+ * XXH3_kSecret[i] + seed64 and odd words get XXH3_kSecret[i] - seed64.
+ *
+ * customSecret : destination, XXH_SECRET_DEFAULT_SIZE bytes, 64-byte aligned
+ * seed64       : the seed
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
+    XXH_ASSERT(((size_t)customSecret & 63) == 0);
+    (void)(&XXH_writeLE64);   /* reference the helper so it does not trigger an unused-function warning */
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
+        /* Negate in unsigned arithmetic: -(xxh_i64)seed64 is a signed overflow
+         * (undefined behavior) when seed64 == 0x8000000000000000. */
+        xxh_i64 const seed_neg = (xxh_i64)(0U - seed64);
+        /* mask 0xAA selects the odd 64-bit lanes, which receive -seed64 */
+        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, seed_neg);
+
+        XXH_ALIGN(64) const __m512i* const src  = (const __m512i*) XXH3_kSecret;
+        XXH_ALIGN(64)       __m512i* const dest = (      __m512i*) customSecret;
+        int i;
+        for (i=0; i < nbRounds; ++i) {
+            /* GCC has a bug: _mm512_stream_load_si512 accepts 'void*', not 'void const*',
+             * so passing src+i directly warns "discards 'const' qualifier".
+             * The union drops the qualifier without a cast. */
+            union {
+                XXH_ALIGN(64) const __m512i* cp;
+                XXH_ALIGN(64) void* p;
+            } remote_const_void;
+            remote_const_void.cp = src + i;
+            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_AVX2) \
+ || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
+
+#ifndef XXH_TARGET_AVX2
+# define XXH_TARGET_AVX2 /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 31) == 0); /* acc must be 32-byte aligned */
+ { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xinput = (const __m256i *) input;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xsecret = (const __m256i *) secret;
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { /* one iteration per 256-bit slice of the stripe */
+ /* data_vec = xinput[i]; */
+ __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
+ /* key_vec = xsecret[i]; */
+ __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
+ /* data_key = data_vec ^ key_vec; */
+ __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (the shuffle moves each lane's upper 32 bits into its lower 32 bits) */
+ __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
+ /* xacc[i] += swap(data_vec); (swap exchanges adjacent 64-bit lanes) */
+ __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
+ __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
+ /* xacc[i] += product; */
+ xacc[i] = _mm256_add_epi64(product, sum);
+ } }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 31) == 0); /* acc must be 32-byte aligned */
+ { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+ const __m256i* const xsecret = (const __m256i *) secret;
+ const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { /* one iteration per 256-bit slice of the accumulators */
+ /* xacc[i] ^= (xacc[i] >> 47) */
+ __m256i const acc_vec = xacc[i];
+ __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
+ __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
+ /* xacc[i] ^= xsecret[i]; */
+ __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
+ __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
+
+ /* xacc[i] *= XXH_PRIME32_1; built from two 32x32->64 multiplies: low half, plus high half shifted left by 32 */
+ __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
+ __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
+ xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
+ }
+ }
+}
+
+/*
+ * Derives a seeded secret from XXH3_kSecret (AVX2 version).
+ *
+ * Treating both buffers as arrays of 64-bit words, even words get
+ * XXH3_kSecret[i] + seed64 and odd words get XXH3_kSecret[i] - seed64.
+ *
+ * customSecret : destination, XXH_SECRET_DEFAULT_SIZE bytes
+ * seed64       : the seed
+ */
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);   /* matches the 6 unrolled stores below */
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
+    (void)(&XXH_writeLE64);   /* reference the helper so it does not trigger an unused-function warning */
+    XXH_PREFETCH(customSecret);
+    {   /* Negate in unsigned arithmetic: -(xxh_i64)seed64 is a signed overflow
+         * (undefined behavior) when seed64 == 0x8000000000000000. */
+        xxh_i64 const seed_neg = (xxh_i64)(0U - seed64);
+        /* _mm256_set_epi64x takes its arguments from the highest lane down to the lowest */
+        __m256i const seed = _mm256_set_epi64x(seed_neg, (xxh_i64)seed64, seed_neg, (xxh_i64)seed64);
+
+        XXH_ALIGN(64) const __m256i* const src  = (const __m256i*) XXH3_kSecret;
+        XXH_ALIGN(64)       __m256i*       dest = (      __m256i*) customSecret;
+
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dest' as modified makes the compiler:
+         *   - not extract the secret from SSE registers in the internal loop
+         *   - use fewer common registers, and avoid pushing them to the stack
+         * NOTE(review): the upstream comment here also mentioned aarch64 LDP
+         * merging, which looks inherited from another code path -- confirm.
+         */
+        __asm__("" : "+r" (dest));
+#       endif
+
+        /* GCC -O2 needs the loop unrolled manually */
+        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
+        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+    }
+}
+
+#endif
+
+/* x86dispatch always generates SSE2 */
+#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
+
+#ifndef XXH_TARGET_SSE2
+# define XXH_TARGET_SSE2 /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ /* SSE2 is just a half-scale version of the AVX2 version. */
+ XXH_ASSERT((((size_t)acc) & 15) == 0); /* acc must be 16-byte aligned */
+ { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xinput = (const __m128i *) input;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xsecret = (const __m128i *) secret;
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { /* one iteration per 128-bit slice of the stripe */
+ /* data_vec = xinput[i]; */
+ __m128i const data_vec = _mm_loadu_si128 (xinput+i);
+ /* key_vec = xsecret[i]; */
+ __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
+ /* data_key = data_vec ^ key_vec; */
+ __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; (the shuffle moves each lane's upper 32 bits into its lower 32 bits) */
+ __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
+ /* xacc[i] += swap(data_vec); (swap exchanges the two 64-bit lanes) */
+ __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
+ __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
+ /* xacc[i] += product; */
+ xacc[i] = _mm_add_epi64(product, sum);
+ } }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0); /* acc must be 16-byte aligned */
+ { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc;
+ /* Unaligned. This is mainly for pointer arithmetic, and because
+ * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+ const __m128i* const xsecret = (const __m128i *) secret;
+ const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { /* one iteration per 128-bit slice of the accumulators */
+ /* xacc[i] ^= (xacc[i] >> 47) */
+ __m128i const acc_vec = xacc[i];
+ __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
+ __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
+ /* xacc[i] ^= xsecret[i]; */
+ __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
+ __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
+
+ /* xacc[i] *= XXH_PRIME32_1; built from two 32x32->64 multiplies: low half, plus high half shifted left by 32 */
+ __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
+ __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
+ xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
+ }
+ }
+}
+
+/*
+ * SSE2 variant of the custom-secret derivation.
+ *
+ * Writes XXH_SECRET_DEFAULT_SIZE bytes to `customSecret`: each 16-byte chunk
+ * of XXH3_kSecret has `seed64` added to its low 64-bit lane and subtracted
+ * from its high 64-bit lane (both modulo 2^64).
+ *
+ * `customSecret` is written through plain __m128i assignments, so it must be
+ * suitably aligned for them.
+ */
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+    (void)(&XXH_writeLE64);  /* references the helper; presumably silences an unused-function warning */
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
+
+        /*
+         * Negate in unsigned arithmetic so the result wraps. Negating
+         * (xxh_i64)seed64 directly is signed overflow (undefined behavior)
+         * when seed64 == 0x8000000000000000.
+         */
+        xxh_i64 const negSeed64 = (xxh_i64)(0U - seed64);
+#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
+        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, negSeed64 };
+        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
+#       else
+        /* _mm_set_epi64x takes (high, low): low lane = +seed, high lane = -seed */
+        __m128i const seed = _mm_set_epi64x(negSeed64, (xxh_i64)seed64);
+#       endif
+        int i;
+
+        XXH_ALIGN(64) const float* const src = (float const*) XXH3_kSecret;
+        XXH_ALIGN(XXH_SEC_ALIGN) __m128i* dest = (__m128i*) customSecret;
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dest' as modified makes the compiler:
+         *   - not extract the secret from sse registers in the internal loop
+         *   - use less common registers, and avoid pushing these reg into stack
+         */
+        __asm__("" : "+r" (dest));
+#       endif
+
+        for (i=0; i < nbRounds; ++i) {
+            /* aligned 16-byte load of the default secret, reinterpreted as integers */
+            dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src+i*4)), seed);
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_NEON)
+
+/*
+ * NEON variant of the 512-bit accumulate step. For each 128-bit chunk
+ * (two 64-bit lanes) of the stripe:
+ *   acc += swap64(input)                       (lanes exchanged)
+ *   acc += lo32(input ^ secret) * hi32(input ^ secret)
+ * `acc` must be 16-byte aligned (asserted below); `input` and `secret`
+ * are read byte-wise via vld1q_u8.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ {
+ XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
+ /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
+ uint8_t const* const xinput = (const uint8_t *) input;
+ uint8_t const* const xsecret = (const uint8_t *) secret;
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+ /* data_vec = xinput[i]; */
+ uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
+ /* key_vec = xsecret[i]; */
+ uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
+ uint64x2_t data_key;
+ uint32x2_t data_key_lo, data_key_hi;
+ /* xacc[i] += swap(data_vec); */
+ uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec);
+ uint64x2_t const swapped = vextq_u64(data64, data64, 1);
+ xacc[i] = vaddq_u64 (xacc[i], swapped);
+ /* data_key = data_vec ^ key_vec; */
+ data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
+ /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
+ * data_key_hi = (uint32x2_t) (data_key >> 32);
+ * data_key = UNDEFINED; */
+ XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
+ /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+ xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+
+ }
+ }
+}
+
+/*
+ * NEON variant of the accumulator scramble. For every 64-bit lane:
+ *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
+ * `acc` must be 16-byte aligned (asserted below); `secret` is read
+ * byte-wise via vld1q_u8.
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+ { uint64x2_t* xacc = (uint64x2_t*) acc;
+ uint8_t const* xsecret = (uint8_t const*) secret;
+ uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
+
+ size_t i;
+ for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47); */
+ uint64x2_t acc_vec = xacc[i];
+ uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
+ uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
+
+ /* xacc[i] ^= xsecret[i]; */
+ uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
+ uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec));
+
+ /* xacc[i] *= XXH_PRIME32_1 */
+ uint32x2_t data_key_lo, data_key_hi;
+ /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
+ * data_key_hi = (uint32x2_t) (data_key >> 32);
+ * data_key = UNDEFINED; */
+ XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
+ { /*
+ * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
+ *
+ * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
+ * incorrectly "optimize" this:
+ * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b));
+ * shifted = vshll_n_u32(tmp, 32);
+ * to this:
+ * tmp = "vmulq_u64"(a, b); // no such thing!
+ * shifted = vshlq_n_u64(tmp, 32);
+ *
+ * However, unlike SSE, Clang lacks a 64-bit multiply routine
+ * for NEON, and it scalarizes two 64-bit multiplies instead.
+ *
+ * vmull_u32 has the same timing as vmul_u32, and it avoids
+ * this bug completely.
+ * See https://bugs.llvm.org/show_bug.cgi?id=39967
+ */
+ uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
+ /* xacc[i] = prod_hi << 32; */
+ xacc[i] = vshlq_n_u64(prod_hi, 32);
+ /* xacc[i] += (data_key & 0xFFFFFFFF) * XXH_PRIME32_1; (i.e. data_key_lo * prime) */
+ xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
+ }
+ } }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_VSX)
+
+/*
+ * VSX (and s390x vector) variant of the 512-bit accumulate step.
+ * For each 128-bit chunk of the stripe, adds to the accumulator:
+ *   - the 32x32->64 product of the two halves of each 64-bit lane of
+ *     (input ^ secret), and
+ *   - the input chunk with its two 64-bit halves exchanged.
+ * `input` and `secret` are read through XXH_vec_loadu.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ xxh_u64x2* const xacc = (xxh_u64x2*) acc; /* presumed aligned */
+ xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */
+ xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */
+ xxh_u64x2 const v32 = { 32, 32 };
+ size_t i;
+ for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+ /* data_vec = xinput[i]; */
+ xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+ /* key_vec = xsecret[i]; */
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i);
+ xxh_u64x2 const data_key = data_vec ^ key_vec;
+ /* shuffled = (data_key << 32) | (data_key >> 32); */
+ xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
+ /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
+ xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
+ xacc[i] += product;
+
+ /* swap high and low halves */
+#ifdef __s390x__
+ xacc[i] += vec_permi(data_vec, data_vec, 2);
+#else
+ xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
+#endif
+ }
+}
+
+/*
+ * VSX variant of the accumulator scramble. For every 64-bit lane:
+ *   acc = ((acc ^ (acc >> 47)) ^ secret) * XXH_PRIME32_1
+ * `acc` must be 16-byte aligned (asserted below); `secret` is read
+ * through XXH_vec_loadu.
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+ { xxh_u64x2* const xacc = (xxh_u64x2*) acc;
+ const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+ /* constants */
+ xxh_u64x2 const v32 = { 32, 32 };
+ xxh_u64x2 const v47 = { 47, 47 };
+ xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
+ size_t i;
+ for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47); */
+ xxh_u64x2 const acc_vec = xacc[i];
+ xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
+
+ /* xacc[i] ^= xsecret[i]; */
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i);
+ xxh_u64x2 const data_key = data_vec ^ key_vec;
+
+ /* xacc[i] *= XXH_PRIME32_1 */
+ /* NOTE(review): the "lo"/"hi" labels below depend on how XXH_vec_mule /
+ * XXH_vec_mulo number the 32-bit elements; those macros are defined
+ * outside this section, so confirm against their definitions. */
+ /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
+ xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
+ /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
+ xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
+ xacc[i] = prod_odd + (prod_even << v32);
+ } }
+}
+
+#endif
+
+/* scalar variants - universal */
+
+/*
+ * Portable 512-bit accumulate step.
+ * For each of the XXH_ACC_NB 64-bit lanes:
+ *   - the input word is added to the *neighbouring* lane (index ^ 1);
+ *   - the product of the low and high 32-bit halves of (input ^ secret)
+ *     is added to the lane itself.
+ * `acc` must be XXH_ACC_ALIGN-aligned; `input` and `secret` may be unaligned.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
+                           const void* XXH_RESTRICT input,
+                           const void* XXH_RESTRICT secret)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const lanes = (xxh_u64*) acc;   /* presumed aligned */
+    const xxh_u8* const inBytes  = (const xxh_u8*) input;             /* no alignment restriction */
+    const xxh_u8* const keyBytes = (const xxh_u8*) secret;            /* no alignment restriction */
+    size_t lane;
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    for (lane = 0; lane < XXH_ACC_NB; lane++) {
+        size_t const byteOff = lane * 8;
+        xxh_u64 const word  = XXH_readLE64(inBytes + byteOff);
+        xxh_u64 const mixed = word ^ XXH_readLE64(keyBytes + byteOff);
+        xxh_u64 const lo32  = mixed & 0xFFFFFFFF;
+        xxh_u64 const hi32  = mixed >> 32;
+        lanes[lane]     += XXH_mult32to64(lo32, hi32);
+        lanes[lane ^ 1] += word;   /* swap adjacent lanes */
+    }
+}
+
+/*
+ * Portable accumulator scramble. For each of the XXH_ACC_NB lanes:
+ *   acc = (xorshift64(acc, 47) ^ secret) * XXH_PRIME32_1
+ * `acc` must be XXH_ACC_ALIGN-aligned; `secret` may be unaligned.
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const lanes = (xxh_u64*) acc;   /* presumed aligned */
+    const xxh_u8* const keyBytes = (const xxh_u8*) secret;            /* no alignment restriction */
+    size_t lane;
+    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
+    for (lane = 0; lane < XXH_ACC_NB; lane++) {
+        xxh_u64 const secretWord = XXH_readLE64(keyBytes + 8*lane);
+        xxh_u64 const folded     = XXH_xorshift64(lanes[lane], 47);
+        lanes[lane] = (folded ^ secretWord) * XXH_PRIME32_1;
+    }
+}
+
+/*
+ * Portable custom-secret derivation.
+ *
+ * Writes XXH_SECRET_DEFAULT_SIZE bytes to `customSecret`: for every 16-byte
+ * chunk of XXH3_kSecret, the first little-endian 64-bit word gets `seed64`
+ * added and the second gets `seed64` subtracted (unsigned, wrapping).
+ */
+XXH_FORCE_INLINE void
+XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+ /*
+ * We need a separate pointer for the hack below,
+ * which requires a non-const pointer.
+ * Any decent compiler will optimize this out otherwise.
+ */
+ const xxh_u8* kSecretPtr = XXH3_kSecret;
+ XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+#if defined(__clang__) && defined(__aarch64__)
+ /*
+ * UGLY HACK:
+ * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+ * placed sequentially, in order, at the top of the unrolled loop.
+ *
+ * While MOVK is great for generating constants (2 cycles for a 64-bit
+ * constant compared to 4 cycles for LDR), long MOVK chains stall the
+ * integer pipelines:
+ * I L S
+ * MOVK
+ * MOVK
+ * MOVK
+ * MOVK
+ * ADD
+ * SUB STR
+ * STR
+ * By forcing loads from memory (as the asm line causes Clang to assume
+ * that kSecretPtr has been changed), the pipelines are used more
+ * efficiently:
+ * I L S
+ * LDR
+ * ADD LDR
+ * SUB STR
+ * STR
+ * XXH3_64bits_withSeed, len == 256, Snapdragon 835
+ * without hack: 2654.4 MB/s
+ * with hack: 3202.9 MB/s
+ */
+ __asm__("" : "+r" (kSecretPtr));
+#endif
+ /*
+ * Note: in debug mode, this overrides the asm optimization
+ * and Clang will emit MOVK chains again.
+ */
+ XXH_ASSERT(kSecretPtr == XXH3_kSecret);
+
+ { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+ int i;
+ for (i=0; i < nbRounds; i++) {
+ /*
+ * The asm hack causes Clang to assume that kSecretPtr aliases with
+ * customSecret, and on aarch64, this prevented LDP from merging two
+ * loads together for free. Putting the loads together before the stores
+ * properly generates LDP.
+ */
+ xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
+ xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
+ XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
+ XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
+ } }
+}
+
+
+/*
+ * Function-pointer types for the three kernels selected per vector target:
+ *   XXH3_f_accumulate_512  : (acc, input, secret)  - consume one stripe
+ *   XXH3_f_scrambleAcc     : (acc, secret)         - scramble the accumulators
+ *   XXH3_f_initCustomSecret: (customSecret, seed)  - derive a seeded secret
+ */
+typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
+typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
+
+
+/*
+ * Bind the generic kernel names (XXH3_accumulate_512, XXH3_scrambleAcc,
+ * XXH3_initCustomSecret) to the implementation matching XXH_VECTOR.
+ * The NEON and VSX targets use the scalar XXH3_initCustomSecret.
+ */
+#if (XXH_VECTOR == XXH_AVX512)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
+
+#elif (XXH_VECTOR == XXH_AVX2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
+
+#elif (XXH_VECTOR == XXH_SSE2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
+
+#elif (XXH_VECTOR == XXH_NEON)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#elif (XXH_VECTOR == XXH_VSX)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#else /* scalar */
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#endif
+
+
+
+/*
+ * XXH_PREFETCH_DIST: byte offset ahead of the current stripe that
+ * XXH3_accumulate() hands to XXH_PREFETCH. Can be predefined by the user;
+ * otherwise a default is picked per compiler / vector target.
+ */
+#ifndef XXH_PREFETCH_DIST
+# ifdef __clang__
+# define XXH_PREFETCH_DIST 320
+# else
+# if (XXH_VECTOR == XXH_AVX512)
+# define XXH_PREFETCH_DIST 512
+# else
+# define XXH_PREFETCH_DIST 384
+# endif
+# endif /* __clang__ */
+#endif /* XXH_PREFETCH_DIST */
+
+/*
+ * XXH3_accumulate()
+ * Feeds `nbStripes` consecutive stripes of `input` to `f_acc512`.
+ * Stripe k reads XXH_STRIPE_LEN bytes at input + k*XXH_STRIPE_LEN and uses
+ * the secret at secret + k*XXH_SECRET_CONSUME_RATE.
+ * Assumption: nbStripes will not overflow the secret size
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
+                const xxh_u8* XXH_RESTRICT input,
+                const xxh_u8* XXH_RESTRICT secret,
+                      size_t nbStripes,
+                      XXH3_f_accumulate_512 f_acc512)
+{
+    size_t stripe;
+    for (stripe = 0; stripe < nbStripes; stripe++) {
+        const xxh_u8* const stripeIn  = input  + stripe * XXH_STRIPE_LEN;
+        const xxh_u8* const stripeKey = secret + stripe * XXH_SECRET_CONSUME_RATE;
+        XXH_PREFETCH(stripeIn + XXH_PREFETCH_DIST);
+        f_acc512(acc, stripeIn, stripeKey);
+    }
+}
+
+/*
+ * Core loop for long inputs.
+ *
+ * The input is cut into blocks of nbStripesPerBlock stripes. Each full block
+ * is accumulated and followed by one scramble using the last XXH_STRIPE_LEN
+ * bytes of the secret. The remaining stripes are then accumulated without a
+ * scramble, and finally the last XXH_STRIPE_LEN bytes of the input are
+ * accumulated with a secret offset shifted by XXH_SECRET_LASTACC_START.
+ *
+ * Block and stripe counts are derived from (len - 1), so at least one input
+ * byte is always left for the "last stripe" step. Requires
+ * len > XXH_STRIPE_LEN and secretSize >= XXH3_SECRET_SIZE_MIN (asserted).
+ */
+XXH_FORCE_INLINE void
+XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
+ const xxh_u8* XXH_RESTRICT input, size_t len,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
+ size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
+ size_t const nb_blocks = (len - 1) / block_len;
+
+ size_t n;
+
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+
+ /* full blocks: accumulate, then scramble */
+ for (n = 0; n < nb_blocks; n++) {
+ XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+ f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
+ }
+
+ /* last partial block */
+ XXH_ASSERT(len > XXH_STRIPE_LEN);
+ { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
+ XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
+ XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+
+ /* last stripe */
+ /* p addresses the final XXH_STRIPE_LEN bytes of the input, which may
+ * overlap bytes already consumed above. */
+ { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
+#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
+ f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+ } }
+}
+
+/*
+ * Mixes two adjacent accumulators: each is XORed with a little-endian 64-bit
+ * word of `secret` (offsets 0 and 8) and the pair is passed to
+ * XXH3_mul128_fold64().
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
+{
+    xxh_u64 const keyedLo = acc[0] ^ XXH_readLE64(secret);
+    xxh_u64 const keyedHi = acc[1] ^ XXH_readLE64(secret+8);
+    return XXH3_mul128_fold64(keyedLo, keyedHi);
+}
+
+/*
+ * Folds the 8 accumulators into one 64-bit hash.
+ * Starting from `start`, adds XXH3_mix2Accs() of each accumulator pair
+ * (using 16 bytes of `secret` per pair, 64 bytes in total) and returns the
+ * avalanched sum.
+ */
+static XXH64_hash_t
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
+{
+ xxh_u64 result64 = start;
+ size_t i = 0;
+
+ for (i = 0; i < 4; i++) {
+ result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
+#if defined(__clang__) /* Clang */ \
+ && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ /*
+ * UGLY HACK:
+ * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
+ * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
+ * XXH3_64bits, len == 256, Snapdragon 835:
+ * without hack: 2063.7 MB/s
+ * with hack: 2560.7 MB/s
+ */
+ __asm__("" : "+r" (result64));
+#endif
+ }
+
+ return XXH3_avalanche(result64);
+}
+
+/* Initializer list for the 8 accumulator lanes used by the one-shot long-input hash. */
+#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+ XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
+
+/*
+ * One-shot 64-bit hash of a long input.
+ * Initializes a local accumulator with XXH3_INIT_ACC, runs
+ * XXH3_hashLong_internal_loop() over the whole input with the given kernels,
+ * then merges the accumulators using the secret at offset
+ * XXH_SECRET_MERGEACCS_START, seeded with len * XXH_PRIME64_1.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
+ const void* XXH_RESTRICT secret, size_t secretSize,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+ XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+
+ /* converge into final hash */
+ XXH_STATIC_ASSERT(sizeof(acc) == 64);
+ /* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+ XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+ return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+}
+
+/*
+ * Long-input 64-bit hash using a caller-provided secret.
+ * `seed64` is ignored; it is present so the signature matches
+ * XXH3_hashLong64_f.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    XXH64_hash_t const digest =
+        XXH3_hashLong_64b_internal(input, len, secret, secretLen,
+                                   XXH3_accumulate_512, XXH3_scrambleAcc);
+    (void)seed64;
+    return digest;
+}
+
+/*
+ * Long-input 64-bit hash using the built-in XXH3_kSecret.
+ * All of `seed64`, `secret` and `secretLen` are ignored; they are present so
+ * the signature matches XXH3_hashLong64_f.
+ *
+ * It's important for performance that XXH3_hashLong is not inlined.
+ * Since the function is not inlined, the compiler may not be able to understand that,
+ * in some scenarios, its `secret` argument is actually a compile time constant.
+ * This variant passes XXH3_kSecret explicitly so that the compiler can detect it
+ * and use this opportunity to streamline the generated code for better performance.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
+                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    XXH64_hash_t const digest =
+        XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                   XXH3_accumulate_512, XXH3_scrambleAcc);
+    (void)seed64;
+    (void)secret;
+    (void)secretLen;
+    return digest;
+}
+
+/*
+ * XXH3_hashLong_64b_withSeed_internal():
+ * Long-input 64-bit hash with a seed.
+ *
+ * A non-zero seed is turned into a custom secret (via `f_initSec`, an
+ * alteration of the default XXH3_kSecret), which is then used for long mode
+ * hashing. A zero seed hashes with XXH3_kSecret directly.
+ *
+ * Generating the secret is decently fast but nonetheless costs a little bit
+ * of time. Try to avoid it whenever possible (typically when seed==0).
+ *
+ * It's important for performance that XXH3_hashLong is not inlined. Not sure
+ * why (uop cache maybe?), but the difference is large and easily measurable.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
+                                    XXH64_hash_t seed,
+                                    XXH3_f_accumulate_512 f_acc512,
+                                    XXH3_f_scrambleAcc f_scramble,
+                                    XXH3_f_initCustomSecret f_initSec)
+{
+    if (seed != 0) {
+        XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(seededSecret, seed);
+        return XXH3_hashLong_64b_internal(input, len,
+                                          seededSecret, sizeof(seededSecret),
+                                          f_acc512, f_scramble);
+    }
+    return XXH3_hashLong_64b_internal(input, len,
+                                      XXH3_kSecret, sizeof(XXH3_kSecret),
+                                      f_acc512, f_scramble);
+}
+
+/*
+ * Long-input 64-bit hash with a seed, bound to the kernels selected for this
+ * build. `secret` and `secretLen` are ignored; they are present so the
+ * signature matches XXH3_hashLong64_f.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed(const void* input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+{
+    XXH64_hash_t const digest =
+        XXH3_hashLong_64b_withSeed_internal(input, len, seed,
+                                            XXH3_accumulate_512,
+                                            XXH3_scrambleAcc,
+                                            XXH3_initCustomSecret);
+    (void)secret;
+    (void)secretLen;
+    return digest;
+}
+
+
+/*
+ * Signature shared by the long-input back ends handed to
+ * XXH3_64bits_internal(): (input, len, seed, secret, secretLen).
+ */
+typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
+ XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
+
+/*
+ * Length-based dispatcher for the 64-bit hash:
+ *   len <= 16                -> XXH3_len_0to16_64b
+ *   len <= 128               -> XXH3_len_17to128_64b
+ *   len <= XXH3_MIDSIZE_MAX  -> XXH3_len_129to240_64b
+ *   otherwise                -> f_hashLong
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
+                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                     XXH3_hashLong64_f f_hashLong)
+{
+    const xxh_u8* const inBytes  = (const xxh_u8*)input;
+    const xxh_u8* const keyBytes = (const xxh_u8*)secret;
+
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+    /*
+     * If an action is to be taken if `secretLen` condition is not respected,
+     * it should be done here.
+     * For now, it's a contract pre-condition.
+     * Adding a check and a branch here would cost performance at every hash.
+     * Also, note that function signature doesn't offer room to return an error.
+     */
+    if (len <= 16) {
+        return XXH3_len_0to16_64b(inBytes, len, keyBytes, seed64);
+    } else if (len <= 128) {
+        return XXH3_len_17to128_64b(inBytes, len, keyBytes, secretLen, seed64);
+    } else if (len <= XXH3_MIDSIZE_MAX) {
+        return XXH3_len_129to240_64b(inBytes, len, keyBytes, secretLen, seed64);
+    } else {
+        return f_hashLong(input, len, seed64, keyBytes, secretLen);
+    }
+}
+
+
+/* === Public entry point === */
+
+/*!
+ * @ingroup xxh3_family
+ * 64-bit XXH3 hash of `len` bytes at `input`, using seed 0 and the built-in
+ * XXH3_kSecret.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+{
+    XXH64_hash_t const noSeed = 0;
+    return XXH3_64bits_internal(input, len, noSeed,
+                                XXH3_kSecret, sizeof(XXH3_kSecret),
+                                XXH3_hashLong_64b_default);
+}
+
+/*!
+ * @ingroup xxh3_family
+ * 64-bit XXH3 hash of `len` bytes at `input`, using seed 0 and the
+ * caller-provided `secret` of `secretSize` bytes.
+ * `secretSize` is not validated here; a too-small secret is only caught by
+ * the assertion in XXH3_64bits_internal().
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+{
+    XXH64_hash_t const noSeed = 0;
+    return XXH3_64bits_internal(input, len, noSeed,
+                                secret, secretSize,
+                                XXH3_hashLong_64b_withSecret);
+}
+
+/*!
+ * @ingroup xxh3_family
+ * 64-bit XXH3 hash of `len` bytes at `input` with the given `seed`.
+ * The built-in XXH3_kSecret is passed along; for long inputs the back end
+ * derives a seed-specific secret from it.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_64bits_internal(input, len, seed,
+                                XXH3_kSecret, sizeof(XXH3_kSecret),
+                                XXH3_hashLong_64b_withSeed);
+}
+
+
+/* === XXH3 streaming === */
+
+/*
+ * Allocates `s` bytes whose address is a multiple of `align`.
+ *
+ * The result must be released with `XXH_alignedFree()`, never with a plain
+ * free.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
+ * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * The alignment is done manually: functions like posix_memalign or _mm_malloc
+ * are avoided. To maintain portability, we would have to write a fallback
+ * like this anyway, and besides, testing for the existence of library
+ * functions without relying on external build tools is impossible.
+ *
+ * Method: allocate `s + align` bytes, advance to the next multiple of `align`
+ * (always by at least one byte), and record the distance moved in the byte
+ * just before the returned pointer.
+ *
+ * `align` must be a power of 2 with 8 <= align <= 128.
+ * Returns NULL when the underlying allocation fails.
+ */
+static void* XXH_alignedMalloc(size_t s, size_t align)
+{
+    xxh_u8* raw;
+    xxh_u8* aligned;
+    size_t shift;
+
+    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
+    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
+
+    /* Overallocate to make room for manual realignment and an offset byte */
+    raw = (xxh_u8*)XXH_malloc(s + align);
+    if (raw == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Distance to the next aligned address. An already-aligned `raw` yields
+     * `align` rather than 0, so there is always at least one byte available
+     * in front of the result to hold the offset.
+     */
+    shift = align - ((size_t)raw & (align - 1)); /* raw % align */
+    aligned = raw + shift;
+    XXH_ASSERT((size_t)aligned % align == 0);
+
+    /* Remember how far we moved, immediately before the returned pointer. */
+    aligned[-1] = (xxh_u8)shift;
+    return aligned;
+}
+/*
+ * Releases a pointer obtained from XXH_alignedMalloc(); NULL is ignored.
+ * Don't pass normal malloc'd pointers: the byte preceding `p` is read as the
+ * offset back to the original allocation.
+ */
+static void XXH_alignedFree(void* p)
+{
+    xxh_u8* const aligned = (xxh_u8*)p;
+    if (aligned == NULL) {
+        return;
+    }
+    /* aligned[-1] holds the offset byte stored by XXH_alignedMalloc(). */
+    XXH_free(aligned - aligned[-1]);
+}
+/*!
+ * @ingroup xxh3_family
+ * Allocates a 64-byte-aligned XXH3_state_t and runs XXH3_INITSTATE() on it.
+ * Returns NULL if the allocation fails. Release with XXH3_freeState().
+ */
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
+{
+    XXH3_state_t* const fresh =
+        (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
+    if (fresh != NULL) {
+        XXH3_INITSTATE(fresh);
+    }
+    return fresh;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Releases a state through XXH_alignedFree(). A NULL `statePtr` is accepted.
+ * Always returns XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
+{
+    void* const block = statePtr;
+    XXH_alignedFree(block);
+    return XXH_OK;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Byte-wise copy of the whole state from `src_state` into `dst_state`.
+ */
+XXH_PUBLIC_API void
+XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+{
+    size_t const stateBytes = sizeof(XXH3_state_t);
+    memcpy(dst_state, src_state, stateBytes);
+}
+
+/*
+ * Common reset path for the streaming API.
+ *
+ * Zeroes the members laid out from `bufferedSize` up to (not including)
+ * `nbStripesPerBlock`, reloads the accumulators with their initial
+ * constants, and records `seed`, the external secret pointer and the values
+ * derived from `secretSize`.
+ *
+ * `secret` may be NULL; XXH3_update() and XXH3_64bits_digest() then use
+ * state->customSecret instead.
+ * `secretSize` must be >= XXH3_SECRET_SIZE_MIN (asserted only).
+ */
+static void
+XXH3_64bits_reset_internal(XXH3_state_t* statePtr,
+ XXH64_hash_t seed,
+ const void* secret, size_t secretSize)
+{
+ size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
+ size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
+ XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
+ XXH_ASSERT(statePtr != NULL);
+ /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
+ memset((char*)statePtr + initStart, 0, initLength);
+ /* same constants, in the same order, as XXH3_INIT_ACC */
+ statePtr->acc[0] = XXH_PRIME32_3;
+ statePtr->acc[1] = XXH_PRIME64_1;
+ statePtr->acc[2] = XXH_PRIME64_2;
+ statePtr->acc[3] = XXH_PRIME64_3;
+ statePtr->acc[4] = XXH_PRIME64_4;
+ statePtr->acc[5] = XXH_PRIME32_2;
+ statePtr->acc[6] = XXH_PRIME64_5;
+ statePtr->acc[7] = XXH_PRIME32_1;
+ statePtr->seed = seed;
+ statePtr->extSecret = (const unsigned char*)secret;
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+ /* offset of the last XXH_STRIPE_LEN bytes of the secret */
+ statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
+ statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Resets `statePtr` for a new hash with seed 0 and the built-in XXH3_kSecret.
+ * Returns XXH_ERROR if `statePtr` is NULL, XXH_OK otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset(XXH3_state_t* statePtr)
+{
+    if (statePtr != NULL) {
+        XXH3_64bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+        return XXH_OK;
+    }
+    return XXH_ERROR;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Resets `statePtr` for a new hash with seed 0 and a caller-provided secret.
+ *
+ * The state keeps the `secret` pointer (it is not copied), so the buffer
+ * must stay valid for as long as the state is used.
+ *
+ * Returns XXH_ERROR, leaving the state untouched, if `statePtr` or `secret`
+ * is NULL or if `secretSize` < XXH3_SECRET_SIZE_MIN; XXH_OK otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{
+    /*
+     * Validate before touching the state: the reset computes
+     * secretSize - XXH_STRIPE_LEN, which wraps around for an undersized
+     * secret, and stores the secret pointer.
+     */
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize);
+    return XXH_OK;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Resets `statePtr` for a new hash with the given `seed`.
+ * A zero seed is forwarded to XXH3_64bits_reset(). Otherwise the state's
+ * customSecret is used (extSecret is set to NULL).
+ * Returns XXH_ERROR if `statePtr` is NULL, XXH_OK otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ if (seed==0) return XXH3_64bits_reset(statePtr);
+ /* Regenerate customSecret only when the seed differs from the one stored
+ * in the state. This reads statePtr->seed before the reset below
+ * overwrites it, so the state must hold a meaningful seed on entry
+ * (XXH3_createState() runs XXH3_INITSTATE for that purpose, presumably). */
+ if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+ XXH3_64bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*
+ * Accumulates `nbStripes` stripes of `input`, continuing from position
+ * *nbStripesSoFarPtr inside the current block, and updates that position.
+ * When the stripes reach the end of the block, the accumulators are scrambled
+ * once (with the secret at `secretLimit`) and the remainder starts a new
+ * block at the beginning of the secret.
+ *
+ * Note : when XXH3_consumeStripes() is invoked,
+ * there must be a guarantee that at least one more byte must be consumed from input
+ * so that the function can blindly consume all stripes using the "normal" secret segment
+ */
+XXH_FORCE_INLINE void
+XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
+                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
+                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
+                    XXH3_f_accumulate_512 f_acc512,
+                    XXH3_f_scrambleAcc f_scramble)
+{
+    size_t const alreadyDone = *nbStripesSoFarPtr;
+    size_t const roomInBlock = nbStripesPerBlock - alreadyDone;
+    const xxh_u8* const resumeSecret = secret + alreadyDone * XXH_SECRET_CONSUME_RATE;
+
+    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
+    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
+
+    if (nbStripes < roomInBlock) {
+        /* everything fits in the current block: no scramble */
+        XXH3_accumulate(acc, input, resumeSecret, nbStripes, f_acc512);
+        *nbStripesSoFarPtr = alreadyDone + nbStripes;
+        return;
+    }
+
+    /* finish the block, scramble, then start the next block */
+    {   size_t const spillOver = nbStripes - roomInBlock;
+        XXH3_accumulate(acc, input, resumeSecret, roomInBlock, f_acc512);
+        f_scramble(acc, secret + secretLimit);
+        XXH3_accumulate(acc, input + roomInBlock * XXH_STRIPE_LEN, secret, spillOver, f_acc512);
+        *nbStripesSoFarPtr = spillOver;
+    }
+}
+
+/*
+ * Streaming ingest shared by XXH3_64bits_update and XXH3_128bits_update.
+ *
+ * Input is staged in state->buffer (XXH3_INTERNALBUFFER_SIZE bytes). While
+ * everything still fits, the bytes are only copied. Otherwise the buffer is
+ * completed and consumed, whole buffer-sized chunks are consumed directly
+ * from `input`, and the tail is buffered. The logic always leaves at least
+ * one byte buffered, as the last stripe is handled at digest time.
+ *
+ * Returns XXH_OK, or for a NULL `input`: XXH_OK if
+ * XXH_ACCEPT_NULL_INPUT_POINTER >= 1, XXH_ERROR otherwise.
+ */
+XXH_FORCE_INLINE XXH_errorcode
+XXH3_update(XXH3_state_t* state,
+ const xxh_u8* input, size_t len,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ { const xxh_u8* const bEnd = input + len;
+ /* a NULL extSecret selects the state's own customSecret */
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+
+ state->totalLen += len;
+
+ if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */
+ XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+ state->bufferedSize += (XXH32_hash_t)len;
+ return XXH_OK;
+ }
+ /* total input is now > XXH3_INTERNALBUFFER_SIZE */
+
+ #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
+ XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
+
+ /*
+ * Internal buffer is partially filled (always, except at beginning)
+ * Complete it, then consume it.
+ */
+ if (state->bufferedSize) {
+ size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+ XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+ input += loadSize;
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ state->bufferedSize = 0;
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Consume input by a multiple of internal buffer size */
+ if (input+XXH3_INTERNALBUFFER_SIZE < bEnd) {
+ const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+ do {
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ input, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ input += XXH3_INTERNALBUFFER_SIZE;
+ } while (input<limit);
+ /* for last partial stripe */
+ /* Keep the last consumed stripe at the end of the buffer:
+ * XXH3_digest_long() reads from there when fewer than
+ * XXH_STRIPE_LEN bytes are buffered. */
+ memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Some remaining input (always) : buffer it */
+ XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
+ state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+ }
+
+ return XXH_OK;
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Feeds `len` bytes at `input` into the streaming state, using the kernels
+ * selected for this build. Returns the result of XXH3_update().
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+    return XXH3_update(state, bytes, len,
+                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+
+/*
+ * Finishes the long-input accumulation into `acc` without modifying `state`.
+ *
+ * `acc` receives a copy of the state's accumulators, then the buffered bytes
+ * are consumed: all stripes except the last through XXH3_consumeStripes(),
+ * and the last XXH_STRIPE_LEN bytes with the secret offset shifted by
+ * XXH_SECRET_LASTACC_START. `acc` must have room for sizeof(state->acc) bytes.
+ */
+XXH_FORCE_INLINE void
+XXH3_digest_long (XXH64_hash_t* acc,
+ const XXH3_state_t* state,
+ const unsigned char* secret)
+{
+ /*
+ * Digest on a local copy. This way, the state remains unaltered, and it can
+ * continue ingesting more input afterwards.
+ */
+ memcpy(acc, state->acc, sizeof(state->acc));
+ if (state->bufferedSize >= XXH_STRIPE_LEN) {
+ /* (bufferedSize - 1) leaves at least one byte for the last stripe */
+ size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
+ size_t nbStripesSoFar = state->nbStripesSoFar;
+ XXH3_consumeStripes(acc,
+ &nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, nbStripes,
+ secret, state->secretLimit,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+ /* last stripe */
+ XXH3_accumulate_512(acc,
+ state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ } else { /* bufferedSize < XXH_STRIPE_LEN */
+ /* Rebuild the last stripe from the tail of the buffer (bytes saved
+ * from earlier input) followed by the currently buffered bytes. */
+ xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
+ XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
+ memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+ memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+ XXH3_accumulate_512(acc,
+ lastStripe,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ }
+}
+
+/*!
+ * @ingroup xxh3_family
+ * Returns the 64-bit hash of everything fed to `state` so far. The state is
+ * not modified.
+ *
+ * Inputs of at most XXH3_MIDSIZE_MAX bytes are still entirely in the state's
+ * buffer and are hashed with the one-shot functions; longer inputs go through
+ * XXH3_digest_long() and XXH3_mergeAccs().
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+{
+    const unsigned char* const secret =
+        (state->extSecret != NULL) ? state->extSecret : state->customSecret;
+
+    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
+        /* digesting a short input */
+        size_t const shortLen = (size_t)state->totalLen;
+        if (state->seed != 0) {
+            return XXH3_64bits_withSeed(state->buffer, shortLen, state->seed);
+        }
+        return XXH3_64bits_withSecret(state->buffer, shortLen,
+                                      secret, state->secretLimit + XXH_STRIPE_LEN);
+    }
+
+    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        return XXH3_mergeAccs(acc,
+                              secret + XXH_SECRET_MERGEACCS_START,
+                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
+    }
+}
+
+
+/* Smaller of x and y. Function-like macro: an argument may be evaluated twice, so avoid side effects. */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+/*!
+ * @ingroup xxh3_family
+ * Fills `secretBuffer` with XXH_SECRET_DEFAULT_SIZE bytes of secret derived
+ * from `customSeed`.
+ *
+ * - customSeedSize == 0: the default XXH3_kSecret is copied.
+ * - otherwise: the first 16-byte segment is the canonical XXH128 hash of the
+ *   custom seed; every following segment is the canonical XXH128 hash of that
+ *   first segment, seeded with a 64-bit value taken from the (truncated or
+ *   repeated) custom seed plus the segment index.
+ *
+ * `secretBuffer` must not be NULL, and `customSeed` must not be NULL when
+ * customSeedSize != 0 (both asserted only).
+ */
+XXH_PUBLIC_API void
+XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
+{
+ XXH_ASSERT(secretBuffer != NULL);
+ if (customSeedSize == 0) {
+ memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+ return;
+ }
+ XXH_ASSERT(customSeed != NULL);
+
+ { size_t const segmentSize = sizeof(XXH128_hash_t);
+ size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
+ XXH128_canonical_t scrambler;
+ XXH64_hash_t seeds[12];
+ size_t segnb;
+ XXH_ASSERT(nbSegments == 12);
+ XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
+ XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+
+ /*
+ * Copy customSeed to seeds[], truncating or repeating as necessary.
+ */
+ { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
+ size_t filled = toFill;
+ memcpy(seeds, customSeed, toFill);
+ /* each pass copies the already-filled prefix, capped at the space left */
+ while (filled < sizeof(seeds)) {
+ toFill = XXH_MIN(filled, sizeof(seeds) - filled);
+ memcpy((char*)seeds + filled, seeds, toFill);
+ filled += toFill;
+ } }
+
+ /* generate secret */
+ /* segment 0 is the scrambler itself; seeds[0] is not used as a per-segment seed */
+ memcpy(secretBuffer, &scrambler, sizeof(scrambler));
+ for (segnb=1; segnb < nbSegments; segnb++) {
+ size_t const segmentStart = segnb * segmentSize;
+ XXH128_canonical_t segment;
+ XXH128_canonicalFromHash(&segment,
+ XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
+ memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
+ } }
+}
+
+
+/* ==========================================
+ * XXH3 128 bits (a.k.a XXH128)
+ * ==========================================
+ * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
+ * even without counting the significantly larger output size.
+ *
+ * For example, extra steps are taken to avoid the seed-dependent collisions
+ * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
+ *
+ * This strength naturally comes at the cost of some speed, especially on short
+ * lengths. Note that longer hashes are about as fast as the 64-bit version
+ * due to it using only a slight modification of the 64-bit loop.
+ *
+ * XXH128 is also more oriented towards 64-bit machines. It is still extremely
+ * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
+ */
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    /* 128-bit hash of a 1..3 byte input, keyed with secret bytes 0..15 and the seed. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(1 <= len && len <= 3);
+    XXH_ASSERT(secret != NULL);
+    /*
+     * The input is packed into one 32-bit word, from least to most significant byte:
+     *   last byte, len, first byte, middle byte (input[len >> 1]).
+     * For len == 1 all three input bytes are input[0].
+     */
+    {   xxh_u32 const first  = input[0];
+        xxh_u32 const middle = input[len >> 1];
+        xxh_u32 const last   = input[len - 1];
+        xxh_u32 const packedLo = last | ((xxh_u32)len << 8) | (first << 16) | (middle << 24);
+        /* The high half uses the same word, byte-swapped and rotated. */
+        xxh_u32 const packedHi = XXH_rotl32(XXH_swap32(packedLo), 13);
+        xxh_u32 const secretLo = XXH_readLE32(secret) ^ XXH_readLE32(secret + 4);
+        xxh_u32 const secretHi = XXH_readLE32(secret + 8) ^ XXH_readLE32(secret + 12);
+        XXH128_hash_t result;
+        /* The seed is added for the low half and subtracted for the high half. */
+        result.low64  = XXH64_avalanche((xxh_u64)packedLo ^ (secretLo + seed));
+        result.high64 = XXH64_avalanche((xxh_u64)packedHi ^ (secretHi - seed));
+        return result;
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    /* 128-bit hash of a 4..8 byte input, keyed with secret bytes 16..31 and the seed. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(4 <= len && len <= 8);
+    {   /* Fold the byte-swapped low half of the seed into its upper 32 bits. */
+        xxh_u64 const mixedSeed = seed ^ ((xxh_u64)XXH_swap32((xxh_u32)seed) << 32);
+        /* First and last 4 bytes; the two reads overlap when len < 8. */
+        xxh_u32 const head = XXH_readLE32(input);
+        xxh_u32 const tail = XXH_readLE32(input + len - 4);
+        xxh_u64 const word = head + ((xxh_u64)tail << 32);
+        xxh_u64 const mask = (XXH_readLE64(secret + 16) ^ XXH_readLE64(secret + 24)) + mixedSeed;
+
+        /* len is shifted left so the value added to the prime is even (avoids even multiplies). */
+        XXH128_hash_t product = XXH_mult64to128(word ^ mask, XXH_PRIME64_1 + (len << 2));
+
+        product.high64 += (product.low64 << 1);
+        product.low64  ^= (product.high64 >> 3);
+
+        product.low64  = XXH_xorshift64(product.low64, 35);
+        product.low64 *= 0x9FB21C651E98DF25ULL;
+        product.low64  = XXH_xorshift64(product.low64, 28);
+        product.high64 = XXH3_avalanche(product.high64);
+        return product;
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{ /*
+ * 128-bit hash of a 9..16 byte input (see the length assertion below).
+ * Reads the first 8 and the last 8 input bytes (the two reads overlap when
+ * len < 16) and keys them with secret bytes 32..63 combined with seed:
+ * seed is subtracted for the low key and added for the high key.
+ */
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(secret != NULL);
+ XXH_ASSERT(9 <= len && len <= 16);
+ { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
+ xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
+ xxh_u64 const input_lo = XXH_readLE64(input);
+ xxh_u64 input_hi = XXH_readLE64(input + len - 8);
+ XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
+ /*
+ * Put len in the middle of m128 to ensure that the length gets mixed to
+ * both the low and high bits in the 128x64 multiply below.
+ */
+ m128.low64 += (xxh_u64)(len - 1) << 54;
+ input_hi ^= bitfliph;
+ /*
+ * Add the high 32 bits of input_hi to the high 32 bits of m128, then
+ * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
+ * the high 64 bits of m128.
+ *
+ * The best approach to this operation is different on 32-bit and 64-bit.
+ * Both branches below compute the same value.
+ */
+ if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
+ /*
+ * 32-bit optimized version, which is more readable.
+ *
+ * On 32-bit, it removes an ADC and delays a dependency between the two
+ * halves of m128.high64, but it generates an extra mask on 64-bit.
+ */
+ m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
+ } else {
+ /*
+ * 64-bit optimized (albeit more confusing) version.
+ *
+ * Uses some properties of addition and multiplication to remove the mask:
+ *
+ * Let:
+ * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
+ * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
+ * c = XXH_PRIME32_2
+ *
+ * The 32-bit branch computes:
+ * b + (a * c)
+ * Inverse Property: x + y - x == y
+ * b + (a * (1 + c - 1))
+ * Distributive Property: x * (y + z) == (x * y) + (x * z)
+ * b + (a * 1) + (a * (c - 1))
+ * Identity Property: x * 1 == x
+ * b + a + (a * (c - 1))
+ *
+ * Substitute a, b, and c:
+ * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
+ *
+ * Since input_hi.hi + input_hi.lo == input_hi, we get this:
+ * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
+ */
+ m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
+ }
+ /* Pseudo-code for the next line, viewing m128 as a single 128-bit value:
+ * m128 ^= XXH_swap64(m128 >> 64); */
+ m128.low64 ^= XXH_swap64(m128.high64);
+
+ { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
+ XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
+ h128.high64 += m128.high64 * XXH_PRIME64_2;
+
+ h128.low64 = XXH3_avalanche(h128.low64);
+ h128.high64 = XXH3_avalanche(h128.high64);
+ return h128;
+ } }
+}
+
+/*
+ * Dispatcher for inputs of at most 16 bytes.
+ * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(len <= 16);
+    if (len == 0) {
+        /* Empty input: the result depends only on the seed and secret bytes 64..95. */
+        xxh_u64 const secretLo = XXH_readLE64(secret + 64) ^ XXH_readLE64(secret + 72);
+        xxh_u64 const secretHi = XXH_readLE64(secret + 80) ^ XXH_readLE64(secret + 88);
+        XXH128_hash_t emptyHash;
+        emptyHash.low64  = XXH64_avalanche(seed ^ secretLo);
+        emptyHash.high64 = XXH64_avalanche(seed ^ secretHi);
+        return emptyHash;
+    }
+    if (len < 4)
+        return XXH3_len_1to3_128b(input, len, secret, seed);
+    if (len <= 8)
+        return XXH3_len_4to8_128b(input, len, secret, seed);
+    return XXH3_len_9to16_128b(input, len, secret, seed);
+}
+
+/*
+ * Mixes two 16-byte inputs into a 128-bit accumulator using 32 bytes of secret.
+ * A bit slower than XXH3_mix16B, but handles multiply by zero better.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
+              const xxh_u8* secret, XXH64_hash_t seed)
+{
+    /* Plain sum of the two 64-bit words of each input. */
+    xxh_u64 const sum_1 = XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
+    xxh_u64 const sum_2 = XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
+
+    /* Each half adds the mix of one input, then is XORed with the other input's sum. */
+    acc.low64  = (acc.low64  + XXH3_mix16B(input_1, secret, seed)) ^ sum_2;
+    acc.high64 = (acc.high64 + XXH3_mix16B(input_2, secret + 16, seed)) ^ sum_1;
+    return acc;
+}
+
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{
+    /*
+     * 128-bit hash of a 17..128 byte input.
+     * secretSize is only checked by the assertion below.
+     */
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    {   XXH128_hash_t acc;
+        acc.low64  = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+
+        /*
+         * Each round pairs 16 bytes counted from the start with 16 bytes counted
+         * from the end. Longer inputs run more rounds, outermost threshold first.
+         */
+        if (len > 96)
+            acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed);
+        if (len > 64)
+            acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed);
+        if (len > 32)
+            acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed);
+        acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed);
+
+        {   xxh_u64 const lowMix  = acc.low64 + acc.high64;
+            xxh_u64 const highMix = (acc.low64    * XXH_PRIME64_1)
+                                  + (acc.high64   * XXH_PRIME64_4)
+                                  + ((len - seed) * XXH_PRIME64_2);
+            XXH128_hash_t result;
+            result.low64  = XXH3_avalanche(lowMix);
+            result.high64 = (XXH64_hash_t)0 - XXH3_avalanche(highMix);
+            return result;
+        }
+    }
+}
+
+XXH_NO_INLINE XXH128_hash_t
+XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                       XXH64_hash_t seed)
+{
+    /*
+     * 128-bit hash of an input longer than 128 bytes and at most XXH3_MIDSIZE_MAX.
+     * secretSize is only checked by the assertion below.
+     */
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    {   XXH128_hash_t acc;
+        int const roundCount = (int)len / 32;   /* number of whole 32-byte chunks */
+        int round = 0;
+
+        acc.low64  = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+
+        /* First four chunks: secret offset tracks the chunk offset. */
+        while (round < 4) {
+            const xxh_u8* const chunk = input + (32 * round);
+            acc = XXH128_mix32B(acc, chunk, chunk + 16, secret + (32 * round), seed);
+            round++;
+        }
+        acc.low64  = XXH3_avalanche(acc.low64);
+        acc.high64 = XXH3_avalanche(acc.high64);
+
+        XXH_ASSERT(roundCount >= 4);
+        /* Remaining whole chunks: secret restarts at XXH3_MIDSIZE_STARTOFFSET. */
+        while (round < roundCount) {
+            const xxh_u8* const chunk = input + (32 * round);
+            acc = XXH128_mix32B(acc,
+                                chunk,
+                                chunk + 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (round - 4)),
+                                seed);
+            round++;
+        }
+
+        /* Last 32 bytes, taken in swapped order and with a negated seed. */
+        acc = XXH128_mix32B(acc,
+                            input + len - 16,
+                            input + len - 32,
+                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
+                            0ULL - seed);
+
+        {   xxh_u64 const lowMix  = acc.low64 + acc.high64;
+            xxh_u64 const highMix = (acc.low64    * XXH_PRIME64_1)
+                                  + (acc.high64   * XXH_PRIME64_4)
+                                  + ((len - seed) * XXH_PRIME64_2);
+            XXH128_hash_t result;
+            result.low64  = XXH3_avalanche(lowMix);
+            result.high64 = (XXH64_hash_t)0 - XXH3_avalanche(highMix);
+            return result;
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
+                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_scrambleAcc f_scramble)
+{
+    /*
+     * Long-input core: runs the accumulation loop over the whole input, then
+     * merges the accumulators twice to obtain the two 64-bit halves.
+     */
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len,
+                                secret, secretSize, f_acc512, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {   /* The low half is keyed from near the start of the secret, the high half from near its end. */
+        const xxh_u8* const lowKey  = secret + XXH_SECRET_MERGEACCS_START;
+        const xxh_u8* const highKey = secret + secretSize
+                                             - sizeof(acc) - XXH_SECRET_MERGEACCS_START;
+        xxh_u64 const lowStart  = (xxh_u64)len * XXH_PRIME64_1;
+        xxh_u64 const highStart = ~((xxh_u64)len * XXH_PRIME64_2);
+        XXH128_hash_t result;
+        result.low64  = XXH3_mergeAccs(acc, lowKey, lowStart);
+        result.high64 = XXH3_mergeAccs(acc, highKey, highStart);
+        return result;
+    }
+}
+
+/*
+ * Long-input hash with the built-in secret; seed64, secret and secretLen are ignored.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed64,
+                           const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    /* These parameters exist only so the signature matches XXH3_hashLong128_f. */
+    (void)secretLen;
+    (void)secret;
+    (void)seed64;
+    return XXH3_hashLong_128b_internal(input, len,
+                                       XXH3_kSecret, sizeof(XXH3_kSecret),
+                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * Long-input hash keyed by the caller's secret; seed64 is ignored.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                              XXH64_hash_t seed64,
+                              const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    const xxh_u8* const secretBytes = (const xxh_u8*)secret;
+    (void)seed64;
+    return XXH3_hashLong_128b_internal(input, len,
+                                       secretBytes, secretLen,
+                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
+                                     XXH64_hash_t seed64,
+                                     XXH3_f_accumulate_512 f_acc512,
+                                     XXH3_f_scrambleAcc f_scramble,
+                                     XXH3_f_initCustomSecret f_initSec)
+{
+    /*
+     * Seeded long-input hash. A non-zero seed first builds a temporary secret
+     * through f_initSec; a zero seed hashes with the built-in secret directly.
+     */
+    if (seed64 != 0) {
+        XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(seededSecret, seed64);
+        return XXH3_hashLong_128b_internal(input, len,
+                                           (const xxh_u8*)seededSecret, sizeof(seededSecret),
+                                           f_acc512, f_scramble);
+    }
+    return XXH3_hashLong_128b_internal(input, len,
+                                       XXH3_kSecret, sizeof(XXH3_kSecret),
+                                       f_acc512, f_scramble);
+}
+
+/*
+ * Seeded long-input hash; secret and secretLen are ignored.
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed(const void* input, size_t len,
+                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    /* These parameters exist only so the signature matches XXH3_hashLong128_f. */
+    (void)secretLen;
+    (void)secret;
+    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
+                                                XXH3_accumulate_512,
+                                                XXH3_scrambleAcc,
+                                                XXH3_initCustomSecret);
+}
+
+/* Signature shared by the XXH3_hashLong_128b_* functions that XXH3_128bits_internal can call. */
+typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT input, size_t len,
+                                            XXH64_hash_t seed64,
+                                            const void* XXH_RESTRICT secret, size_t secretLen);
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_128bits_internal(const void* input, size_t len,
+                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                      XXH3_hashLong128_f f_hl128)
+{
+    /* Picks the hashing routine by input length; f_hl128 handles the long case. */
+    const xxh_u8* const inputBytes  = (const xxh_u8*)input;
+    const xxh_u8* const secretBytes = (const xxh_u8*)secret;
+
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+    /*
+     * The secret requirements are a contract pre-condition for now.
+     * If violations should ever trigger an action, this is the place for it,
+     * but a check and a branch here would cost performance at every hash.
+     */
+    if (len <= 16) {
+        return XXH3_len_0to16_128b(inputBytes, len, secretBytes, seed64);
+    } else if (len <= 128) {
+        return XXH3_len_17to128_128b(inputBytes, len, secretBytes, secretLen, seed64);
+    } else if (len <= XXH3_MIDSIZE_MAX) {
+        return XXH3_len_129to240_128b(inputBytes, len, secretBytes, secretLen, seed64);
+    }
+    return f_hl128(input, len, seed64, secret, secretLen);
+}
+
+
+/* === Public XXH128 API === */
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+{
+    /* Unseeded one-shot hash: seed 0, built-in secret, default long-input routine. */
+    XXH64_hash_t const noSeed = 0;
+    return XXH3_128bits_internal(input, len, noSeed,
+                                 XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_default);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+{
+    /* One-shot hash keyed by the caller's secret; the seed is fixed at 0. */
+    XXH64_hash_t const noSeed = 0;
+    return XXH3_128bits_internal(input, len, noSeed,
+                                 secret, secretSize,
+                                 XXH3_hashLong_128b_withSecret);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+{
+    /* One-shot seeded hash: built-in secret plus the seeded long-input routine. */
+    return XXH3_128bits_internal(input,
+                                 len,
+                                 seed,
+                                 XXH3_kSecret,
+                                 sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_withSeed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128(const void* input, size_t len, XXH64_hash_t seed)
+{
+    /* Shorthand name for XXH3_128bits_withSeed(). */
+    XXH128_hash_t const result = XXH3_128bits_withSeed(input, len, seed);
+    return result;
+}
+
+
+/* === XXH3 128-bit streaming === */
+
+/*
+ * All the functions are actually the same as for the 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/* Resetting a state for 128-bit hashing is delegated unchanged to the 64-bit reset helper. */
+static void
+XXH3_128bits_reset_internal(XXH3_state_t* statePtr,
+                            XXH64_hash_t seed,
+                            const void* secret,
+                            size_t secretSize)
+{
+    XXH3_64bits_reset_internal(statePtr,
+                               seed,
+                               secret,
+                               secretSize);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH3_state_t* statePtr)
+{
+    /* Unseeded reset with the built-in secret; a NULL state is rejected. */
+    if (statePtr != NULL) {
+        XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+        return XXH_OK;
+    }
+    return XXH_ERROR;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{
+    /*
+     * Resets statePtr for 128-bit streaming with a caller-provided secret (seed 0).
+     * Returns XXH_ERROR when statePtr or secret is NULL, or when secretSize is
+     * below XXH3_SECRET_SIZE_MIN; returns XXH_OK otherwise.
+     *
+     * Every argument is validated BEFORE the state is touched, so a rejected
+     * call leaves *statePtr unchanged and never passes a NULL or undersized
+     * secret to the reset helper.
+     */
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+    /* Seeded reset; a NULL state is rejected and seed 0 takes the unseeded path. */
+    if (statePtr == NULL)
+        return XXH_ERROR;
+    if (seed == 0)
+        return XXH3_128bits_reset(statePtr);
+
+    /* The custom secret is regenerated only when the seed differs from the stored one. */
+    if (statePtr->seed != seed) {
+        XXH3_initCustomSecret(statePtr->customSecret, seed);
+    }
+    /* A NULL external secret makes the digest use the state's custom secret. */
+    XXH3_128bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+    /* Feeds `len` bytes into the state through the shared XXH3_update routine. */
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+    return XXH3_update(state, bytes, len,
+                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+{
+    /*
+     * Returns the 128-bit hash of the data accumulated in `state`.
+     * The state is only read through a const pointer.
+     */
+    /* Use the external secret when one is attached, else the state's own copy. */
+    const unsigned char* const activeSecret =
+        (state->extSecret != NULL) ? state->extSecret : state->customSecret;
+    size_t const secretSize = state->secretLimit + XXH_STRIPE_LEN;
+
+    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
+        /* Short input: hash state->buffer with the one-shot entry points. */
+        size_t const shortLen = (size_t)state->totalLen;
+        if (state->seed != 0)
+            return XXH3_128bits_withSeed(state->buffer, shortLen, state->seed);
+        return XXH3_128bits_withSecret(state->buffer, shortLen,
+                                       activeSecret, secretSize);
+    }
+
+    /* Long input: XXH3_digest_long fills the local accumulators, merged once per half. */
+    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, activeSecret);
+        XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        {   XXH128_hash_t result;
+            result.low64  = XXH3_mergeAccs(acc,
+                                           activeSecret + XXH_SECRET_MERGEACCS_START,
+                                           (xxh_u64)state->totalLen * XXH_PRIME64_1);
+            result.high64 = XXH3_mergeAccs(acc,
+                                           activeSecret + secretSize
+                                                        - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                           ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+            return result;
+        }
+    }
+}
+
+/* 128-bit utility functions */
+
+#include <string.h> /* memcmp, memcpy */
+
+/* Returns 1 when both hashes are identical, 0 otherwise. */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
+{
+    /* A raw byte comparison is valid: XXH128_hash_t is compact, it has no padding byte. */
+    int const differs = memcmp(&h1, &h2, sizeof(h1));
+    return differs == 0;
+}
+
+/* Three-way comparison of two XXH128_hash_t values, usable as a qsort() callback.
+ * Compares high64 first and low64 only on a tie.
+ * return :  1 if *h128_1 >  *h128_2
+ *          -1 if *h128_1 <  *h128_2
+ *           0 if *h128_1 == *h128_2 */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+{
+    const XXH128_hash_t* const lhs = (const XXH128_hash_t*)h128_1;
+    const XXH128_hash_t* const rhs = (const XXH128_hash_t*)h128_2;
+
+    /* The high halves decide in most cases, since hash values usually differ. */
+    if (lhs->high64 != rhs->high64)
+        return (lhs->high64 > rhs->high64) ? 1 : -1;
+    if (lhs->low64 != rhs->low64)
+        return (lhs->low64 > rhs->low64) ? 1 : -1;
+    return 0;
+}
+
+
+/*====== Canonical representation ======*/
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+{
+    /* Serializes the hash as high64 followed by low64, each byte-swapped on little-endian CPUs. */
+    XXH64_hash_t high = hash.high64;
+    XXH64_hash_t low  = hash.low64;
+    char* const out = (char*)dst;
+
+    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        high = XXH_swap64(high);
+        low  = XXH_swap64(low);
+    }
+    memcpy(out, &high, sizeof(high));
+    memcpy(out + sizeof(high), &low, sizeof(low));
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+{
+    /* Reads two big-endian 64-bit words: high64 at the start of src, low64 at digest offset 8. */
+    XXH128_hash_t result;
+    result.low64  = XXH_readBE64(src->digest + 8);
+    result.high64 = XXH_readBE64(src);
+    return result;
+}
+
+/* Pop our optimization override from above */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+# pragma GCC pop_options
+#endif
+
+#endif /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXH_IMPLEMENTATION */
+
+
+#if defined (__cplusplus)
+}
+#endif