From ed47c1557915bb2472f6959e723cd76155312a98 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Mon, 6 Apr 2020 00:50:58 +0800 Subject: Add deduper (unfinished tool for finding image duplicates). --- deduper/libpuzzle/src/CMakeLists.txt | 21 + deduper/libpuzzle/src/Makefile.am | 72 +++ deduper/libpuzzle/src/compress.c | 125 ++++ deduper/libpuzzle/src/cvec.c | 202 +++++++ deduper/libpuzzle/src/dvec.c | 663 +++++++++++++++++++++ deduper/libpuzzle/src/globals.h | 26 + deduper/libpuzzle/src/pics/Makefile.am | 8 + deduper/libpuzzle/src/pics/duck.gif | Bin 0 -> 7196 bytes deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg | Bin 0 -> 41128 bytes .../src/pics/luxmarket_tshirt01_black.jpg | Bin 0 -> 19800 bytes .../libpuzzle/src/pics/luxmarket_tshirt01_sal.jpg | Bin 0 -> 24646 bytes .../src/pics/luxmarket_tshirt01_sheum.jpg | Bin 0 -> 16128 bytes deduper/libpuzzle/src/pics/pic-a-0.jpg | Bin 0 -> 13946 bytes deduper/libpuzzle/src/pics/pic-a-1.jpg | Bin 0 -> 27407 bytes deduper/libpuzzle/src/puzzle-diff.c | 130 ++++ deduper/libpuzzle/src/puzzle.c | 22 + deduper/libpuzzle/src/puzzle.h | 122 ++++ deduper/libpuzzle/src/puzzle_common.h | 18 + deduper/libpuzzle/src/puzzle_p.h | 67 +++ deduper/libpuzzle/src/regress_1.c | 32 + deduper/libpuzzle/src/regress_2.c | 72 +++ deduper/libpuzzle/src/regress_3.c | 35 ++ deduper/libpuzzle/src/tunables.c | 84 +++ deduper/libpuzzle/src/vector_ops.c | 95 +++ 24 files changed, 1794 insertions(+) create mode 100644 deduper/libpuzzle/src/CMakeLists.txt create mode 100644 deduper/libpuzzle/src/Makefile.am create mode 100644 deduper/libpuzzle/src/compress.c create mode 100644 deduper/libpuzzle/src/cvec.c create mode 100644 deduper/libpuzzle/src/dvec.c create mode 100644 deduper/libpuzzle/src/globals.h create mode 100644 deduper/libpuzzle/src/pics/Makefile.am create mode 100644 deduper/libpuzzle/src/pics/duck.gif create mode 100644 deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg create mode 100644 deduper/libpuzzle/src/pics/luxmarket_tshirt01_black.jpg 
create mode 100644 deduper/libpuzzle/src/pics/luxmarket_tshirt01_sal.jpg create mode 100644 deduper/libpuzzle/src/pics/luxmarket_tshirt01_sheum.jpg create mode 100644 deduper/libpuzzle/src/pics/pic-a-0.jpg create mode 100644 deduper/libpuzzle/src/pics/pic-a-1.jpg create mode 100644 deduper/libpuzzle/src/puzzle-diff.c create mode 100644 deduper/libpuzzle/src/puzzle.c create mode 100644 deduper/libpuzzle/src/puzzle.h create mode 100644 deduper/libpuzzle/src/puzzle_common.h create mode 100644 deduper/libpuzzle/src/puzzle_p.h create mode 100644 deduper/libpuzzle/src/regress_1.c create mode 100644 deduper/libpuzzle/src/regress_2.c create mode 100644 deduper/libpuzzle/src/regress_3.c create mode 100644 deduper/libpuzzle/src/tunables.c create mode 100644 deduper/libpuzzle/src/vector_ops.c (limited to 'deduper/libpuzzle/src') diff --git a/deduper/libpuzzle/src/CMakeLists.txt b/deduper/libpuzzle/src/CMakeLists.txt new file mode 100644 index 0000000..634ef38 --- /dev/null +++ b/deduper/libpuzzle/src/CMakeLists.txt @@ -0,0 +1,21 @@ +project(puzzle C) + +include(FindPkgConfig) +pkg_search_module(gdlib REQUIRED gdlib) + +add_library(puzzle STATIC + globals.h + puzzle_common.h + puzzle_p.h + puzzle.h + compress.c + cvec.c + dvec.c + puzzle.c + tunables.c + vector_ops.c +) +target_include_directories(puzzle + PRIVATE + ${gdlib_INCLUDE_DIRS} +) diff --git a/deduper/libpuzzle/src/Makefile.am b/deduper/libpuzzle/src/Makefile.am new file mode 100644 index 0000000..3016925 --- /dev/null +++ b/deduper/libpuzzle/src/Makefile.am @@ -0,0 +1,72 @@ +lib_LTLIBRARIES = \ + libpuzzle.la + +libpuzzle_la_LDFLAGS = -version-info 1:0 + +libpuzzle_la_SOURCES = \ + puzzle.c \ + tunables.c \ + dvec.c \ + cvec.c \ + compress.c \ + vector_ops.c \ + puzzle_common.h \ + puzzle_p.h \ + globals.h \ + puzzle.h + +include_HEADERS = \ + puzzle.h + +noinst_HEADERS = \ + puzzle_common.h \ + puzzle_p.h \ + globals.h + +bin_PROGRAMS = \ + puzzle-diff + +puzzle_diff_SOURCES = \ + puzzle-diff.c \ + puzzle_common.h 
\ + puzzle.h + +puzzle_diff_LDADD = \ + libpuzzle.la + +TESTS = \ + regress_1 \ + regress_2 \ + regress_3 + +check_PROGRAMS = \ + regress_1 \ + regress_2 \ + regress_3 + +regress_1_SOURCES = \ + regress_1.c \ + puzzle_common.h \ + puzzle.h + +regress_2_SOURCES = \ + regress_2.c \ + puzzle_common.h \ + puzzle.h + +regress_3_SOURCES = \ + regress_3.c \ + puzzle_common.h \ + puzzle.h + +regress_1_LDADD = \ + libpuzzle.la + +regress_2_LDADD = \ + libpuzzle.la + +regress_3_LDADD = \ + libpuzzle.la + +SUBDIRS = \ + pics diff --git a/deduper/libpuzzle/src/compress.c b/deduper/libpuzzle/src/compress.c new file mode 100644 index 0000000..e71da95 --- /dev/null +++ b/deduper/libpuzzle/src/compress.c @@ -0,0 +1,125 @@ +#include "puzzle_common.h" +#include "puzzle_p.h" +#include "puzzle.h" +#include "globals.h" + +void puzzle_init_compressed_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec) +{ + (void) context; + compressed_cvec->sizeof_compressed_vec = (size_t) 0U; + compressed_cvec->vec = NULL; +} + +void puzzle_free_compressed_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec) +{ + (void) context; + free(compressed_cvec->vec); + compressed_cvec->vec = NULL; +} + +int puzzle_compress_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec, + const PuzzleCvec * const cvec) +{ +#define PC_NM(X) ((unsigned char) ((X) + 2)) + size_t remaining = cvec->sizeof_vec; + const signed char *ptr; + unsigned char *cptr; + + (void) context; + compressed_cvec->sizeof_compressed_vec = + (cvec->sizeof_vec + (size_t) 2U) / (size_t) 3U; + if ((compressed_cvec->vec = + calloc(compressed_cvec->sizeof_compressed_vec, + sizeof *compressed_cvec->vec)) == NULL) { + return -1; + } + ptr = cvec->vec; + cptr = compressed_cvec->vec; + while (remaining >= (size_t) 3U) { + *cptr++ = PC_NM(ptr[0]) + PC_NM(ptr[1]) * 5U + + PC_NM(ptr[2]) * (5U * 5U); + ptr += 3U; + remaining -= 3U; + } + if (remaining == (size_t) 1U) 
{ + *cptr++ = PC_NM(ptr[0]); + compressed_cvec->vec[0] |= 128U; + } else if (remaining == (size_t) 2U) { + *cptr++ = PC_NM(ptr[0]) + PC_NM(ptr[1]) * 5U; + if (compressed_cvec->sizeof_compressed_vec < (size_t) 2U) { + puzzle_err_bug(__FILE__, __LINE__); + } + compressed_cvec->vec[1] |= 128U; + } + if ((size_t) (cptr - compressed_cvec->vec) != + compressed_cvec->sizeof_compressed_vec) { + puzzle_err_bug(__FILE__, __LINE__); + } + return 0; +} + +int puzzle_uncompress_cvec(PuzzleContext * const context, + const PuzzleCompressedCvec * const compressed_cvec, + PuzzleCvec * const cvec) +{ +#define PC_FL(X) ((X) & 127U) +#define PC_NP(X) ((signed char) (X) - 2) + + size_t remaining; + unsigned char trailing_bits; + const unsigned char *cptr = compressed_cvec->vec; + signed char *ptr; + unsigned char c; + + (void) context; + if (cvec->vec != NULL) { + puzzle_err_bug(__FILE__, __LINE__); + } + if ((remaining = compressed_cvec->sizeof_compressed_vec) < (size_t) 2U) { + puzzle_err_bug(__FILE__, __LINE__); + } + trailing_bits = ((cptr[0] & 128U) >> 7) | ((cptr[1] & 128U) >> 6); + if (trailing_bits > 2U) { + puzzle_err_bug(__FILE__, __LINE__); + } + cvec->sizeof_vec = (size_t) 3U * + (compressed_cvec->sizeof_compressed_vec - trailing_bits) + + trailing_bits; + if (compressed_cvec->sizeof_compressed_vec > + SIZE_MAX / (size_t) 3U - (size_t) 2U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if ((cvec->vec = calloc(cvec->sizeof_vec, sizeof *cvec->vec)) == NULL) { + return -1; + } + if (trailing_bits != 0U) { + if (remaining <= (size_t) 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + remaining--; + } + ptr = cvec->vec; + while (remaining > (size_t) 0U) { + c = PC_FL(*cptr++); + *ptr++ = PC_NP(c % 5U); + c /= 5U; + *ptr++ = PC_NP(c % 5U); + c /= 5U; + *ptr++ = PC_NP(c % 5U); + remaining--; + } + if (trailing_bits == 1U) { + *ptr++ = PC_NP(PC_FL(*cptr) % 5U); + } else if (trailing_bits == 2U) { + c = PC_FL(*cptr); + *ptr++ = PC_NP(c % 5U); + *ptr++ = PC_NP(c / 5U % 5U); + } + if 
/* qsort() comparator: orders doubles ascending. */
static int puzzle_median_cmp(const void * const a_, const void * const b_)
{
    const double a = * (const double *) a_;
    const double b = * (const double *) b_;

    if (a < b) {
        return -1;
    } else if (a > b) {
        return 1;
    }
    return 0;
}

/*
 * Return the cutoff value used to split a set of levels in two.
 *
 * The array is sorted IN PLACE, then the mean of the elements at indexes
 * size/2 and size/2 + 1 is returned. This is deliberately kept as the
 * library has always computed it, so existing signatures stay comparable.
 *
 * The upper index is clamped to the last element. Without the clamp, a
 * two-element array read vec[2], one element past the end of the buffer.
 *
 * vec  : values to examine; reordered by this call.
 * size : number of elements in vec; 0 yields 0.0.
 */
static double puzzle_median(double * const vec, size_t size)
{
    size_t n;
    size_t o;
    double avg;

    if (size <= (size_t) 0U) {
        return 0.0;
    }
    qsort((void *) vec, size, sizeof *vec, puzzle_median_cmp);
    n = size / (size_t) 2U;
    o = n + (size_t) 1U;
    if (o >= size) {
        o = size - (size_t) 1U;     /* size 1 -> 0, size 2 -> 1 */
    }
    avg = (vec[n] + vec[o]) / 2.0;
    /* fall back to vec[n] if the mean left the [vec[n], vec[o]] interval */
    if (avg < vec[n] || avg > vec[o]) {
        avg = vec[n];
    }
    return avg;
}
= dvec->vec; + s = cvec->sizeof_vec; + do { + dv = *dvecptr++; + if (dv >= - context->puzzle_noise_cutoff && + dv <= context->puzzle_noise_cutoff) { + continue; + } + if (dv < context->puzzle_noise_cutoff) { + darks[pos_darks++] = dv; + if (pos_darks > sizeof_darks) { + puzzle_err_bug(__FILE__, __LINE__); + } + } else if (dv > context->puzzle_noise_cutoff) { + lights[pos_lights++] = dv; + if (pos_lights > sizeof_lights) { + puzzle_err_bug(__FILE__, __LINE__); + } + } + } while (--s != (size_t) 0U); + lighter_cutoff = puzzle_median(lights, pos_lights); + darker_cutoff = puzzle_median(darks, pos_darks); + free(lights); + lights = NULL; + free(darks); + darks = NULL; + dvecptr = dvec->vec; + cvecptr = cvec->vec; + s = cvec->sizeof_vec; + do { + dv = *dvecptr++; + if (dv >= - context->puzzle_noise_cutoff && + dv <= context->puzzle_noise_cutoff) { + *cvecptr++ = 0; + } else if (dv < 0.0) { + *cvecptr++ = dv < darker_cutoff ? -2 : -1; + } else { + *cvecptr++ = dv > lighter_cutoff ? +2 : +1; + } + } while (--s != (size_t) 0U); + if ((size_t) (cvecptr - cvec->vec) != cvec->sizeof_vec) { + puzzle_err_bug(__FILE__, __LINE__); + } + out: + free(lights); + free(darks); + + return err; +} + +void puzzle_init_cvec(PuzzleContext * const context, PuzzleCvec * const cvec) +{ + (void) context; + cvec->sizeof_vec = (size_t) 0U; + cvec->vec = NULL; +} + +void puzzle_free_cvec(PuzzleContext * const context, PuzzleCvec * const cvec) +{ + (void) context; + free(cvec->vec); + cvec->vec = NULL; +} + +int puzzle_dump_cvec(PuzzleContext * const context, + const PuzzleCvec * const cvec) +{ + size_t s = cvec->sizeof_vec; + const signed char *vecptr = cvec->vec; + + (void) context; + if (s <= (size_t) 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + do { + printf("%d\n", *vecptr++); + } while (--s != (size_t) 0U); + + return 0; +} + +int puzzle_cvec_cksum(PuzzleContext * const context, + const PuzzleCvec * const cvec, unsigned int * const sum) +{ + size_t s = cvec->sizeof_vec; + const signed 
char *vecptr = cvec->vec; + + (void) context; + *sum = 5381; + do { + *sum += *sum << 5; + *sum ^= (unsigned int) *vecptr++; + } while (--s != (size_t) 0U); + + return 0; +} + +int puzzle_fill_cvec_from_file(PuzzleContext * const context, + PuzzleCvec * const cvec, + const char * const file) +{ + PuzzleDvec dvec; + int ret; + + puzzle_init_dvec(context, &dvec); + if ((ret = puzzle_fill_dvec_from_file(context, &dvec, file)) == 0) { + ret = puzzle_fill_cvec_from_dvec(context, cvec, &dvec); + } + puzzle_free_dvec(context, &dvec); + + return ret; +} + +int puzzle_fill_cvec_from_mem(PuzzleContext * const context, + PuzzleCvec * const cvec, + const void * const mem, + const size_t size) +{ + PuzzleDvec dvec; + int ret; + + puzzle_init_dvec(context, &dvec); + if ((ret = puzzle_fill_dvec_from_mem(context, &dvec, mem, size)) == 0) { + ret = puzzle_fill_cvec_from_dvec(context, cvec, &dvec); + } + puzzle_free_dvec(context, &dvec); + + return ret; +} diff --git a/deduper/libpuzzle/src/dvec.c b/deduper/libpuzzle/src/dvec.c new file mode 100644 index 0000000..f5d21f9 --- /dev/null +++ b/deduper/libpuzzle/src/dvec.c @@ -0,0 +1,663 @@ +#include "puzzle_common.h" +#include "puzzle_p.h" +#include "puzzle.h" +#include "globals.h" + +static void puzzle_init_view(PuzzleView * const view) +{ + view->width = view->height = 0U; + view->sizeof_map = (size_t) 0U; + view->map = NULL; +} + +static void puzzle_free_view(PuzzleView * const view) +{ + free(view->map); + view->map = NULL; +} + +static void puzzle_init_avglvls(PuzzleAvgLvls * const avglvls) +{ + avglvls->lambdas = 0U; + avglvls->sizeof_lvls = (size_t) 0U; + avglvls->lvls = NULL; +} + +static void puzzle_free_avglvls(PuzzleAvgLvls * const avglvls) +{ + free(avglvls->lvls); + avglvls->lvls = NULL; +} + +void puzzle_init_dvec(PuzzleContext * const context, PuzzleDvec * const dvec) +{ + (void) context; + dvec->sizeof_vec = dvec->sizeof_compressed_vec = (size_t) 0U; + dvec->vec = NULL; +} + +void puzzle_free_dvec(PuzzleContext * const 
context, PuzzleDvec * const dvec) +{ + (void) context; + free(dvec->vec); + dvec->vec = NULL; +} + +#define MAX_SIGNATURE_LENGTH 8U + +static PuzzleImageTypeCode puzzle_get_image_type_from_header(const unsigned char * const header) +{ + static const PuzzleImageType image_types[] = { + { (size_t) 4U, (const unsigned char *) + "GIF8", PUZZLE_IMAGE_TYPE_GIF }, + { (size_t) 3U, (const unsigned char *) + "\xff\xd8\xff", PUZZLE_IMAGE_TYPE_JPEG }, + { (size_t) 8U, (const unsigned char *) + "\x89PNG\r\n\x1a\n", PUZZLE_IMAGE_TYPE_PNG }, + { (size_t) 0U, NULL, PUZZLE_IMAGE_TYPE_UNKNOWN } + }; + const PuzzleImageType *image_type = image_types; + PuzzleImageTypeCode ret = PUZZLE_IMAGE_TYPE_UNKNOWN; + do { + if (image_type->sizeof_signature > MAX_SIGNATURE_LENGTH) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (memcmp(header, image_type->signature, + image_type->sizeof_signature) == 0) { + ret = image_type->image_type_code; + break; + } + image_type++; + } while (image_type->signature != NULL); + return ret; +} + +static PuzzleImageTypeCode puzzle_get_image_type_from_fp(FILE * const fp) +{ + unsigned char header[MAX_SIGNATURE_LENGTH]; + PuzzleImageTypeCode ret = PUZZLE_IMAGE_TYPE_ERROR; + fpos_t pos; + + if (fgetpos(fp, &pos) != 0) { + return PUZZLE_IMAGE_TYPE_ERROR; + } + rewind(fp); + if (fread(header, (size_t) 1U, sizeof header, fp) != sizeof header) { + goto bye; + } + ret = puzzle_get_image_type_from_header(header); + bye: + if (fsetpos(fp, &pos) != 0) { + puzzle_err_bug(__FILE__, __LINE__); + } + return ret; +} + +static int puzzle_autocrop_axis(PuzzleContext * const context, + PuzzleView * const view, + unsigned int * const crop0, + unsigned int * const crop1, + const unsigned int axisn, + const unsigned int axiso, + const int omaptrinc, const int nmaptrinc) +{ + double *chunk_contrasts; + size_t sizeof_chunk_contrasts; + double chunk_contrast = 0.0, total_contrast = 0.0, barrier_contrast; + unsigned char level = 0U; + unsigned char previous_level = 0U; + unsigned int 
chunk_n, chunk_o; + unsigned int chunk_n1, chunk_o1; + unsigned int max_crop; + const unsigned char *maptr; + + chunk_n1 = axisn - 1U; + chunk_o1 = axiso - 1U; + *crop0 = 0U; + *crop1 = chunk_n1; + if (axisn < (unsigned int) PUZZLE_MIN_SIZE_FOR_CROPPING || + axiso < (unsigned int) PUZZLE_MIN_SIZE_FOR_CROPPING) { + return 1; + } + sizeof_chunk_contrasts = chunk_n1 + 1U; + if ((chunk_contrasts = calloc(sizeof_chunk_contrasts, + sizeof *chunk_contrasts)) == NULL) { + return -1; + } + maptr = view->map; + if (axisn >= INT_MAX || axiso >= INT_MAX) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (INT_MAX / axisn < axiso) { + puzzle_err_bug(__FILE__, __LINE__); + } + chunk_n = chunk_n1; + do { + chunk_contrast = 0.0; + chunk_o = chunk_o1; + previous_level = *maptr; + do { + level = *maptr; + if (previous_level > level) { + chunk_contrast += (double) (previous_level - level); + } else { + chunk_contrast += (double) (level - previous_level); + } + previous_level = level; + maptr += omaptrinc; + } while (chunk_o-- != 0U); + chunk_contrasts[chunk_n] = chunk_contrast; + total_contrast += chunk_contrast; + maptr += nmaptrinc; + } while (chunk_n-- != 0U); + barrier_contrast = + total_contrast * context->puzzle_contrast_barrier_for_cropping; + total_contrast = 0.0; + *crop0 = 0U; + do { + total_contrast += chunk_contrasts[*crop0]; + if (total_contrast >= barrier_contrast) { + break; + } + } while ((*crop0)++ < chunk_n1); + total_contrast = 0.0; + *crop1 = chunk_n1; + do { + total_contrast += chunk_contrasts[*crop1]; + if (total_contrast >= barrier_contrast) { + break; + } + } while ((*crop1)-- > 0U); + free(chunk_contrasts); + if (*crop0 > chunk_n1 || *crop1 > chunk_n1) { + puzzle_err_bug(__FILE__, __LINE__); + } + max_crop = (unsigned int) + round((double) chunk_n1 * context->puzzle_max_cropping_ratio); + if (max_crop > chunk_n1) { + puzzle_err_bug(__FILE__, __LINE__); + } + *crop0 = MIN(*crop0, max_crop); + *crop1 = MAX(*crop1, chunk_n1 - max_crop); + + return 0; +} + +static 
int puzzle_autocrop_view(PuzzleContext * context, + PuzzleView * const view) +{ + unsigned int cropx0, cropx1; + unsigned int cropy0, cropy1; + unsigned int x, y; + unsigned char *maptr; + + if (puzzle_autocrop_axis(context, view, &cropx0, &cropx1, + view->width, view->height, + (int) view->width, + 1 - (int) (view->width * view->height)) < 0 || + puzzle_autocrop_axis(context, view, &cropy0, &cropy1, + view->height, view->width, + 1, 0) < 0) { + return -1; + } + if (cropx0 > cropx1 || cropy0 > cropy1) { + puzzle_err_bug(__FILE__, __LINE__); + } + maptr = view->map; + y = cropy0; + do { + x = cropx0; + do { + *maptr++ = PUZZLE_VIEW_PIXEL(view, x, y); + } while (x++ != cropx1); + } while (y++ != cropy1); + view->width = cropx1 - cropx0 + 1U; + view->height = cropy1 - cropy0 + 1U; + view->sizeof_map = (size_t) view->width * (size_t) view->height; + if (view->width <= 0U || view->height <= 0U || + SIZE_MAX / view->width < view->height) { + puzzle_err_bug(__FILE__, __LINE__); + } + return 0; +} + +static int puzzle_getview_from_gdimage(PuzzleContext * const context, + PuzzleView * const view, + gdImagePtr gdimage) +{ + unsigned int x, y; + const unsigned int x0 = 0U, y0 = 0U; + unsigned int x1, y1; + unsigned char *maptr; + int pixel; + + view->map = NULL; + view->width = (unsigned int) gdImageSX(gdimage); + view->height = (unsigned int) gdImageSY(gdimage); + view->sizeof_map = (size_t) (view->width * view->height); + if (view->width > context->puzzle_max_width || + view->height > context->puzzle_max_height) { + return -1; + } + if (view->sizeof_map <= (size_t) 0U || + INT_MAX / view->width < view->height || + SIZE_MAX / view->width < view->height || + (unsigned int) view->sizeof_map != view->sizeof_map) { + puzzle_err_bug(__FILE__, __LINE__); + } + x1 = view->width - 1U; + y1 = view->height - 1U; + if (view->width <= 0U || view->height <= 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if ((view->map = calloc(view->sizeof_map, sizeof *view->map)) == NULL) { + return 
-1; + } + if (x1 > INT_MAX || y1 > INT_MAX) { /* GD uses "int" for coordinates */ + puzzle_err_bug(__FILE__, __LINE__); + } + maptr = view->map; + x = x1; + if (gdImageTrueColor(gdimage) != 0) { + do { + y = y1; + do { + pixel = gdImageGetTrueColorPixel(gdimage, (int) x, (int) y); + *maptr++ = (unsigned char) + ((gdTrueColorGetRed(pixel) * 77 + + gdTrueColorGetGreen(pixel) * 151 + + gdTrueColorGetBlue(pixel) * 28 + 128) / 256); + } while (y-- != y0); + } while (x-- != x0); + } else { + do { + y = y1; + do { + pixel = gdImagePalettePixel(gdimage, x, y); + *maptr++ = (unsigned char) + ((gdimage->red[pixel] * 77 + + gdimage->green[pixel] * 151 + + gdimage->blue[pixel] * 28 + 128) / 256); + } while (y-- != y0); + } while (x-- != x0); + } + return 0; +} + +static double puzzle_softedgedlvl(const PuzzleView * const view, + const unsigned int x, const unsigned int y) +{ + unsigned int lvl = 0U; + unsigned int ax, ay; + unsigned int count = 0U; + const unsigned int xlimit = x + PUZZLE_PIXEL_FUZZ_SIZE; + const unsigned int ylimit = y + PUZZLE_PIXEL_FUZZ_SIZE; + if (x >= view->width || y >= view->height || xlimit <= x || ylimit <= y) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (x > PUZZLE_PIXEL_FUZZ_SIZE) { + ax = x - PUZZLE_PIXEL_FUZZ_SIZE; + } else { + ax = 0U; + } + do { + if (ax >= view->width) { + break; + } + if (y > PUZZLE_PIXEL_FUZZ_SIZE) { + ay = y - PUZZLE_PIXEL_FUZZ_SIZE; + } else { + ay = 0U; + } + do { + if (ay >= view->height) { + break; + } + count++; + lvl += (unsigned int) PUZZLE_VIEW_PIXEL(view, ax, ay); + } while (ay++ < ylimit); + } while (ax++ < xlimit); + if (count <= 0U) { + return 0.0; + } + return (double) lvl / (double) count; +} + +static double puzzle_get_avglvl(const PuzzleView * const view, + const unsigned int x, const unsigned int y, + const unsigned int width, + const unsigned int height) +{ + double lvl = 0.0; + const unsigned int xlimit = x + width - 1U; + const unsigned int ylimit = y + height - 1U; + unsigned int ax, ay; + + if 
(width <= 0U || height <= 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (xlimit < x || ylimit < y) { + puzzle_err_bug(__FILE__, __LINE__); + } + ax = x; + do { + if (ax >= view->width) { + puzzle_err_bug(__FILE__, __LINE__); + } + ay = y; + do { + if (ay >= view->height) { + puzzle_err_bug(__FILE__, __LINE__); + } + lvl += puzzle_softedgedlvl(view, ax, ay); + } while (ay++ < ylimit); + } while (ax++ < xlimit); + + return lvl / (double) (width * height); +} + +static int puzzle_fill_avglgls(PuzzleContext * const context, + PuzzleAvgLvls * const avglvls, + const PuzzleView * const view, + const unsigned int lambdas) +{ + double width = (double) view->width; + double height = (double) view->height; + double xshift, yshift; + double x, y; + unsigned int p; + unsigned int lx, ly; + unsigned int xd, yd; + unsigned int px, py; + unsigned int lwidth, lheight; + double avglvl; + + avglvls->lambdas = lambdas; + avglvls->sizeof_lvls = (size_t) lambdas * lambdas; + if (UINT_MAX / lambdas < lambdas || + (unsigned int) avglvls->sizeof_lvls != avglvls->sizeof_lvls) { + puzzle_err_bug(__FILE__, __LINE__); + } + if ((avglvls->lvls = calloc(avglvls->sizeof_lvls, + sizeof *avglvls->lvls)) == NULL) { + return -1; + } + xshift = (width - + (width * (double) lambdas / (double) SUCC(lambdas))) / 2.0; + yshift = (height - + (height * (double) lambdas / (double) SUCC(lambdas))) / 2.0; + p = (unsigned int) round(MIN(width, height) / + (SUCC(lambdas) * context->puzzle_p_ratio)); + if (p < PUZZLE_MIN_P) { + p = PUZZLE_MIN_P; + } + lx = 0U; + do { + ly = 0U; + do { + x = xshift + (double) lx * PRED(width) / SUCC(lambdas); + y = yshift + (double) ly * PRED(height) / SUCC(lambdas); + lwidth = (unsigned int) round + (xshift + (double) SUCC(lx) * PRED(width) / + (double) SUCC(lambdas) - x); + lheight = (unsigned int) round + (yshift + (double) SUCC(ly) * PRED(height) / + (double) SUCC(lambdas) - y); + if (p < lwidth) { + xd = (unsigned int) round(x + (lwidth - p) / 2.0); + } else { + xd = 
(unsigned int) round(x); + } + if (p < lheight) { + yd = (unsigned int) round(y + (lheight - p) / 2.0); + } else { + yd = (unsigned int) round(y); + } + if (view->width - xd < p) { + px = 1U; + } else { + px = p; + } + if (view->height - yd < p) { + py = 1U; + } else { + py = p; + } + if (px > 0U && py > 0U) { + avglvl = puzzle_get_avglvl(view, xd, yd, px, py); + } else { + avglvl = 0.0; + } + PUZZLE_AVGLVL(avglvls, lx, ly) = avglvl; + } while (++ly < lambdas); + } while (++lx < lambdas); + + return 0; +} + +static unsigned int puzzle_add_neighbors(double ** const vecur, + const unsigned int max_neighbors, + const PuzzleAvgLvls * const avglvls, + const unsigned int lx, + const unsigned int ly) +{ + unsigned int ax, ay; + unsigned int xlimit, ylimit; + unsigned int neighbors = 0U; + const double ref = PUZZLE_AVGLVL(avglvls, lx, ly); + + if (max_neighbors != 8U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (lx >= avglvls->lambdas - 1U) { + xlimit = avglvls->lambdas - 1U; + } else { + xlimit = lx + 1U; + } + if (ly >= avglvls->lambdas - 1U) { + ylimit = avglvls->lambdas - 1U; + } else { + ylimit = ly + 1U; + } + if (lx <= 0U) { + ax = 0U; + } else { + ax = lx - 1U; + } + do { + if (ly <= 0U) { + ay = 0U; + } else { + ay = ly - 1U; + } + do { + if (ax == lx && ay == ly) { + continue; + } + *(*vecur)++ = ref - PUZZLE_AVGLVL(avglvls, ax, ay); + neighbors++; + if (neighbors <= 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + } while (ay++ < ylimit); + } while (ax++ < xlimit); + if (neighbors > max_neighbors) { + puzzle_err_bug(__FILE__, __LINE__); + } + return neighbors; +} + +static int puzzle_fill_dvec(PuzzleDvec * const dvec, + const PuzzleAvgLvls * const avglvls) +{ + unsigned int lambdas; + unsigned int lx, ly; + double *vecur; + + lambdas = avglvls->lambdas; + dvec->sizeof_compressed_vec = (size_t) 0U; + dvec->sizeof_vec = (size_t) (lambdas * lambdas * PUZZLE_NEIGHBORS); + if (SIZE_MAX / + ((size_t) (lambdas * lambdas)) < (size_t) PUZZLE_NEIGHBORS || + 
(unsigned int) dvec->sizeof_vec != dvec->sizeof_vec) { + puzzle_err_bug(__FILE__, __LINE__); + } + if ((dvec->vec = calloc(dvec->sizeof_vec, sizeof *dvec->vec)) == NULL) { + return -1; + } + vecur = dvec->vec; + lx = 0U; + do { + ly = 0U; + do { + (void) puzzle_add_neighbors(&vecur, PUZZLE_NEIGHBORS, + avglvls, lx, ly); + } while (++ly < lambdas); + } while (++lx < lambdas); + dvec->sizeof_compressed_vec = (size_t) (vecur - dvec->vec); + + return 0; +} + +static void puzzle_remove_transparency(gdImagePtr gdimage) +{ + int background = gdTrueColor(255, 255, 255); + int x, y, cpix; + + gdImagePaletteToTrueColor(gdimage); + + for (y = 0; y < gdImageSY(gdimage); y++) { + for (x = 0; x < gdImageSX(gdimage); x++) { + cpix = gdImageGetTrueColorPixel(gdimage, x, y); + gdImageSetPixel(gdimage, x, y, gdAlphaBlend(background, cpix)); + } + } +} + +static gdImagePtr puzzle_create_gdimage_from_file(const char * const file) +{ + gdImagePtr gdimage = NULL; + FILE *fp; + PuzzleImageTypeCode image_type_code; + if ((fp = fopen(file, "rb")) == NULL) { + return NULL; + } + image_type_code = puzzle_get_image_type_from_fp(fp); + switch (image_type_code) { + case PUZZLE_IMAGE_TYPE_JPEG: + gdimage = gdImageCreateFromJpeg(fp); + break; + case PUZZLE_IMAGE_TYPE_PNG: + gdimage = gdImageCreateFromPng(fp); + break; + case PUZZLE_IMAGE_TYPE_GIF: + gdimage = gdImageCreateFromGif(fp); + break; + default: + gdimage = NULL; + } + (void) fclose(fp); + return gdimage; +} + +static gdImagePtr puzzle_create_gdimage_from_mem(const void * const mem, const size_t size) +{ + gdImagePtr gdimage = NULL; + PuzzleImageTypeCode image_type_code = puzzle_get_image_type_from_header(mem); + switch (image_type_code) { + case PUZZLE_IMAGE_TYPE_JPEG: + gdimage = gdImageCreateFromJpegPtr(size, (void *)mem); + break; + case PUZZLE_IMAGE_TYPE_PNG: + gdimage = gdImageCreateFromPngPtr(size, (void *)mem); + break; + case PUZZLE_IMAGE_TYPE_GIF: + gdimage = gdImageCreateFromGifPtr(size, (void *)mem); + break; + default: + 
gdimage = NULL; + } + return gdimage; +} + +static int puzzle_fill_dvec_from_gdimage(PuzzleContext * const context, + PuzzleDvec * const dvec, + const gdImagePtr gdimage) +{ + PuzzleView view; + PuzzleAvgLvls avglvls; + int ret = 0; + + if (context->magic != PUZZLE_CONTEXT_MAGIC) { + puzzle_err_bug(__FILE__, __LINE__); + } + puzzle_init_view(&view); + puzzle_init_avglvls(&avglvls); + puzzle_init_dvec(context, dvec); + ret = puzzle_getview_from_gdimage(context, &view, gdimage); + if (ret != 0) { + goto out; + } + if (context->puzzle_enable_autocrop != 0 && + (ret = puzzle_autocrop_view(context, &view)) < 0) { + goto out; + } + if ((ret = puzzle_fill_avglgls(context, &avglvls, + &view, context->puzzle_lambdas)) != 0) { + goto out; + } + ret = puzzle_fill_dvec(dvec, &avglvls); + out: + puzzle_free_view(&view); + puzzle_free_avglvls(&avglvls); + + return ret; +} + +int puzzle_fill_dvec_from_file(PuzzleContext * const context, + PuzzleDvec * const dvec, + const char * const file) +{ + int ret; + gdImagePtr gdimage = puzzle_create_gdimage_from_file(file); + if (gdimage == NULL) { + return -1; + } + puzzle_remove_transparency(gdimage); + ret = puzzle_fill_dvec_from_gdimage(context, dvec, gdimage); + gdImageDestroy(gdimage); + return ret; +} + +int puzzle_fill_dvec_from_mem(PuzzleContext * const context, + PuzzleDvec * const dvec, + const void * const mem, + const size_t size) +{ + int ret; + gdImagePtr gdimage = puzzle_create_gdimage_from_mem(mem, size); + if (gdimage == NULL) { + return -1; + } + puzzle_remove_transparency(gdimage); + ret = puzzle_fill_dvec_from_gdimage(context, dvec, gdimage); + gdImageDestroy(gdimage); + return ret; +} + +int puzzle_dump_dvec(PuzzleContext * const context, + const PuzzleDvec * const dvec) +{ + size_t s = dvec->sizeof_compressed_vec; + const double *vecptr = dvec->vec; + + (void) context; + if (s <= (size_t) 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + do { + printf("%g\n", *vecptr++); + } while (--s != (size_t) 0U); + + return 0; 
+} diff --git a/deduper/libpuzzle/src/globals.h b/deduper/libpuzzle/src/globals.h new file mode 100644 index 0000000..757c5c7 --- /dev/null +++ b/deduper/libpuzzle/src/globals.h @@ -0,0 +1,26 @@ +#ifndef __GLOBALS_H__ +#define __GLOBALS_H__ 1 + +#ifdef DEFINE_GLOBALS +# define GLOBAL0(A) A +# define GLOBAL(A, B) A = B +#else +# define GLOBAL0(A) extern A +# define GLOBAL(A, B) extern A +#endif + +GLOBAL(PuzzleContext puzzle_global_context, +{ + /* unsigned int puzzle_max_width */ PUZZLE_DEFAULT_MAX_WIDTH _COMA_ + /* unsigned int puzzle_max_height */ PUZZLE_DEFAULT_MAX_HEIGHT _COMA_ + /* unsigned int puzzle_lambdas */ PUZZLE_DEFAULT_LAMBDAS _COMA_ + /* double puzzle_p_ratio */ PUZZLE_DEFAULT_P_RATIO _COMA_ + /* double puzzle_noise_cutoff */ PUZZLE_DEFAULT_NOISE_CUTOFF _COMA_ + /* double puzzle_contrast_barrier_for_cropping */ + PUZZLE_DEFAULT_CONTRAST_BARRIER_FOR_CROPPING _COMA_ + /* double puzzle_max_cropping_ratio */ + PUZZLE_DEFAULT_MAX_CROPPING_RATIO _COMA_ + /* int puzzle_enable_autocrop */ PUZZLE_DEFAULT_ENABLE_AUTOCROP _COMA_ + /* unsigned long magic */ PUZZLE_CONTEXT_MAGIC _COMA_ +}); +#endif diff --git a/deduper/libpuzzle/src/pics/Makefile.am b/deduper/libpuzzle/src/pics/Makefile.am new file mode 100644 index 0000000..510311f --- /dev/null +++ b/deduper/libpuzzle/src/pics/Makefile.am @@ -0,0 +1,8 @@ +EXTRA_DIST = \ + pic-a-0.jpg \ + pic-a-1.jpg \ + luxmarket_tshirt01.jpg \ + luxmarket_tshirt01_black.jpg \ + luxmarket_tshirt01_sal.jpg \ + luxmarket_tshirt01_sheum.jpg \ + duck.gif diff --git a/deduper/libpuzzle/src/pics/duck.gif b/deduper/libpuzzle/src/pics/duck.gif new file mode 100644 index 0000000..96c3037 Binary files /dev/null and b/deduper/libpuzzle/src/pics/duck.gif differ diff --git a/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg b/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg new file mode 100644 index 0000000..ffaf7eb Binary files /dev/null and b/deduper/libpuzzle/src/pics/luxmarket_tshirt01.jpg differ diff --git 
/*
 * Print the command-line help on stdout and terminate the process with
 * EXIT_SUCCESS. Never returns.
 *
 * The argument placeholders follow the option string accepted by
 * parse_opts() ("b:cC:eE:hH:l:n:p:tW:"): every option that takes a value is
 * shown with one, and the two image operands are listed.
 */
void usage(void)
{
    puts("\nUsage: puzzle-diff [-b <contrast barrier for cropping>]\n"
         "   [-c] [-C <max cropping ratio>] [-e] [-E similarity threshold] [-h]\n"
         "   [-H <max height>] [-l <lambdas>] [-n <noise cutoff>]\n"
         "   [-p <p ratio>] [-t] [-W <max width>] <file 1> <file 2>\n\n"
         "Visually compares two images and returns their distance.\n\n"
         "-b <contrast barrier for cropping>\n"
         "-c : disable autocrop\n"
         "-C <max cropping ratio>\n"
         "-e : exit with 10 (images are similar) or 20 (images are not)\n"
         "-E <threshold> : for -e\n"
         "-h : show help\n"
         "-H <height> : set max height\n"
         "-l <lambdas> : change lambdas\n"
         "-n <cutoff> : change noise cutoff\n"
         "-p <ratio> : set p ratio\n"
         "-t disable fix for texts\n"
         "-W <width> : set max width\n"
         "\n");
    exit(EXIT_SUCCESS);
}
/*
 * DEFINE_GLOBALS has to be defined before globals.h is included: it makes
 * the GLOBAL() macro in that header emit the definition (with initializer)
 * of puzzle_global_context in this translation unit instead of an `extern`
 * declaration.
 */
#define DEFINE_GLOBALS 1
#include "puzzle_common.h"
#include "puzzle_p.h"
#include "puzzle.h"
#include "globals.h"

/*
 * Initialize *context by copying puzzle_global_context into it.
 * `context` must point to writable storage; it is not checked for NULL.
 */
void puzzle_init_context(PuzzleContext * const context)
{
    *context = puzzle_global_context;
}

/*
 * Counterpart of puzzle_init_context().  Performs no work here; the
 * parameter is only referenced to avoid an unused-parameter warning.
 */
void puzzle_free_context(PuzzleContext * const context)
{
    (void) context;
}

/*
 * Report an internal inconsistency and terminate the process.
 * Prints the given file name and line number on stderr, then calls abort(),
 * so this function does not return.
 */
void puzzle_err_bug(const char * const file, const int line)
{
    fprintf(stderr, "*BUG* File: [%s] Line: [%d]\n", file, line);
    abort();
}
puzzle_max_width; + unsigned int puzzle_max_height; + unsigned int puzzle_lambdas; + double puzzle_p_ratio; + double puzzle_noise_cutoff; + double puzzle_contrast_barrier_for_cropping; + double puzzle_max_cropping_ratio; + int puzzle_enable_autocrop; + unsigned long magic; +} PuzzleContext; + +#ifdef __cplusplus +extern "C" { +#endif +void puzzle_init_context(PuzzleContext * const context); +void puzzle_free_context(PuzzleContext * const context); +int puzzle_set_max_width(PuzzleContext * const context, + const unsigned int width); +int puzzle_set_max_height(PuzzleContext * const context, + const unsigned int height); +int puzzle_set_lambdas(PuzzleContext * const context, + const unsigned int lambdas); +int puzzle_set_noise_cutoff(PuzzleContext * const context, + const double noise_cutoff); +int puzzle_set_p_ratio(PuzzleContext * const context, + const double p_ratio); +int puzzle_set_contrast_barrier_for_cropping(PuzzleContext * const context, + const double barrier); +int puzzle_set_max_cropping_ratio(PuzzleContext * const context, + const double ratio); +int puzzle_set_autocrop(PuzzleContext * const context, + const int enable); +void puzzle_init_cvec(PuzzleContext * const context, + PuzzleCvec * const cvec); +void puzzle_init_dvec(PuzzleContext * const context, + PuzzleDvec * const dvec); +int puzzle_fill_dvec_from_file(PuzzleContext * const context, + PuzzleDvec * const dvec, + const char * const file); +int puzzle_fill_cvec_from_file(PuzzleContext * const context, + PuzzleCvec * const cvec, + const char * const file); +int puzzle_fill_dvec_from_mem(PuzzleContext * const context, + PuzzleDvec * const dvec, + const void * const mem, + const size_t size); +int puzzle_fill_cvec_from_mem(PuzzleContext * const context, + PuzzleCvec * const cvec, + const void * const mem, + const size_t size); +int puzzle_fill_cvec_from_dvec(PuzzleContext * const context, + PuzzleCvec * const cvec, + const PuzzleDvec * const dvec); +void puzzle_free_cvec(PuzzleContext * const 
context, + PuzzleCvec * const cvec); +void puzzle_free_dvec(PuzzleContext * const context, + PuzzleDvec * const dvec); +int puzzle_dump_cvec(PuzzleContext * const context, + const PuzzleCvec * const cvec); +int puzzle_dump_dvec(PuzzleContext * const context, + const PuzzleDvec * const dvec); +int puzzle_cvec_cksum(PuzzleContext * const context, + const PuzzleCvec * const cvec, unsigned int * const sum); +void puzzle_init_compressed_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec); +void puzzle_free_compressed_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec); +int puzzle_compress_cvec(PuzzleContext * const context, + PuzzleCompressedCvec * const compressed_cvec, + const PuzzleCvec * const cvec); +int puzzle_uncompress_cvec(PuzzleContext * const context, + const PuzzleCompressedCvec * const compressed_cvec, + PuzzleCvec * const cvec); +int puzzle_vector_sub(PuzzleContext * const context, + PuzzleCvec * const cvecr, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts); +double puzzle_vector_euclidean_length(PuzzleContext * const context, + const PuzzleCvec * const cvec); +double puzzle_vector_normalized_distance(PuzzleContext * const context, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts); + +#ifdef __cplusplus +} +#endif + +#define PUZZLE_CVEC_SIMILARITY_THRESHOLD 0.6 +#define PUZZLE_CVEC_SIMILARITY_HIGH_THRESHOLD 0.7 +#define PUZZLE_CVEC_SIMILARITY_LOW_THRESHOLD 0.3 +#define PUZZLE_CVEC_SIMILARITY_LOWER_THRESHOLD 0.2 + +#define _COMA_ , + +#endif diff --git a/deduper/libpuzzle/src/puzzle_common.h b/deduper/libpuzzle/src/puzzle_common.h new file mode 100644 index 0000000..ebd340b --- /dev/null +++ b/deduper/libpuzzle/src/puzzle_common.h @@ -0,0 +1,18 @@ +#ifndef __PUZZLE_COMMON_H__ +#define __PUZZLE_COMMON_H__ 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + 
+#ifndef errno +extern int errno; +#endif + +#endif diff --git a/deduper/libpuzzle/src/puzzle_p.h b/deduper/libpuzzle/src/puzzle_p.h new file mode 100644 index 0000000..2f09494 --- /dev/null +++ b/deduper/libpuzzle/src/puzzle_p.h @@ -0,0 +1,67 @@ +#ifndef __PUZZLE_P_H__ +#define __PUZZLE_P_H__ 1 + +#include +#include + +typedef struct PuzzleView_ { + unsigned int width; + unsigned int height; + size_t sizeof_map; + unsigned char *map; +} PuzzleView; + +typedef struct PuzzleAvgLvls_ { + unsigned int lambdas; + size_t sizeof_lvls; + double *lvls; +} PuzzleAvgLvls; + +typedef enum PuzzleImageTypeCode_ { + PUZZLE_IMAGE_TYPE_ERROR, PUZZLE_IMAGE_TYPE_UNKNOWN, PUZZLE_IMAGE_TYPE_JPEG, + PUZZLE_IMAGE_TYPE_GIF, PUZZLE_IMAGE_TYPE_PNG +} PuzzleImageTypeCode; + +typedef struct PuzzleImageType_ { + const size_t sizeof_signature; + const unsigned char *signature; + const PuzzleImageTypeCode image_type_code; +} PuzzleImageType; + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#define PUZZLE_DEFAULT_LAMBDAS 9 +#define PUZZLE_DEFAULT_MAX_WIDTH 3000 +#define PUZZLE_DEFAULT_MAX_HEIGHT 3000 +#define PUZZLE_DEFAULT_NOISE_CUTOFF 2.0 +#define PUZZLE_DEFAULT_P_RATIO 2.0 +#define PUZZLE_MIN_P 2 +#define PUZZLE_PIXEL_FUZZ_SIZE 1 +#define PUZZLE_NEIGHBORS 8 +#define PUZZLE_MIN_SIZE_FOR_CROPPING 100 +#if PUZZLE_MIN_SIZE_FOR_CROPPING < 4 +# error PUZZLE_MIN_SIZE_FOR_CROPPING +#endif +#define PUZZLE_DEFAULT_CONTRAST_BARRIER_FOR_CROPPING 0.05 +#define PUZZLE_DEFAULT_MAX_CROPPING_RATIO 0.25 +#define PUZZLE_DEFAULT_ENABLE_AUTOCROP 1 + +#define PUZZLE_VIEW_PIXEL(V, X, Y) (*((V)->map + (V)->width * (Y) + (X))) +#define PUZZLE_AVGLVL(A, X, Y) (*((A)->lvls + (A)->lambdas * (Y) + (X))) + +#define PUZZLE_CONTEXT_MAGIC 0xdeadbeef + +#ifndef MIN +# define MIN(A, B) ((A) < (B) ? (A) : (B)) +#endif +#ifndef MAX +# define MAX(A, B) ((A) > (B) ? 
(A) : (B)) +#endif +#define SUCC(A) ((A) + 1) +#define PRED(A) ((A) - 1) + +void puzzle_err_bug(const char * const file, const int line); + +#endif diff --git a/deduper/libpuzzle/src/regress_1.c b/deduper/libpuzzle/src/regress_1.c new file mode 100644 index 0000000..80462b8 --- /dev/null +++ b/deduper/libpuzzle/src/regress_1.c @@ -0,0 +1,32 @@ +#include "puzzle_common.h" +#include "puzzle.h" + +#define EXPECTED_RESULT 111444570 + +int main(void) +{ + PuzzleContext context; + PuzzleCvec cvec; + PuzzleCompressedCvec compressed_cvec; + unsigned int sum; + + puzzle_init_context(&context); + puzzle_init_compressed_cvec(&context, &compressed_cvec); + puzzle_init_cvec(&context, &cvec); + if (puzzle_fill_cvec_from_file(&context, &cvec, + "pics/luxmarket_tshirt01.jpg") != 0) { + fprintf(stderr, "File not found\n"); + exit(0); + } + puzzle_compress_cvec(&context, &compressed_cvec, &cvec); + puzzle_free_cvec(&context, &cvec); + puzzle_init_cvec(&context, &cvec); + puzzle_uncompress_cvec(&context, &compressed_cvec, &cvec); + puzzle_cvec_cksum(&context, &cvec, &sum); + puzzle_free_cvec(&context, &cvec); + puzzle_free_compressed_cvec(&context, &compressed_cvec); + puzzle_free_context(&context); + printf("%u %u\n", sum, (unsigned int) EXPECTED_RESULT); + + return sum != EXPECTED_RESULT; +} diff --git a/deduper/libpuzzle/src/regress_2.c b/deduper/libpuzzle/src/regress_2.c new file mode 100644 index 0000000..a37b626 --- /dev/null +++ b/deduper/libpuzzle/src/regress_2.c @@ -0,0 +1,72 @@ +#include "puzzle_common.h" +#include "puzzle.h" + +int main(void) +{ + PuzzleContext context; + PuzzleCvec cvec1, cvec2, cvec3, cvec4, cvec5, cvec6; + double d1, d2, d3, d4, d5, d6; + + puzzle_init_context(&context); + puzzle_init_cvec(&context, &cvec1); + puzzle_init_cvec(&context, &cvec2); + puzzle_init_cvec(&context, &cvec3); + puzzle_init_cvec(&context, &cvec4); + puzzle_init_cvec(&context, &cvec5); + puzzle_init_cvec(&context, &cvec6); + if (puzzle_fill_cvec_from_file + (&context, &cvec1, 
"pics/luxmarket_tshirt01.jpg") != 0) { + fprintf(stderr, "File 1 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file + (&context, &cvec2, "pics/luxmarket_tshirt01_black.jpg") != 0) { + fprintf(stderr, "File 2 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file + (&context, &cvec3, "pics/luxmarket_tshirt01_sal.jpg") != 0) { + fprintf(stderr, "File 3 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file + (&context, &cvec4, "pics/luxmarket_tshirt01_sheum.jpg") != 0) { + fprintf(stderr, "File 4 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file + (&context, &cvec5, "pics/duck.gif") != 0) { + fprintf(stderr, "File 5 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file + (&context, &cvec6, "pics/pic-a-0.jpg") != 0) { + fprintf(stderr, "File 6 not found\n"); + exit(0); + } + d1 = puzzle_vector_normalized_distance(&context, &cvec2, &cvec1, 1); + d2 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, 1); + d3 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec3, 1); + d4 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec4, 1); + d5 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec5, 1); + d6 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec6, 1); + printf("%g %g %g %g %g %g\n", d1, d2, d3, d4, d5, d6); + puzzle_free_cvec(&context, &cvec1); + puzzle_free_cvec(&context, &cvec2); + puzzle_free_cvec(&context, &cvec3); + puzzle_free_cvec(&context, &cvec4); + puzzle_free_cvec(&context, &cvec5); + puzzle_free_cvec(&context, &cvec6); + puzzle_free_context(&context); + if ((int) (d1 * 100.0) != (int) (d2 * 100.0)) { + return 1; + } + if (d1 > PUZZLE_CVEC_SIMILARITY_THRESHOLD || + d3 > PUZZLE_CVEC_SIMILARITY_THRESHOLD || + d4 > PUZZLE_CVEC_SIMILARITY_THRESHOLD || + d5 < PUZZLE_CVEC_SIMILARITY_THRESHOLD || + d6 < PUZZLE_CVEC_SIMILARITY_THRESHOLD) { + return 2; + } + return 0; +} diff --git a/deduper/libpuzzle/src/regress_3.c b/deduper/libpuzzle/src/regress_3.c new file mode 100644 
index 0000000..33698ba --- /dev/null +++ b/deduper/libpuzzle/src/regress_3.c @@ -0,0 +1,35 @@ +#include "puzzle_common.h" +#include "puzzle.h" + +#define PUZZLE_VECTOR_SLICE 0.6 + +int main(void) +{ + PuzzleContext context; + PuzzleCvec cvec1, cvec2; + double d1, d2; + + puzzle_init_context(&context); + puzzle_init_cvec(&context, &cvec1); + puzzle_init_cvec(&context, &cvec2); + if (puzzle_fill_cvec_from_file(&context, &cvec1, + "pics/pic-a-0.jpg") != 0) { + fprintf(stderr, "File 1 not found\n"); + exit(0); + } + if (puzzle_fill_cvec_from_file(&context, &cvec2, + "pics/pic-a-1.jpg") != 0) { + fprintf(stderr, "File 2 not found\n"); + exit(0); + } + d1 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, 1); + d2 = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, 0); + printf("%g %g\n", d1, d2); + puzzle_free_cvec(&context, &cvec1); + puzzle_free_cvec(&context, &cvec2); + puzzle_free_context(&context); + if (d1 > PUZZLE_VECTOR_SLICE || d2 > PUZZLE_VECTOR_SLICE) { + return 2; + } + return 0; +} diff --git a/deduper/libpuzzle/src/tunables.c b/deduper/libpuzzle/src/tunables.c new file mode 100644 index 0000000..280dfb2 --- /dev/null +++ b/deduper/libpuzzle/src/tunables.c @@ -0,0 +1,84 @@ +#include "puzzle_common.h" +#include "puzzle_p.h" +#include "puzzle.h" +#include "globals.h" + +int puzzle_set_max_width(PuzzleContext * const context, + const unsigned int width) +{ + if (width <= 0U) { + return -1; + } + context->puzzle_max_width = width; + + return 0; +} + +int puzzle_set_max_height(PuzzleContext * const context, + const unsigned int height) +{ + if (height <= 0U) { + return -1; + } + context->puzzle_max_height = height; + + return 0; +} + +int puzzle_set_lambdas(PuzzleContext * const context, + const unsigned int lambdas) +{ + if (lambdas <= 0U) { + return -1; + } + context->puzzle_lambdas = lambdas; + + return 0; +} + +int puzzle_set_p_ratio(PuzzleContext * const context, const double p_ratio) +{ + if (p_ratio < 1.0) { + return -1; + } + 
context->puzzle_p_ratio = p_ratio; + + return 0; +} + +int puzzle_set_noise_cutoff(PuzzleContext * const context, + const double noise_cutoff) +{ + context->puzzle_noise_cutoff = noise_cutoff; + + return 0; +} + +int puzzle_set_contrast_barrier_for_cropping(PuzzleContext * const context, + const double barrier) +{ + if (barrier <= 0.0) { + return -1; + } + context->puzzle_contrast_barrier_for_cropping = barrier; + + return 0; +} + +int puzzle_set_max_cropping_ratio(PuzzleContext * const context, + const double ratio) +{ + if (ratio <= 0.0) { + return -1; + } + context->puzzle_max_cropping_ratio = ratio; + + return 0; +} + +int puzzle_set_autocrop(PuzzleContext * const context, const int enable) +{ + context->puzzle_enable_autocrop = (enable != 0); + + return 0; +} diff --git a/deduper/libpuzzle/src/vector_ops.c b/deduper/libpuzzle/src/vector_ops.c new file mode 100644 index 0000000..4fad5bf --- /dev/null +++ b/deduper/libpuzzle/src/vector_ops.c @@ -0,0 +1,95 @@ +#include "puzzle_common.h" +#include "puzzle_p.h" +#include "puzzle.h" +#include "globals.h" + +int puzzle_vector_sub(PuzzleContext * const context, + PuzzleCvec * const cvecr, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts) +{ + size_t remaining; + signed char c1, c2, cr; + + (void) context; + if (cvec1->sizeof_vec != cvec2->sizeof_vec || + cvec1->sizeof_vec <= (size_t) 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (cvecr->vec != NULL) { + puzzle_err_bug(__FILE__, __LINE__); + } + cvecr->sizeof_vec = cvec1->sizeof_vec; + if ((cvecr->vec = calloc(cvecr->sizeof_vec, sizeof *cvecr->vec)) == NULL) { + return -1; + } + remaining = cvec1->sizeof_vec; + if (fix_for_texts != 0) { + do { + remaining--; + c1 = cvec1->vec[remaining]; + c2 = cvec2->vec[remaining]; + if ((c1 == 0 && c2 == -2) || (c1 == -2 && c2 == 0)) { + cr = -3; + } else if ((c1 == 0 && c2 == +2) || (c1 == +2 && c2 == 0)) { + cr = +3; + } else { + cr = c1 - c2; + } + cvecr->vec[remaining] = cr; + 
} while (remaining > (size_t) 0U); + } else { + do { + remaining--; + cvecr->vec[remaining] = + cvec1->vec[remaining] - cvec2->vec[remaining]; + } while (remaining > (size_t) 0U); + } + return 0; +} + +double puzzle_vector_euclidean_length(PuzzleContext * const context, + const PuzzleCvec * const cvec) +{ + unsigned long t = 0U; + unsigned long c; + int c2; + size_t remaining; + + (void) context; + if ((remaining = cvec->sizeof_vec) <= (size_t) 0U) { + return 0.0; + } + do { + remaining--; + c2 = (int) cvec->vec[remaining]; + c = (unsigned long) (c2 * c2); + if (ULONG_MAX - t < c) { + puzzle_err_bug(__FILE__, __LINE__); + } + t += c; + } while (remaining > (size_t) 0U); + + return sqrt((double) t); +} + +double puzzle_vector_normalized_distance(PuzzleContext * const context, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts) +{ + PuzzleCvec cvecr; + double dt, dr; + + puzzle_init_cvec(context, &cvecr); + puzzle_vector_sub(context, &cvecr, cvec1, cvec2, fix_for_texts); + dt = puzzle_vector_euclidean_length(context, &cvecr); + puzzle_free_cvec(context, &cvecr); + dr = puzzle_vector_euclidean_length(context, cvec1) + + puzzle_vector_euclidean_length(context, cvec2); + if (dr == 0.0) { + return 0.0; + } + return dt / dr; +} -- cgit v1.2.3