From ed47c1557915bb2472f6959e723cd76155312a98 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Mon, 6 Apr 2020 00:50:58 +0800 Subject: Add deduper (unfinished tool for finding image duplicates). --- deduper/libpuzzle/src/vector_ops.c | 95 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 deduper/libpuzzle/src/vector_ops.c (limited to 'deduper/libpuzzle/src/vector_ops.c') diff --git a/deduper/libpuzzle/src/vector_ops.c b/deduper/libpuzzle/src/vector_ops.c new file mode 100644 index 0000000..4fad5bf --- /dev/null +++ b/deduper/libpuzzle/src/vector_ops.c @@ -0,0 +1,95 @@ +#include "puzzle_common.h" +#include "puzzle_p.h" +#include "puzzle.h" +#include "globals.h" + +int puzzle_vector_sub(PuzzleContext * const context, + PuzzleCvec * const cvecr, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts) +{ + size_t remaining; + signed char c1, c2, cr; + + (void) context; + if (cvec1->sizeof_vec != cvec2->sizeof_vec || + cvec1->sizeof_vec <= (size_t) 0U) { + puzzle_err_bug(__FILE__, __LINE__); + } + if (cvecr->vec != NULL) { + puzzle_err_bug(__FILE__, __LINE__); + } + cvecr->sizeof_vec = cvec1->sizeof_vec; + if ((cvecr->vec = calloc(cvecr->sizeof_vec, sizeof *cvecr->vec)) == NULL) { + return -1; + } + remaining = cvec1->sizeof_vec; + if (fix_for_texts != 0) { + do { + remaining--; + c1 = cvec1->vec[remaining]; + c2 = cvec2->vec[remaining]; + if ((c1 == 0 && c2 == -2) || (c1 == -2 && c2 == 0)) { + cr = -3; + } else if ((c1 == 0 && c2 == +2) || (c1 == +2 && c2 == 0)) { + cr = +3; + } else { + cr = c1 - c2; + } + cvecr->vec[remaining] = cr; + } while (remaining > (size_t) 0U); + } else { + do { + remaining--; + cvecr->vec[remaining] = + cvec1->vec[remaining] - cvec2->vec[remaining]; + } while (remaining > (size_t) 0U); + } + return 0; +} + +double puzzle_vector_euclidean_length(PuzzleContext * const context, + const PuzzleCvec * const cvec) +{ + unsigned long t = 0U; + unsigned long c; + int c2; + size_t remaining; + + (void) context; + if ((remaining = cvec->sizeof_vec) <= (size_t) 0U) { + return 0.0; + } + do { + remaining--; + c2 = (int) cvec->vec[remaining]; + c = (unsigned long) (c2 * c2); + if (ULONG_MAX - t < c) { + puzzle_err_bug(__FILE__, __LINE__); + } + t += c; + } while (remaining > (size_t) 0U); + + return sqrt((double) t); +} + +double puzzle_vector_normalized_distance(PuzzleContext * const context, + const PuzzleCvec * const cvec1, + const PuzzleCvec * const cvec2, + const int fix_for_texts) +{ + PuzzleCvec cvecr; + double dt, dr; + + puzzle_init_cvec(context, &cvecr); + puzzle_vector_sub(context, &cvecr, cvec1, cvec2, fix_for_texts); + dt = puzzle_vector_euclidean_length(context, &cvecr); + puzzle_free_cvec(context, &cvecr); + dr = puzzle_vector_euclidean_length(context, cvec1) + + puzzle_vector_euclidean_length(context, cvec2); + if (dr == 0.0) { + return 0.0; + } + return dt / dr; +} -- cgit v1.2.3