From ed47c1557915bb2472f6959e723cd76155312a98 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Mon, 6 Apr 2020 00:50:58 +0800 Subject: Add deduper (unfinished tool for finding image duplicates). --- deduper/libpuzzle/src/puzzle-diff.c | 130 ++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 deduper/libpuzzle/src/puzzle-diff.c (limited to 'deduper/libpuzzle/src/puzzle-diff.c') diff --git a/deduper/libpuzzle/src/puzzle-diff.c b/deduper/libpuzzle/src/puzzle-diff.c new file mode 100644 index 0000000..e0f3626 --- /dev/null +++ b/deduper/libpuzzle/src/puzzle-diff.c @@ -0,0 +1,130 @@ +#include "puzzle_common.h" +#include "puzzle.h" + +typedef struct Opts_ { + const char *file1; + const char *file2; + int fix_for_texts; + int exit; + double similarity_threshold; +} Opts; + +void usage(void) +{ + puts("\nUsage: puzzle-diff [-b ] [-e] [-E similarity threshold] [-h]\n" + " [-H ] [-l ] [-n ]\n" + " [-p

] [-t] [-W ] \n\n" + "Visually compares two images and returns their distance.\n\n" + "-b \n" + "-c : disable autocrop\n" + "-C \n" + "-e : exit with 10 (images are similar) or 20 (images are not)\n" + "-E : for -e\n" + "-h : show help\n" + "-H : set max height\n" + "-l : change lambdas\n" + "-n : change noise cutoff\n" + "-p : set p ratio\n" + "-t disable fix for texts\n" + "-W : set max width\n" + "\n"); + exit(EXIT_SUCCESS); +} + +int parse_opts(Opts * const opts, PuzzleContext * context, + int argc, char * const *argv) { + int opt; + extern char *optarg; + extern int optind; + + opts->fix_for_texts = 1; + opts->exit = 0; + opts->similarity_threshold = PUZZLE_CVEC_SIMILARITY_THRESHOLD; + while ((opt = getopt(argc, argv, "b:cC:eE:hH:l:n:p:tW:")) != -1) { + switch (opt) { + case 'b': + puzzle_set_contrast_barrier_for_cropping(context, atof(optarg)); + break; + case 'c': + puzzle_set_autocrop(context, 0); + break; + case 'C': + puzzle_set_max_cropping_ratio(context, atof(optarg)); + break; + case 'e': + opts->exit = 1; + break; + case 'E': + opts->similarity_threshold = atof(optarg); + break; + case 'h': + usage(); + /* NOTREACHED */ + case 'H': + puzzle_set_max_height(context, strtoul(optarg, NULL, 10)); + break; + case 'l': + puzzle_set_lambdas(context, strtoul(optarg, NULL, 10)); + break; + case 'n': + puzzle_set_noise_cutoff(context, atof(optarg)); + break; + case 'p': + puzzle_set_p_ratio(context, atof(optarg)); + break; + case 't': + opts->fix_for_texts = 0; + break; + case 'W': + puzzle_set_max_width(context, strtoul(optarg, NULL, 10)); + break; + default: + usage(); + /* NOTREACHED */ + } + } + argc -= optind; + argv += optind; + if (argc != 2) { + usage(); + } + opts->file1 = *argv++; + opts->file2 = *argv; + + return 0; +} + +int main(int argc, char *argv[]) +{ + Opts opts; + PuzzleContext context; + PuzzleCvec cvec1, cvec2; + double d; + + puzzle_init_context(&context); + parse_opts(&opts, &context, argc, argv); + puzzle_init_cvec(&context, &cvec1); + puzzle_init_cvec(&context, &cvec2); + if (puzzle_fill_cvec_from_file(&context, &cvec1, opts.file1) != 0) { + fprintf(stderr, "Unable to read [%s]\n", opts.file1); + return 1; + } + if (puzzle_fill_cvec_from_file(&context, &cvec2, opts.file2) != 0) { + fprintf(stderr, "Unable to read [%s]\n", opts.file2); + return 1; + } + d = puzzle_vector_normalized_distance(&context, &cvec1, &cvec2, + opts.fix_for_texts); + puzzle_free_cvec(&context, &cvec1); + puzzle_free_cvec(&context, &cvec2); + puzzle_free_context(&context); + if (opts.exit == 0) { + printf("%g\n", d); + return 0; + } + if (d > opts.similarity_threshold) { + return 20; + } + return 10; +} -- cgit v1.2.3