diff options
author | Chris Xiong <chirs241097@gmail.com> | 2022-08-27 22:36:28 -0400 |
---|---|---|
committer | Chris Xiong <chirs241097@gmail.com> | 2022-08-27 22:36:28 -0400 |
commit | 137960a0261245a433f161d8fa4cbb53e4e50e6a (patch) | |
tree | b6e124f744a5db48c5741ee0663fcf90faa0eccb | |
parent | 96fc17b99d56eb636c894c5be9ab39bfdb4ba454 (diff) | |
download | deduper-137960a0261245a433f161d8fa4cbb53e4e50e6a.tar.xz |
Fix distance calculation.
-rw-r--r-- | imageutil.hpp | 10 | ||||
-rw-r--r-- | signature.cpp | 93 | ||||
-rw-r--r-- | signature.hpp | 26 | ||||
-rw-r--r-- | tests/signature_test.cpp | 4 | ||||
-rw-r--r-- | tests/testdrive.cpp | 55 |
5 files changed, 123 insertions, 65 deletions
diff --git a/imageutil.hpp b/imageutil.hpp index 438c06b..15be98b 100644 --- a/imageutil.hpp +++ b/imageutil.hpp @@ -7,6 +7,8 @@ #include "compressed_vector.hpp" +#define sqr(x) ((x) * (x)) + class image_util { public: @@ -21,7 +23,7 @@ public: double ret = 0; for (size_t i = 0; i < v.size(); ++i) { - ret += (double)(v.get(i) - center) * (v.get(i) - center); + ret += sqr(1. * v.get(i) - center); } return sqrt(ret); } @@ -35,7 +37,7 @@ public: if (abs((int)v1.get(i) - (int)v2.get(i)) == 2 && (v1.get(i) == 2 || v2.get(i) == 2)) ret += 9; else - ret += (double)(v1.get(i) - v2.get(i)) * (v1.get(i) - v2.get(i)); + ret += sqr(1. * v1.get(i) - v2.get(i)); } return sqrt(ret); } @@ -44,7 +46,7 @@ public: double ret = 0; for (size_t i = 0; i < v.size(); ++i) { - ret += (double)(v[i] - center) * (v[i] - center); + ret += sqr(1. * v[i] - center); } return sqrt(ret); } @@ -57,7 +59,7 @@ public: if (abs((int)v1[i] - (int)v2[i]) == 2 && (v1[i] == 2 || v2[i] == 2)) ret += 9; else - ret += (double)(v1[i] - v2[i]) * (v1[i] - v2[i]); + ret += sqr(1. * v1[i] - v2[i]); } return sqrt(ret); } diff --git a/signature.cpp b/signature.cpp index 21de945..cd55a77 100644 --- a/signature.cpp +++ b/signature.cpp @@ -20,17 +20,17 @@ #include "imageutil.hpp" #include "signature.hpp" -signature_config signature::cfg = +static signature_config _default_cfg = { - 9, - 3, - 2, - true, - false, - 0.5, - 1./128, - 0.05, - 0.25 + 9, //slices + 3, //blur_window + 2, //min_window + true, //crop + false, //comp + 0.5, //pr + 1./128,//noise_threshold + 0.05, //contrast_threshold + 0.25 //max_cropping }; class signature_priv @@ -42,6 +42,7 @@ private: compressed_vector<uint8_t, 3> ct; std::vector<uint8_t> uct; bool compressed; + signature_config cfg; public: float get_light_charistics_cell(int x, int y, int w, int h); void get_light_charistics(); @@ -50,6 +51,7 @@ public: double length() const; double distance(const signature_priv &o) const; bool operator==(const signature_priv &o) const; + void dump() const; friend class signature; friend struct signature_hash; }; @@ -65,12 +67,12 @@ void signature_priv::get_light_charistics() int iw, ih, slc; iw = fimg.size().width; ih = fimg.size().height; - slc = signature::cfg.slices; + slc = cfg.slices; windowx = iw / (double)slc / 2; windowy = ih / (double)slc / 2; - int windows = round(std::min(iw, ih) / slc * signature::cfg.pr); - if (windows < signature::cfg.min_window) - windows = signature::cfg.min_window; + int windows = round(std::min(iw, ih) / slc * cfg.pr); + if (windows < cfg.min_window) + windows = cfg.min_window; double ww = (iw - 1) / (slc + 1.); double wh = (ih - 1) / (slc + 1.); double wxs = 0, wys = 0; @@ -99,7 +101,7 @@ void signature_priv::get_light_variance() { const int dx[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; const int dy[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; - int slc = signature::cfg.slices; + int slc = cfg.slices; float *lp = lch.ptr<float>(0); for (int x = 0; x < slc; ++x) { @@ -125,7 +127,7 @@ void signature_priv::get_signature() std::vector<double> darks; for (float &l : lv) { - if (fabsf(l) > signature::cfg.noise_threshold) + if (fabsf(l) > cfg.noise_threshold) { if (l > 0) lights.push_back(l); @@ -135,12 +137,12 @@ void signature_priv::get_signature() } double lth = image_util::median(lights); double dth = image_util::median(darks); - if (signature::cfg.compress) + if (cfg.compress) { compressed = true; for (float &l : lv) { - if (fabsf(l) > signature::cfg.noise_threshold) + if (fabsf(l) > cfg.noise_threshold) { if (l > 0) ct.push_back(l > lth ? 4 : 3); @@ -155,7 +157,7 @@ void signature_priv::get_signature() compressed = false; for (float &l : lv) { - if (fabsf(l) > signature::cfg.noise_threshold) + if (fabsf(l) > cfg.noise_threshold) { if (l > 0) uct.push_back(l > lth ? 4 : 3); @@ -178,9 +180,9 @@ double signature_priv::length() const double signature_priv::distance(const signature_priv &o) const { if (compressed && o.compressed) - return image_util::distance(ct, o.ct); + return image_util::distance(ct, o.ct) / (image_util::length(ct, uint8_t(2)) + image_util::length(o.ct, uint8_t(2))); else - return image_util::distance(uct, o.uct); + return image_util::distance(uct, o.uct) / (image_util::length(uct, uint8_t(2)) + image_util::length(o.uct, uint8_t(2))); } bool signature_priv::operator==(const signature_priv &o) const @@ -191,10 +193,26 @@ bool signature_priv::operator==(const signature_priv &o) const return uct == o.uct; } +void signature_priv::dump() const +{ + if (!compressed) + for (auto &x : this->uct) + printf("%u ", x); + else + for (size_t i = 0; i < this->ct.size(); ++i) + printf("%u ", this->ct.get(i)); + printf("\n"); +} + signature::signature() = default; -signature::signature(signature_priv* _p) : p(_p) {} +signature::signature(signature_priv* _p) : p(_p){} signature::~signature() = default; +void signature::dump() const +{ + if (p) p->dump(); +} + signature signature::clone() const { return signature(*this); @@ -218,21 +236,17 @@ bool signature::operator==(const signature &o) const return *p == *o.p; } -void signature::configure(signature_config _cfg) -{signature::cfg = _cfg;} - -signature_config signature::config() -{return signature::cfg;} - -signature signature::from_preprocessed_matrix(cv::Mat m) +signature signature::from_preprocessed_matrix(cv::Mat m, const signature_config &cfg) { signature_priv *p = new signature_priv; - if (signature::cfg.crop) - p->fimg = image_util::crop(m, signature::cfg.contrast_threshold, signature::cfg.max_cropping); + p->cfg = cfg; + + if (cfg.crop) + p->fimg = image_util::crop(m, cfg.contrast_threshold, cfg.max_cropping); else p->fimg = m; - if (signature::cfg.blur_window > 1) - cv::blur(p->fimg, p->fimg, cv::Size(signature::cfg.blur_window, signature::cfg.blur_window)); + if (cfg.blur_window > 1) + cv::blur(p->fimg, p->fimg, cv::Size(cfg.blur_window, cfg.blur_window)); p->get_light_charistics(); p->get_light_variance(); p->get_signature(); @@ -242,7 +256,7 @@ signature signature::from_preprocessed_matrix(cv::Mat m) return signature(p); } -signature signature::from_cvmatrix(cv::Mat m) +signature signature::from_cvmatrix(cv::Mat m, const signature_config &cfg) { cv::Mat ma, bw; double sc = 1; @@ -258,13 +272,18 @@ signature signature::from_cvmatrix(cv::Mat m) cv::cvtColor(ma, bw, cv::COLOR_RGB2GRAY); else bw = ma; - return signature::from_preprocessed_matrix(bw); + return signature::from_preprocessed_matrix(bw, cfg); } -signature signature::from_file(const char *fn) +signature signature::from_file(const char *fn, const signature_config &cfg) { cv::Mat img = cv::imread(fn, cv::IMREAD_UNCHANGED); - return signature::from_cvmatrix(img); + return signature::from_cvmatrix(img, cfg); +} + +signature_config signature::default_cfg() +{ + return _default_cfg; } size_t signature_hash::operator()(signature const& sig) const noexcept diff --git a/signature.hpp b/signature.hpp index d9899c0..b3c5c40 100644 --- a/signature.hpp +++ b/signature.hpp @@ -22,7 +22,6 @@ class signature { private: std::shared_ptr<signature_priv> p; - static signature_config cfg; signature(signature_priv* _p); signature(const signature&)=default; signature& operator=(const signature&)=default; @@ -32,27 +31,12 @@ public: signature(signature&&)=default; signature& operator=(signature&&)=default; signature clone() const;//do not use unless absolutely needed + void dump() const; double length() const; double distance(const signature &o) const; bool operator ==(const signature &o) const; - /* - * Configure parameters for signature calculation. - * Please note: - * Comparing signatures calculated using different - * parameters gives no meaningful results. - * - * If never called, a default configuration is used. - * See signature.cpp. - */ - static void configure(signature_config _cfg); - /* - * Get current signature calculation parameters. - * If it's never set explicitly, the default configuration - * is returned. - */ - static signature_config config(); - static signature from_file(const char *fn); + static signature from_file(const char *fn, const signature_config &cfg); /* * Input will be stripped of alpha channel (by blending with white), @@ -60,7 +44,7 @@ public: * Then it will be passed to from_preprocessed_matrix. * The matrix doesn't have to be continuous. */ - static signature from_cvmatrix(cv::Mat m); + static signature from_cvmatrix(cv::Mat m, const signature_config &cfg); /* * Input must be a single channel, floating point matrix @@ -69,7 +53,9 @@ public: * STILL *Will* be cropped if config().crop == true * STILL *Will* be blurred if config().blur_window > 1 */ - static signature from_preprocessed_matrix(cv::Mat m); + static signature from_preprocessed_matrix(cv::Mat m, const signature_config &cfg); + + static signature_config default_cfg(); friend class signature_priv; friend struct signature_hash; diff --git a/tests/signature_test.cpp b/tests/signature_test.cpp index 0b6b1f9..8d44431 100644 --- a/tests/signature_test.cpp +++ b/tests/signature_test.cpp @@ -5,8 +5,8 @@ int main() { std::vector<signature> a; - a.push_back(std::move(signature::from_file("img/x.jpg"))); - a.push_back(std::move(signature::from_file("img/z.jpg"))); + a.push_back(std::move(signature::from_file("img/x.jpg", signature::default_cfg()))); + a.push_back(std::move(signature::from_file("img/z.jpg", signature::default_cfg()))); for (size_t i = 0; i < a.size(); ++i) for (size_t j = 0; j < a.size(); ++j) { diff --git a/tests/testdrive.cpp b/tests/testdrive.cpp index c104e8a..b57d792 100644 --- a/tests/testdrive.cpp +++ b/tests/testdrive.cpp @@ -18,6 +18,8 @@ #include "thread_pool.hpp" +#define DEBUG 0 + int ctr; int recursive; int njobs=1; @@ -28,6 +30,32 @@ std::vector<std::string> files; int nsliceh = 3; int nslicev = 3; +signature_config cfg_full = +{ + 9, //slices + 3, //blur_window + 2, //min_window + true, //crop + true, //comp + 0.5, //pr + 1./128,//noise_threshold + 0.05, //contrast_threshold + 0.25 //max_cropping +}; + +signature_config cfg_subslice = +{ + 4, //slices + 16, //blur_window + 2, //min_window + false, //crop + true, //comp + 0.5, //pr + 1./64, //noise_threshold + 0.05, //contrast_threshold + 0.25 //max_cropping +}; + struct sig_eq { bool operator()(const signature& a, const signature& b) const @@ -125,7 +153,12 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std:: size_t sz = fread((void*)c,1,6,fp); if (sz < 6) continue; if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + { out.push_back(p.path().string()); +#if DEBUG > 0 + printf("%ld, %s\n", out.size() - 1, out.back().c_str()); +#endif + } fclose(fp); } } @@ -139,7 +172,12 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std:: size_t sz = fread((void*)c,1,6,fp); if (sz < 6) continue; if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + { out.push_back(p.path().string()); +#if DEBUG > 0 + printf("%ld, %s\n", out.size() - 1, out.back().c_str()); +#endif + } fclose(fp); } } @@ -148,7 +186,10 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std:: void job_func(int thid, size_t id) { cv::Mat img = cv::imread(files[id].c_str(), cv::IMREAD_UNCHANGED); - signature s = signature::from_cvmatrix(img); + signature s = signature::from_cvmatrix(img, cfg_full); +#if DEBUG > 1 + s.dump(); +#endif int ssw = img.size().width / nsliceh; int ssh = img.size().height / nslicev; std::vector<signature> subsigs; @@ -159,7 +200,13 @@ void job_func(int thid, size_t id) int r = (i == nsliceh) ? img.size().width : (i + 1) * ssw; int t = j * ssh; int b = (j == nslicev) ? img.size().height : (j + 1) * ssh; - subsigs.push_back(std::move(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r))))); + subsigs.push_back(std::move(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r)), cfg_subslice))); +#if DEBUG > 0 + printf("%ld, (%d, %d) %lu\n", id, i, j, signature_hash{}(subsigs.back())); +#endif +#if DEBUG > 1 + subsigs.back().dump(); +#endif } printf("%d %lu\r", thid, id); @@ -177,6 +224,10 @@ void job_func(int thid, size_t id) { if (si.second == i) { +#if DEBUG > 1 + printf("%d@(%ld <-> %ld) %f\n", i, id, si.first, s.distance(signatures[si.first])); +#endif + if (!v[si.first] && s.distance(signatures[si.first]) < threshold) { out.emplace_back(id, std::move(si.first)); |