aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Xiong <chirs241097@gmail.com> 2022-08-27 22:36:28 -0400
committer Chris Xiong <chirs241097@gmail.com> 2022-08-27 22:36:28 -0400
commit 137960a0261245a433f161d8fa4cbb53e4e50e6a (patch)
tree b6e124f744a5db48c5741ee0663fcf90faa0eccb
parent 96fc17b99d56eb636c894c5be9ab39bfdb4ba454 (diff)
download deduper-137960a0261245a433f161d8fa4cbb53e4e50e6a.tar.xz
Fix distance calculation.
-rw-r--r-- imageutil.hpp 10
-rw-r--r-- signature.cpp 93
-rw-r--r-- signature.hpp 26
-rw-r--r-- tests/signature_test.cpp 4
-rw-r--r-- tests/testdrive.cpp 55
5 files changed, 123 insertions, 65 deletions
diff --git a/imageutil.hpp b/imageutil.hpp
index 438c06b..15be98b 100644
--- a/imageutil.hpp
+++ b/imageutil.hpp
@@ -7,6 +7,8 @@
#include "compressed_vector.hpp"
+// Square of x.
+// A function template instead of a function-like macro: the argument is
+// evaluated exactly once, and the short name is not substituted by the
+// preprocessor in every file that includes this header.
+// The return type is that of x * x, so small integer types are promoted
+// exactly as they were in the macro's expansion.
+template <typename T>
+constexpr auto sqr(T x) -> decltype(x * x)
+{
+ return x * x;
+}
+
class image_util
{
public:
@@ -21,7 +23,7 @@ public:
double ret = 0;
for (size_t i = 0; i < v.size(); ++i)
{
- ret += (double)(v.get(i) - center) * (v.get(i) - center);
+ ret += sqr(1. * v.get(i) - center);
}
return sqrt(ret);
}
@@ -35,7 +37,7 @@ public:
if (abs((int)v1.get(i) - (int)v2.get(i)) == 2 && (v1.get(i) == 2 || v2.get(i) == 2))
ret += 9;
else
- ret += (double)(v1.get(i) - v2.get(i)) * (v1.get(i) - v2.get(i));
+ ret += sqr(1. * v1.get(i) - v2.get(i));
}
return sqrt(ret);
}
@@ -44,7 +46,7 @@ public:
double ret = 0;
for (size_t i = 0; i < v.size(); ++i)
{
- ret += (double)(v[i] - center) * (v[i] - center);
+ ret += sqr(1. * v[i] - center);
}
return sqrt(ret);
}
@@ -57,7 +59,7 @@ public:
if (abs((int)v1[i] - (int)v2[i]) == 2 && (v1[i] == 2 || v2[i] == 2))
ret += 9;
else
- ret += (double)(v1[i] - v2[i]) * (v1[i] - v2[i]);
+ ret += sqr(1. * v1[i] - v2[i]);
}
return sqrt(ret);
}
diff --git a/signature.cpp b/signature.cpp
index 21de945..cd55a77 100644
--- a/signature.cpp
+++ b/signature.cpp
@@ -20,17 +20,17 @@
#include "imageutil.hpp"
#include "signature.hpp"
-signature_config signature::cfg =
+static signature_config _default_cfg = // built-in defaults; signature::default_cfg() returns a copy of this
{
- 9,
- 3,
- 2,
- true,
- false,
- 0.5,
- 1./128,
- 0.05,
- 0.25
+ 9, //slices
+ 3, //blur_window: side of the square cv::blur kernel, blur is applied only when > 1
+ 2, //min_window: lower bound for the window size computed in get_light_charistics
+ true, //crop: run image_util::crop on the input first
+ false, //compress: store the signature in the compressed vector
+ 0.5, //pr
+ 1./128,//noise_threshold: only values with magnitude above this count as light or dark
+ 0.05, //contrast_threshold: passed to image_util::crop
+ 0.25 //max_cropping: passed to image_util::crop
};
class signature_priv
@@ -42,6 +42,7 @@ private:
compressed_vector<uint8_t, 3> ct;
std::vector<uint8_t> uct;
bool compressed;
+ signature_config cfg;
public:
float get_light_charistics_cell(int x, int y, int w, int h);
void get_light_charistics();
@@ -50,6 +51,7 @@ public:
double length() const;
double distance(const signature_priv &o) const;
bool operator==(const signature_priv &o) const;
+ void dump() const;
friend class signature;
friend struct signature_hash;
};
@@ -65,12 +67,12 @@ void signature_priv::get_light_charistics()
int iw, ih, slc;
iw = fimg.size().width;
ih = fimg.size().height;
- slc = signature::cfg.slices;
+ slc = cfg.slices;
windowx = iw / (double)slc / 2;
windowy = ih / (double)slc / 2;
- int windows = round(std::min(iw, ih) / slc * signature::cfg.pr);
- if (windows < signature::cfg.min_window)
- windows = signature::cfg.min_window;
+ int windows = round(std::min(iw, ih) / slc * cfg.pr);
+ if (windows < cfg.min_window)
+ windows = cfg.min_window;
double ww = (iw - 1) / (slc + 1.);
double wh = (ih - 1) / (slc + 1.);
double wxs = 0, wys = 0;
@@ -99,7 +101,7 @@ void signature_priv::get_light_variance()
{
const int dx[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
const int dy[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
- int slc = signature::cfg.slices;
+ int slc = cfg.slices;
float *lp = lch.ptr<float>(0);
for (int x = 0; x < slc; ++x)
{
@@ -125,7 +127,7 @@ void signature_priv::get_signature()
std::vector<double> darks;
for (float &l : lv)
{
- if (fabsf(l) > signature::cfg.noise_threshold)
+ if (fabsf(l) > cfg.noise_threshold)
{
if (l > 0)
lights.push_back(l);
@@ -135,12 +137,12 @@ void signature_priv::get_signature()
}
double lth = image_util::median(lights);
double dth = image_util::median(darks);
- if (signature::cfg.compress)
+ if (cfg.compress)
{
compressed = true;
for (float &l : lv)
{
- if (fabsf(l) > signature::cfg.noise_threshold)
+ if (fabsf(l) > cfg.noise_threshold)
{
if (l > 0)
ct.push_back(l > lth ? 4 : 3);
@@ -155,7 +157,7 @@ void signature_priv::get_signature()
compressed = false;
for (float &l : lv)
{
- if (fabsf(l) > signature::cfg.noise_threshold)
+ if (fabsf(l) > cfg.noise_threshold)
{
if (l > 0)
uct.push_back(l > lth ? 4 : 3);
@@ -178,9 +180,9 @@ double signature_priv::length() const
double signature_priv::distance(const signature_priv &o) const
{
+ // Distance between this signature and o, divided by the sum of the two
+ // signatures' lengths measured around the center value 2.
+ // The compressed vectors are compared only when both sides are
+ // compressed; otherwise the uncompressed vectors are used.
+ double dist = 0, norm = 0;
if (compressed && o.compressed)
- return image_util::distance(ct, o.ct);
+ {
+ dist = image_util::distance(ct, o.ct);
+ norm = image_util::length(ct, uint8_t(2)) + image_util::length(o.ct, uint8_t(2));
+ }
else
- return image_util::distance(uct, o.uct);
+ {
+ dist = image_util::distance(uct, o.uct);
+ norm = image_util::length(uct, uint8_t(2)) + image_util::length(o.uct, uint8_t(2));
+ }
+ // Both lengths are zero when every cell of both signatures equals 2.
+ // Dividing would then give 0/0 (NaN), and NaN never compares below a
+ // threshold, so fall back to the unnormalized distance in that case.
+ return norm > 0 ? dist / norm : dist;
}
bool signature_priv::operator==(const signature_priv &o) const
@@ -191,10 +193,26 @@ bool signature_priv::operator==(const signature_priv &o) const
return uct == o.uct;
}
+// Print every cell of the signature to stdout as space-separated unsigned
+// numbers, followed by a newline. Reads the compressed vector when the
+// signature is compressed and the plain vector otherwise.
+void signature_priv::dump() const
+{
+ if (compressed)
+ {
+ for (size_t idx = 0; idx < ct.size(); ++idx)
+ printf("%u ", ct.get(idx));
+ }
+ else
+ {
+ for (auto &cell : uct)
+ printf("%u ", cell);
+ }
+ printf("\n");
+}
+
signature::signature() = default;
-signature::signature(signature_priv* _p) : p(_p) {}
+signature::signature(signature_priv* _p) : p(_p){}
signature::~signature() = default;
+// Forward to the private implementation's dump(); a signature without
+// private data prints nothing.
+void signature::dump() const
+{
+ if (!p)
+ return;
+ p->dump();
+}
+
// Returns a copy made with the private, defaulted copy constructor.
// Only the std::shared_ptr to the private data is copied, so the result
// refers to the same signature_priv object as this signature.
signature signature::clone() const
{
return signature(*this);
}
return *p == *o.p;
}
-void signature::configure(signature_config _cfg)
-{signature::cfg = _cfg;}
-
-signature_config signature::config()
-{return signature::cfg;}
-
-signature signature::from_preprocessed_matrix(cv::Mat m)
+signature signature::from_preprocessed_matrix(cv::Mat m, const signature_config &cfg)
{
signature_priv *p = new signature_priv;
- if (signature::cfg.crop)
- p->fimg = image_util::crop(m, signature::cfg.contrast_threshold, signature::cfg.max_cropping);
+ p->cfg = cfg;
+
+ if (cfg.crop)
+ p->fimg = image_util::crop(m, cfg.contrast_threshold, cfg.max_cropping);
else
p->fimg = m;
- if (signature::cfg.blur_window > 1)
- cv::blur(p->fimg, p->fimg, cv::Size(signature::cfg.blur_window, signature::cfg.blur_window));
+ if (cfg.blur_window > 1)
+ cv::blur(p->fimg, p->fimg, cv::Size(cfg.blur_window, cfg.blur_window));
p->get_light_charistics();
p->get_light_variance();
p->get_signature();
@@ -242,7 +256,7 @@ signature signature::from_preprocessed_matrix(cv::Mat m)
return signature(p);
}
-signature signature::from_cvmatrix(cv::Mat m)
+signature signature::from_cvmatrix(cv::Mat m, const signature_config &cfg)
{
cv::Mat ma, bw;
double sc = 1;
@@ -258,13 +272,18 @@ signature signature::from_cvmatrix(cv::Mat m)
cv::cvtColor(ma, bw, cv::COLOR_RGB2GRAY);
else
bw = ma;
- return signature::from_preprocessed_matrix(bw);
+ return signature::from_preprocessed_matrix(bw, cfg);
}
-signature signature::from_file(const char *fn)
+signature signature::from_file(const char *fn, const signature_config &cfg) // read image file fn and build its signature with cfg
{
cv::Mat img = cv::imread(fn, cv::IMREAD_UNCHANGED); // NOTE(review): the result is not checked here; cv::imread yields an empty Mat when the file cannot be read - confirm from_cvmatrix copes with that
- return signature::from_cvmatrix(img);
+ return signature::from_cvmatrix(img, cfg);
+}
+
+signature_config signature::default_cfg() // returns a copy of the file-local default configuration
+{
+ return _default_cfg;
}
size_t signature_hash::operator()(signature const& sig) const noexcept
diff --git a/signature.hpp b/signature.hpp
index d9899c0..b3c5c40 100644
--- a/signature.hpp
+++ b/signature.hpp
@@ -22,7 +22,6 @@ class signature
{
private:
std::shared_ptr<signature_priv> p;
- static signature_config cfg;
signature(signature_priv* _p);
signature(const signature&)=default;
signature& operator=(const signature&)=default;
@@ -32,27 +31,12 @@ public:
signature(signature&&)=default;
signature& operator=(signature&&)=default;
signature clone() const;//do not use unless absolutely needed
+ void dump() const;
double length() const;
double distance(const signature &o) const;
bool operator ==(const signature &o) const;
- /*
- * Configure parameters for signature calculation.
- * Please note:
- * Comparing signatures calculated using different
- * parameters gives no meaningful results.
- *
- * If never called, a default configuration is used.
- * See signature.cpp.
- */
- static void configure(signature_config _cfg);
- /*
- * Get current signature calculation parameters.
- * If it's never set explicitly, the default configuration
- * is returned.
- */
- static signature_config config();
- static signature from_file(const char *fn);
+ static signature from_file(const char *fn, const signature_config &cfg);
/*
* Input will be stripped of alpha channel (by blending with white),
@@ -60,7 +44,7 @@ public:
* Then it will be passed to from_preprocessed_matrix.
* The matrix doesn't have to be continuous.
*/
- static signature from_cvmatrix(cv::Mat m);
+ static signature from_cvmatrix(cv::Mat m, const signature_config &cfg);
/*
* Input must be a single channel, floating point matrix
@@ -69,7 +53,9 @@ public:
* STILL *Will* be cropped if cfg.crop == true
* STILL *Will* be blurred if cfg.blur_window > 1
*/
- static signature from_preprocessed_matrix(cv::Mat m);
+ static signature from_preprocessed_matrix(cv::Mat m, const signature_config &cfg);
+
+ static signature_config default_cfg();
friend class signature_priv;
friend struct signature_hash;
diff --git a/tests/signature_test.cpp b/tests/signature_test.cpp
index 0b6b1f9..8d44431 100644
--- a/tests/signature_test.cpp
+++ b/tests/signature_test.cpp
@@ -5,8 +5,8 @@
int main()
{
std::vector<signature> a;
- a.push_back(std::move(signature::from_file("img/x.jpg")));
- a.push_back(std::move(signature::from_file("img/z.jpg")));
+ a.push_back(std::move(signature::from_file("img/x.jpg", signature::default_cfg())));
+ a.push_back(std::move(signature::from_file("img/z.jpg", signature::default_cfg())));
for (size_t i = 0; i < a.size(); ++i)
for (size_t j = 0; j < a.size(); ++j)
{
diff --git a/tests/testdrive.cpp b/tests/testdrive.cpp
index c104e8a..b57d792 100644
--- a/tests/testdrive.cpp
+++ b/tests/testdrive.cpp
@@ -18,6 +18,8 @@
#include "thread_pool.hpp"
+#define DEBUG 0
+
int ctr;
int recursive;
int njobs=1;
@@ -28,6 +30,32 @@ std::vector<std::string> files;
int nsliceh = 3;
int nslicev = 3;
+signature_config cfg_full = // configuration job_func passes to from_cvmatrix for the whole image
+{
+ 9, //slices
+ 3, //blur_window
+ 2, //min_window
+ true, //crop
+ true, //compress
+ 0.5, //pr
+ 1./128,//noise_threshold
+ 0.05, //contrast_threshold
+ 0.25 //max_cropping
+};
+
+signature_config cfg_subslice = // configuration job_func passes to from_cvmatrix for each sub-rectangle of the image
+{
+ 4, //slices
+ 16, //blur_window
+ 2, //min_window
+ false, //crop
+ true, //compress
+ 0.5, //pr
+ 1./64, //noise_threshold
+ 0.05, //contrast_threshold
+ 0.25 //max_cropping
+};
+
struct sig_eq
{
bool operator()(const signature& a, const signature& b) const
@@ -125,7 +153,12 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::
size_t sz = fread((void*)c,1,6,fp);
if (sz < 6) continue;
if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3))
+ {
out.push_back(p.path().string());
+#if DEBUG > 0
+ printf("%ld, %s\n", out.size() - 1, out.back().c_str());
+#endif
+ }
fclose(fp);
}
}
@@ -139,7 +172,12 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::
size_t sz = fread((void*)c,1,6,fp);
if (sz < 6) continue;
if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3))
+ {
out.push_back(p.path().string());
+#if DEBUG > 0
+ printf("%ld, %s\n", out.size() - 1, out.back().c_str());
+#endif
+ }
fclose(fp);
}
}
@@ -148,7 +186,10 @@ void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::
void job_func(int thid, size_t id)
{
cv::Mat img = cv::imread(files[id].c_str(), cv::IMREAD_UNCHANGED);
- signature s = signature::from_cvmatrix(img);
+ signature s = signature::from_cvmatrix(img, cfg_full);
+#if DEBUG > 1
+ s.dump();
+#endif
int ssw = img.size().width / nsliceh;
int ssh = img.size().height / nslicev;
std::vector<signature> subsigs;
@@ -159,7 +200,13 @@ void job_func(int thid, size_t id)
int r = (i == nsliceh) ? img.size().width : (i + 1) * ssw;
int t = j * ssh;
int b = (j == nslicev) ? img.size().height : (j + 1) * ssh;
- subsigs.push_back(std::move(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r)))));
+ subsigs.push_back(std::move(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r)), cfg_subslice)));
+#if DEBUG > 0
+ printf("%ld, (%d, %d) %lu\n", id, i, j, signature_hash{}(subsigs.back()));
+#endif
+#if DEBUG > 1
+ subsigs.back().dump();
+#endif
}
printf("%d %lu\r", thid, id);
@@ -177,6 +224,10 @@ void job_func(int thid, size_t id)
{
if (si.second == i)
{
+#if DEBUG > 1
+ printf("%d@(%ld <-> %ld) %f\n", i, id, si.first, s.distance(signatures[si.first]));
+#endif
+
if (!v[si.first] && s.distance(signatures[si.first]) < threshold)
{
out.emplace_back(id, std::move(si.first));