aboutsummaryrefslogtreecommitdiff
path: root/tests/testdrive.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/testdrive.cpp')
-rw-r--r--tests/testdrive.cpp226
1 files changed, 226 insertions, 0 deletions
diff --git a/tests/testdrive.cpp b/tests/testdrive.cpp
new file mode 100644
index 0000000..c104e8a
--- /dev/null
+++ b/tests/testdrive.cpp
@@ -0,0 +1,226 @@
+#include "signature.hpp"
+
+#include <cstdio>
+#include <cstring>
+
+#include <filesystem>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <getopt.h>
+
+#include "thread_pool.hpp"
+
+// Not referenced anywhere in the visible part of this file.
+int ctr;
+// Non-zero when -r/--recursive was given; main() forwards it to build_file_list().
+int recursive;
+// Value of -j/--jobs; run() passes it to the thread_pool constructor.
+int njobs=1;
+// Upper bound on signature distance for two images to be reported by job_func().
+// parse_arguments() rejects values outside [0,1] and raises anything below 1e-6 to 1e-6.
+double threshold=0.3;
+// Positional command-line arguments (the PATHs to scan).
+std::vector<std::string> paths;
+// Image files collected by build_file_list(); job ids index into this vector.
+std::vector<std::string> files;
+
+// job_func() cuts every image into nsliceh columns ...
+int nsliceh = 3;
+// ... and nslicev rows, giving nsliceh * nslicev slices per image.
+int nslicev = 3;
+
+// Key-equality predicate for the `slices` hash map: two keys are the same
+// entry only when signature's operator== says so.
+struct sig_eq
+{
+    bool operator()(const signature& lhs, const signature& rhs) const
+    {
+        // A distance-based variant (lhs.distance(rhs) < 0.1) is left disabled.
+        const bool same = (lhs == rhs);
+        return same;
+    }
+};
+
+// (index into `files`, slice position within that image).
+using slice_info = std::pair<size_t, int>;
+// Slice signature -> every (file, position) at which that signature was seen.
+std::unordered_map<signature, std::vector<slice_info>, signature_hash, sig_eq> slices;
+// Whole-image signature per file, indexed like `files`; written by job_func().
+std::vector<signature> signatures;
+// Held by job_func() while it touches `slices`, `signatures` and `out`.
+std::mutex sigmtx;
+// Pairs of file indices reported as similar; printed by main().
+std::vector<std::pair<size_t, size_t>> out;
+
+/**
+ * Parse the command line into the global option variables
+ * (`recursive`, `njobs`, `threshold`, `paths`).
+ *
+ * @param argc argument count as received by main().
+ * @param argv argument vector as received by main(); getopt_long may permute it.
+ * @return 0 when the program should continue,
+ *         1 when the usage text was printed (help requested or no arguments),
+ *         2 when an argument is invalid or no PATH was given.
+ */
+int parse_arguments(int argc,char **argv)
+{
+    recursive=0;
+    int help=0;
+    bool bad_jobs=false;
+    option longopt[]=
+    {
+        {"recursive",no_argument      ,&recursive,1},
+//      {"destdir"  ,required_argument,0         ,'D'},
+        {"jobs"     ,required_argument,0         ,'j'},
+//      {"threshold",required_argument,0         ,'d'},
+        {"help"     ,no_argument      ,&help     ,1},
+        {0          ,0                ,0         ,0}
+    };
+    while(1)
+    {
+        int idx=0;
+        const int c=getopt_long(argc,argv,"rhj:",longopt,&idx);
+        if(c==-1)break;
+        switch(c)
+        {
+            case 0:
+                // Only the flag-style long options (--recursive, --help) return 0;
+                // getopt_long has already stored their value through the flag pointer.
+                break;
+            case 'r':
+                recursive=1;
+                break;
+            case 'h':
+                help=1;
+                break;
+            case 'j':
+                // Remember a parse failure instead of silently keeping the old value.
+                if(sscanf(optarg,"%d",&njobs)!=1)bad_jobs=true;
+                break;
+            case 'd':
+                // Currently unreachable: 'd' is absent from the option string and
+                // the --threshold long option is commented out above.
+                sscanf(optarg,"%lf",&threshold);
+                break;
+        }
+    }
+    // Everything getopt_long left over is a path to scan.
+    for(;optind<argc;++optind)
+        paths.push_back(argv[optind]);
+    if(help||argc<2)
+    {
+        printf(
+            "Usage: %s [OPTION] PATH...\n"
+            "Detect potentially duplicate images in PATHs and optionally perform an action on them.\n\n"
+            " -h, --help Display this help message and exit.\n"
+            " -r, --recursive Recurse into all directories.\n"
+            " -j, --jobs Number of concurrent tasks to run at once.\n"
+//          " -d, --threshold Threshold distance below which images will be considered similar.\n"
+            ,argv[0]
+        );
+        return 1;
+    }
+    // A worker pool needs at least one worker; reject garbage and non-positive counts.
+    if(bad_jobs||njobs<1)
+    {
+        puts("Invalid number of jobs.");
+        return 2;
+    }
+    if(threshold>1||threshold<0)
+    {
+        puts("Invalid threshold value.");
+        return 2;
+    }
+    // Keep the threshold strictly positive so a "< threshold" test can still succeed.
+    if(threshold<1e-6)threshold=1e-6;
+    if(paths.empty())
+    {
+        puts("Missing image path.");
+        return 2;
+    }
+    return 0;
+}
+
+// Return true when `file` can be opened and its first bytes match the PNG
+// ("\x89PNG\r\n") or JPEG ("\xff\xd8\xff") magic numbers.
+static bool has_image_magic(const std::filesystem::path& file)
+{
+    // Binary mode so no platform translates the header bytes.
+    FILE* fp = fopen(file.c_str(),"rb");
+    if(!fp)return false; // unreadable entry: skip it instead of handing NULL to fread
+    char magic[8];
+    const size_t got = fread(magic,1,6,fp);
+    // Close before any early return so short files and directories do not leak the handle.
+    fclose(fp);
+    if(got < 6)return false;
+    return !memcmp(magic,"\x89PNG\r\n",6)||!memcmp(magic,"\xff\xd8\xff",3);
+}
+
+/**
+ * Append to `out` the path of every directory entry under `path` whose
+ * content starts with a PNG or JPEG signature.
+ *
+ * @param path      directory to scan.
+ * @param recursive when true, descend into subdirectories as well.
+ * @param out       receives the matching paths as strings (existing content is kept).
+ *
+ * Errors raised by the std::filesystem directory iterators (for example when
+ * `path` is not a directory) propagate to the caller.
+ */
+void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out)
+{
+    // One scan loop shared by both iterator kinds.
+    auto collect=[&out](auto entries)
+    {
+        for(const auto &entry:entries)
+        {
+            if(has_image_magic(entry.path()))
+                out.push_back(entry.path().string());
+        }
+    };
+    if(recursive)
+        collect(std::filesystem::recursive_directory_iterator(path));
+    else
+        collect(std::filesystem::directory_iterator(path));
+}
+
+/**
+ * Worker task: compute the signature of files[id] and of each of its
+ * nsliceh x nslicev slices, then look for earlier images that share a slice
+ * signature at the same position and whose whole-image distance is below
+ * `threshold`. Matches are appended to `out` as (id, other id).
+ *
+ * @param thid identifier of the calling worker; only used for the progress line.
+ * @param id   index into `files` / `signatures` of the image to process.
+ *
+ * All access to `slices`, `signatures` and `out` happens while `sigmtx` is held.
+ */
+void job_func(int thid, size_t id)
+{
+    cv::Mat img = cv::imread(files[id].c_str(), cv::IMREAD_UNCHANGED);
+    if (img.empty())
+    {
+        // imread yields an empty matrix when the file cannot be read or decoded;
+        // the file list is built from magic bytes only, so this can happen.
+        fprintf(stderr, "Could not decode %s, skipping.\n", files[id].c_str());
+        return;
+    }
+    signature s = signature::from_cvmatrix(img);
+
+    const int width = img.size().width;
+    const int height = img.size().height;
+    const int ssw = width / nsliceh;
+    const int ssh = height / nslicev;
+    std::vector<signature> subsigs;
+    subsigs.reserve(nsliceh * nslicev);
+    for (int i = 0; i < nsliceh; ++i)
+        for (int j = 0; j < nslicev; ++j)
+        {
+            const int l = i * ssw;
+            // The last column/row absorbs the remainder of the integer division
+            // so the slices cover the whole image.
+            const int r = (i == nsliceh - 1) ? width : (i + 1) * ssw;
+            const int t = j * ssh;
+            const int b = (j == nslicev - 1) ? height : (j + 1) * ssh;
+            subsigs.push_back(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r))));
+        }
+
+    printf("%d %zu\r", thid, id);
+    fflush(stdout);
+
+    // Marks files already compared against this image so each pair is tested
+    // once; allocated before taking the lock to keep the critical section short.
+    std::vector<bool> seen(files.size());
+
+    std::lock_guard<std::mutex> guard(sigmtx);
+    for (int i = 0; i < nsliceh * nslicev; ++i)
+    {
+        auto it = slices.find(subsigs[i]);
+        if (it == slices.end())
+        {
+            slices.emplace(subsigs[i].clone(),
+                           std::vector<slice_info>{{id, i}});
+            continue;
+        }
+        for (const auto &si : it->second)
+        {
+            // Only slices at the same position count as a hint.
+            if (si.second != i)
+                continue;
+            if (!seen[si.first] && s.distance(signatures[si.first]) < threshold)
+                out.emplace_back(id, si.first);
+            seen[si.first] = true;
+        }
+        it->second.emplace_back(id, i);
+    }
+    // Publish the whole-image signature last; other jobs only reach it through
+    // the entries added to `slices` above, within this same critical section.
+    signatures[id] = std::move(s);
+}
+
+// Create a thread_pool sized by `njobs`, submit one job_func task per entry
+// of `files` (passing the entry's index), then call wait() on the pool.
+void run()
+{
+    thread_pool pool(njobs);
+    size_t idx=0;
+    while(idx<files.size())
+    {
+        pool.create_task(job_func,idx);
+        ++idx;
+    }
+    pool.wait();
+}
+
+/**
+ * Entry point: parse options, collect candidate image files from every PATH,
+ * compute signatures in parallel and print each similar pair together with
+ * its signature distance.
+ *
+ * @return 0 on success or after printing usage, 1 on invalid arguments or
+ *         when a PATH cannot be scanned.
+ */
+int main(int argc,char** argv)
+{
+    // parse_arguments returns 1 for "usage printed" and 2 for errors; map to 0 / 1.
+    if(const int pr=parse_arguments(argc,argv))return pr-1;
+    puts("building list of files to compare...");
+    try
+    {
+        for(const auto&p:paths)
+            build_file_list(p,recursive,files);
+    }
+    catch(const std::filesystem::filesystem_error&e)
+    {
+        // E.g. a PATH that does not exist or is not a directory.
+        fprintf(stderr,"%s\n",e.what());
+        return 1;
+    }
+    printf("%zu files to compare.\n",files.size());
+    puts("computing signature vectors...");
+
+    // One slot per file; job_func writes signatures[id].
+    signatures.resize(files.size());
+    run();
+    for(const auto &p : out)
+    {
+        printf("%s %s %f\n", files[p.first].c_str(), files[p.second].c_str(), signatures[p.first].distance(signatures[p.second]));
+    }
+    return 0;
+}
+