#include "signature.hpp"

// NOTE(review): this file was damaged upstream -- text between '<' and '>' was
// stripped. The names of the twelve angle-bracket includes that originally
// stood here are lost; the list below is what the visible code requires.
// Confirm against the original source.
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <getopt.h>
#include <opencv2/opencv.hpp>

#include "thread_pool.hpp"

#define DEBUG 0

int ctr;
int recursive;          // set by -r / --recursive
int njobs=1;            // set by -j / --jobs; passed to the thread pool in run()
double threshold=0.3;   // full-signature distance below which a pair is reported

// NOTE(review): element types of the containers below were stripped and are
// reconstructed from usage (files[id].c_str(), signatures[id] = signature,
// out.emplace_back(id, index)). `paths` is only visibly used via size(), so
// its element type is a guess -- confirm.
std::vector<std::string> paths;
std::vector<std::string> files;

int nsliceh = 3;        // number of sub-slices across the image width
int nslicev = 3;        // number of sub-slices across the image height

// Configuration used for the whole-image signature in job_func().
signature_config cfg_full =
{
    9,      //slices
    3,      //blur_window
    2,      //min_window
    true,   //crop
    true,   //comp
    0.5,    //pr
    1./128, //noise_threshold
    0.05,   //contrast_threshold
    0.25    //max_cropping
};

// Configuration used for each of the nsliceh x nslicev sub-slice signatures.
signature_config cfg_subslice =
{
    4,      //slices
    16,     //blur_window
    2,      //min_window
    false,  //crop
    true,   //comp
    0.5,    //pr
    1./64,  //noise_threshold
    0.05,   //contrast_threshold
    0.25    //max_cropping
};

// Key-equality predicate for the `slices` map: exact signature equality.
// A distance-based variant is kept commented out.
struct sig_eq
{
    bool operator()(const signature& a, const signature& b) const
    {
        //return a.distance(b) < 0.1;
        return a == b;
    }
};

// (image index, sub-slice index) -- NOTE(review): template arguments were
// stripped; reconstructed from `emplace_back(id, i)` with size_t id, int i.
typedef std::pair<size_t, int> slice_info;

// Sub-slice signature -> every (image, slice position) that produced it.
std::unordered_map<signature, std::vector<slice_info>, signature_hash, sig_eq> slices;
// Whole-image signature per file index; job_func() writes signatures[id].
std::vector<signature> signatures;
// Guards `slices`, `signatures` and `out` inside job_func().
std::mutex sigmtx;
// Pairs of file indices whose full signatures are closer than `threshold`.
// NOTE(review): inner pair type reconstructed from usage -- confirm.
std::vector<std::pair<size_t, size_t>> out;

// Parses the command line into the globals `recursive`, `njobs` and `paths`.
// Returns 0 on success and 2 on an invalid threshold or when no path was given.
int parse_arguments(int argc,char **argv)
{
    recursive=0;
    int help=0;
    option longopt[]=
    {
        {"recursive",no_argument      ,&recursive,1},
//      {"destdir"  ,required_argument,0         ,'D'},
        {"jobs"     ,required_argument,0         ,'j'},
//      {"threshold",required_argument,0         ,'d'},
        {"help"     ,no_argument      ,&help     ,1},
        {0          ,0                ,0         ,0}
    };
    while(1)
    {
        int idx=0;
        int c=getopt_long(argc,argv,"rhj:",longopt,&idx);
        if(c==-1)break;
        switch(c)
        {
            case 0:
                // Flag-style long options have already stored their value.
                if(longopt[idx].flag)break;
                if(std::string("jobs")==longopt[idx].name)
                    sscanf(optarg,"%d",&njobs);
                //if(std::string("threshold")==longopt[idx].name)
                //sscanf(optarg,"%lf",&threshold);
            break;
            case 'r':
                recursive=1;
            break;
            case 'h':
                help=1;
            break;
            case 'j':
                sscanf(optarg,"%d",&njobs);
            break;
            // Not reachable with the current option string "rhj:" (no 'd').
            case 'd':
                sscanf(optarg,"%lf",&threshold);
            break;
        }
    }
    // NOTE(review): the original text between `for(;optind` and
    // `1||threshold<0)` was lost. The positional-argument loop and the range
    // check below are reconstructed from that residue and from the later
    // `paths.size()` test -- confirm against the original source.
    for(;optind<argc;++optind)
        paths.push_back(argv[optind]);
    // TODO(review): `help` is set by -h/--help, but the code that reacted to it
    // (presumably a usage message) was inside the lost span and is not
    // reconstructed here. Restore it from the original source.
    if(threshold>1||threshold<0)
    {
        puts("Invalid threshold value.");
        return 2;
    }
    if(threshold<1e-6)threshold=1e-6;
    if(!paths.size())
    {
        puts("Missing image path.");
        return 2;
    }
    return 0;
}

// Returns true when the file starts with a PNG ("\x89PNG\r\n") or JPEG
// ("\xff\xd8\xff") magic number. Files that cannot be opened or that are
// shorter than six bytes are reported as "not an image"; the handle is always
// closed before returning.
static bool has_image_magic(const std::filesystem::path& p)
{
    FILE* fp = fopen(p.c_str(), "rb");
    if (!fp) return false;
    char magic[8] = {};
    const size_t got = fread(magic, 1, 6, fp);
    fclose(fp);
    if (got < 6) return false;
    return !memcmp(magic, "\x89PNG\r\n", 6) || !memcmp(magic, "\xff\xd8\xff", 3);
}

// Appends to `out` every entry yielded by the directory iterator `it` whose
// content starts with an image magic number.
template <class DirIt>
static void collect_images(DirIt it, std::vector<std::string>& out)
{
    for (const auto& entry : it)
    {
        if (!has_image_magic(entry.path())) continue;
        out.push_back(entry.path().string());
#if DEBUG > 0
        printf("%zu, %s\n", out.size() - 1, out.back().c_str());
#endif
    }
}

// Scans `path` (descending into sub-directories when `recursive` is true) and
// appends the path of every PNG/JPEG file found to `out`.
void build_file_list(std::filesystem::path path,bool recursive,std::vector<std::string>&out)
{
    if (recursive)
        collect_images(std::filesystem::recursive_directory_iterator(path), out);
    else
        collect_images(std::filesystem::directory_iterator(path), out);
}

// Thread-pool job: computes the signature of files[id] plus one signature per
// sub-slice, then, under `sigmtx`, looks for previously processed images that
// share an identical sub-slice at the same grid position. Each such candidate
// is compared once by full-signature distance; matches below `threshold` are
// appended to `out`. Finally stores the full signature in signatures[id].
//
// thid: index of the worker thread (only used for progress output).
// id:   index into `files` / `signatures`.
// NOTE(review): assumes `signatures` already holds at least files.size()
// elements before jobs start -- the sizing code is not visible here.
void job_func(int thid, size_t id)
{
    cv::Mat img = cv::imread(files[id].c_str(), cv::IMREAD_UNCHANGED);
    // imread() yields an empty matrix when the file cannot be decoded; the
    // magic-number check in build_file_list() does not rule that out.
    if (img.empty())
    {
        fprintf(stderr, "Cannot read image: %s\n", files[id].c_str());
        return;
    }
    signature s = signature::from_cvmatrix(img, cfg_full);
#if DEBUG > 1
    s.dump();
#endif
    const int width = img.size().width;
    const int height = img.size().height;
    const int ssw = width / nsliceh;
    const int ssh = height / nslicev;
    std::vector<signature> subsigs;
    subsigs.reserve(static_cast<size_t>(nsliceh) * nslicev);
    for (int i = 0; i < nsliceh; ++i)
        for (int j = 0; j < nslicev; ++j)
        {
            // The last column/row absorbs the remainder of the integer
            // division so the slices cover the image up to its border.
            // (The original compared against nsliceh / nslicev, which the
            // loop bounds make impossible, so the remainder was dropped.)
            const int l = i * ssw;
            const int r = (i == nsliceh - 1) ? width : (i + 1) * ssw;
            const int t = j * ssh;
            const int b = (j == nslicev - 1) ? height : (j + 1) * ssh;
            subsigs.push_back(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r)), cfg_subslice));
#if DEBUG > 0
            printf("%zu, (%d, %d) %lu\n", id, i, j, signature_hash{}(subsigs.back()));
#endif
#if DEBUG > 1
            subsigs.back().dump();
#endif
        }

    printf("%d %zu\r", thid, id);
    fflush(stdout);

    // seen[k] is set once image k has been considered as a candidate, so the
    // full-signature distance is evaluated at most once per image.
    std::vector<bool> seen(files.size());

    // Scoped lock: released on every exit path, including exceptions.
    std::lock_guard<std::mutex> guard(sigmtx);
    const int nsub = nsliceh * nslicev;
    for (int i = 0; i < nsub; ++i)
    {
        auto it = slices.find(subsigs[i]);
        if (it != slices.end())
        {
            for (auto &si : it->second)
            {
                // Only slices at the same grid position count as a hit.
                if (si.second == i)
                {
#if DEBUG > 1
                    printf("%d@(%zu <-> %zu) %f\n", i, id, si.first, s.distance(signatures[si.first]));
#endif
                    if (!seen[si.first] && s.distance(signatures[si.first]) < threshold)
                    {
                        out.emplace_back(id, si.first);
                    }
                    seen[si.first] = true;
                }
            }
            it->second.emplace_back(id, i);
        }
        else
        {
            slices.emplace(subsigs[i].clone(), std::vector<slice_info>{{id, i}});
        }
    }
    signatures[id] = std::move(s);
}

// NOTE(review): the remainder of this file (rest of run() and anything after
// it) was lost to the same '<'...'>' stripping; the fragment below is kept
// verbatim and must be restored from the original source.
void run() { thread_pool tp(njobs); for(size_t i=0;i