00001 /* 00002 Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved. 00003 00004 This file is part of LSHKIT. 00005 00006 LSHKIT is free software: you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation, either version 3 of the License, or 00009 (at your option) any later version. 00010 00011 LSHKIT is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with LSHKIT. If not, see <http://www.gnu.org/licenses/>. 00018 */ 00019 00020 #ifndef __LSHKIT_LSH__ 00021 #define __LSHKIT_LSH__ 00022 00034 namespace lshkit { 00035 00037 00071 template <typename DIST> 00072 class StableDistLsh 00073 { 00074 std::vector<float> a_; 00075 float b_; 00076 float W_; 00077 unsigned dim_; 00078 public: 00082 struct Parameter 00083 { 00085 unsigned dim; 00087 float W; 00088 }; 00089 00090 typedef const float *Domain; 00091 00092 StableDistLsh () 00093 { 00094 } 00095 00096 template <typename RNG> 00097 void reset(const Parameter ¶m, RNG &rng) 00098 { 00099 a_.resize(param.dim); 00100 W_ = param.W; 00101 dim_ = param.dim; 00102 00103 boost::variate_generator<RNG &, DIST> gen(rng, DIST()); 00104 00105 for (unsigned i = 0; i < dim_; ++i) a_[i] = gen(); 00106 00107 b_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(0,W_))(); 00108 } 00109 00110 template <typename RNG> 00111 StableDistLsh(const Parameter ¶m, RNG &rng) 00112 { 00113 reset(param, rng); 00114 } 00115 00116 00117 unsigned getRange () const 00118 { 00119 return 0; 00120 } 00121 00122 unsigned operator () (Domain obj) const 00123 { 00124 float ret = b_; 00125 for (unsigned i = 0; i < dim_; ++i) 00126 { 00127 ret += a_[i] * obj[i]; 00128 } 00129 return unsigned(int(std::floor(ret / W_))); 00130 } 00131 00132 unsigned operator () (Domain obj, float *delta) const 00133 { 00134 float ret = b_; 00135 for (unsigned i = 0; i < dim_; ++i) 00136 { 00137 ret += a_[i] * obj[i]; 00138 } 00139 ret /= W_; 00140 00141 float flr = std::floor(ret); 00142 *delta = ret - flr; 00143 return unsigned(int(flr)); 00144 } 00145 00146 template<class Archive> 00147 void serialize(Archive & ar, const unsigned int version) 00148 { 00149 ar & a_; 00150 ar & b_; 00151 ar & W_; 00152 ar & dim_; 00153 assert(a_.size() == dim_); 00154 } 00155 00156 }; 00157 00159 typedef StableDistLsh<Cauchy> CauchyLsh; 00161 typedef StableDistLsh<Gaussian> GaussianLsh; 00162 00164 00189 class HyperPlaneLsh 00190 { 00191 std::vector<float> a_; 00192 00193 public: 00197 struct Parameter 00198 { 00200 unsigned dim; 00201 }; 00202 typedef const float *Domain; 00203 00204 HyperPlaneLsh () 00205 { 00206 } 00207 00208 template <typename RNG> 00209 void reset(const Parameter ¶m, RNG &rng) 00210 { 00211 a_ = boost::variate_generator<RNG &, boost::uniform_on_sphere<float> > 00212 (rng, boost::uniform_on_sphere<float>(param.dim))(); 00213 } 00214 00215 template <typename RNG> 00216 HyperPlaneLsh(const Parameter ¶m, RNG &rng) 00217 { 00218 reset(param, rng); 00219 } 00220 00221 00222 unsigned getRange () const 00223 { 00224 return 2; 00225 } 00226 00227 unsigned operator () (Domain obj) const 00228 { 00229 float ret = 0; 00230 for (unsigned i = 0; i < a_.size(); ++i) 00231 { 00232 ret += a_[i] * obj[i]; 00233 } 00234 return ret >= 0 ? 1 : 0; 00235 } 00236 00237 unsigned operator () (Domain obj, float *delta) const 00238 { 00239 float ret = 0; 00240 for (unsigned i = 0; i < a_.size(); ++i) 00241 { 00242 ret += a_[i] * obj[i]; 00243 } 00244 if (ret >= 0) 00245 { 00246 *delta = ret; 00247 return 1; 00248 } 00249 else 00250 { 00251 *delta = -ret; 00252 return 0; 00253 } 00254 } 00255 00256 template<class Archive> 00257 void serialize(Archive & ar, const unsigned int version) 00258 { 00259 ar & a_; 00260 } 00261 }; 00262 00263 00265 00298 class ThresholdingLsh 00299 { 00300 unsigned dim_; 00301 float threshold_; 00302 public: 00306 struct Parameter 00307 { 00309 unsigned dim; 00311 float min; 00313 float max; 00314 }; 00315 typedef const float *Domain; 00316 00317 ThresholdingLsh () 00318 { 00319 } 00320 00321 template <typename RNG> 00322 void reset(const Parameter ¶m, RNG &rng) 00323 { 00324 dim_ = boost::variate_generator<RNG &, UniformUnsigned>(rng, UniformUnsigned(0,param.dim - 1))(); 00325 threshold_ = boost::variate_generator<RNG &, Uniform>(rng, Uniform(param.min,param.max))(); 00326 } 00327 00328 template <typename RNG> 00329 ThresholdingLsh(const Parameter ¶m, RNG &rng) 00330 { 00331 reset(param, rng); 00332 } 00333 00334 unsigned getRange () const 00335 { 00336 return 2; 00337 } 00338 00339 unsigned operator () (Domain obj) const 00340 { 00341 return obj[dim_] >= threshold_ ? 1 : 0; 00342 } 00343 00344 unsigned operator () (Domain obj, float *delta) const 00345 { 00346 float ret = obj[dim_] - threshold_; 00347 if (ret >= 0) 00348 { 00349 *delta = ret; 00350 return 1; 00351 } 00352 else 00353 { 00354 *delta = -ret; 00355 return 0; 00356 } 00357 } 00358 00359 template<class Archive> 00360 void serialize(Archive & ar, const unsigned int version) 00361 { 00362 ar & dim_; 00363 ar & threshold_; 00364 } 00365 }; 00366 00367 } 00368 00369 #endif 00370