rf_diag.cpp

00001 //
00002 //  Copyright (c) 2005-2007
00003 //  James N Knight
00004 //
00005 //  Permission to use, copy, modify, distribute and sell this software
00006 //  and its documentation for any purpose is hereby granted without fee,
00007 //  provided that the above copyright notice appear in all copies and
00008 //  that both that copyright notice and this permission notice appear
00009 //  in supporting documentation.  The authors make no representations
00010 //  about the suitability of this software for any purpose.
00011 //  It is provided "as is" without express or implied warranty.
00012 //
00013 //  
00014 // See http://homepages.inf.ed.ac.uk/svijayak/publications/vijayakumar-NeuCom2005.pdf
00015 // for the original publication of this algorithm.
00016 
00017 
00018 
00019 #include <boost/numeric/ublas/vector.hpp>
00020 #include <boost/numeric/ublas/matrix.hpp>
00021 #include <boost/numeric/ublas/matrix_expression.hpp>
00022 #include <boost/numeric/ublas/triangular.hpp>
00023 #include <boost/numeric/ublas/io.hpp>
00024 
00025 #include <algorithm>
00026 #include <iomanip>
00027 
00028 #include <math.h>
00029 
00030 #include "rf_diag.hpp"
00031 #include "my_expression.hpp"
00032 
00033 RFDiag::RFDiag()
00034 {
00035 
00036 }
00037 
00038 RFDiag::RFDiag(Types::Input c) : 
00039     RF(c),
00040     D(c.size()), 
00041     M(c.size()), 
00042     alpha(c.size()),
00043     dwdM(c.size()),
00044     dJ2dM(c.size()),
00045     dJdM(c.size()),
00046     meta_h(c.size()),
00047     b(c.size()),
00048     dJ2_2dM_2(c.size()), 
00049     dw_2dM_2(c.size()), 
00050     aux(c.size())
00051 {
00052     D.assign(d_def*ublas::scalar_vector<Types::RValue>(c.size()));
00053     M.assign(sqrt(d_def)*ublas::scalar_vector<Types::RValue>(c.size()));
00054     alpha.assign(ublas::scalar_vector<Types::RParam>(c.size(),alpha_init));
00055 
00056     meta_h.clear();
00057     b.assign(ublas::scalar_vector<Types::RParam>(c.size(),std::log(alpha_init+1e-10)));
00058 }
00059 
00060 RFDiag::RFDiag(Types::Input c, const boost::program_options::variables_map& vm) : 
00061     RF(c,vm),
00062     D(c.size()), 
00063     M(c.size()), 
00064     alpha(c.size()),
00065     dwdM(c.size()),
00066     dJ2dM(c.size()),
00067     dJdM(c.size()),
00068     meta_h(c.size()),
00069     b(c.size()),
00070     dJ2_2dM_2(c.size()), 
00071     dw_2dM_2(c.size()), 
00072     aux(c.size())
00073 {
00074     std::cerr << "Adding Receptive Field" << std::endl;
00075     D.assign(d_def*ublas::scalar_vector<Types::RValue>(c.size()));
00076     M.assign(sqrt(d_def)*ublas::scalar_vector<Types::RValue>(c.size()));
00077     alpha.assign(ublas::scalar_vector<Types::RParam>(c.size(),alpha_init));
00078 
00079     meta_h.clear();
00080     b.assign(ublas::scalar_vector<Types::RParam>(c.size(),std::log(alpha_init+1e-10)));
00081 }
00082 
00083 RFDiag::RFDiag(const RFDiag& rf, Types::Input c, const boost::program_options::variables_map& vm) : 
00084     RF(c, vm),
00085     D(c.size()), 
00086     M(c.size()), 
00087     alpha(c.size()),
00088     dwdM(c.size()),
00089     dJ2dM(c.size()),
00090     dJdM(c.size()),
00091     meta_h(c.size()),
00092     b(c.size()),
00093     dJ2_2dM_2(c.size()), 
00094     dw_2dM_2(c.size()), 
00095     aux(c.size())
00096 {
00097     alpha.assign(rf.getAlpha());
00098     D.assign(rf.getD());
00099     M.assign(rf.getM());
00100 
00101     meta_h.clear();
00102     b.assign(ublas::scalar_vector<Types::RParam>(c.size(),std::log(alpha_init+1e-10)));
00103 }
00104 
00105 Types::RValue RFDiag::learn(Types::Input x, Types::Output y, Types::RValueP w){
00106     // get the current activation
00107 //    const Types::RValue w = getActivation(x);
00108 
00109     // update some sufficient statistics
00110     const Types::RValue W_old = W;
00111     W = lambda * W + w;
00112 
00113     //update the local model
00114     localModel.learn(x,y,w,W,W_old);
00115 
00116     // stuff from local model, this relies on learn being called first, perhaps I should rework this
00117     const Types::RValue e_cv = localModel.get_e_cv();
00118     const Types::RValue e = localModel.get_e();
00119     Types::Input z = localModel.get_z();
00120 
00121 
00122     // Update the distance matrix as in Equation 3.6 and Table 4
00123     // \f$ M^{n+1} = M^n - \alpha \frac{\partial J}{\partial M} \f$
00124     
00125     // do some strange stuff to help with numerical stability
00126     Types::Input derivative = localModel.check_derivatives();
00127 
00128     if(derivative(0) != 0){    // update is ok
00129 
00130         // update squared error trackers
00131         e_2 = lambda * e_2 + w*e*e;
00132 
00133         // this seems odd but paper says update it here, before use
00134         // update a_E
00135         a_E = lambda * a_E + w*e_cv*e_cv;
00136     
00137         Types::RValue transient_multiplier = std::min(1.0, pow( e_2 / (a_E + 1e-10) , 4));
00138 
00139         // compute \f$ \sum_{i=1}^M \frac{\partial J_1}{\partial w} \f$
00140         q = element_div(element_prod(z,derivative),localModel.get_a_zz());
00141 
00142         // sufficient statistic for confidence intervals
00143         a_pk = lambda*a_pk + w*w*prec_inner_prod(q,z);
00144         
00145         const Types::RValue dJ1dw = (e_cv*e_cv)/W - 2*e/W * prec_inner_prod(q,a_H) - 2/W*prec_inner_prod(element_prod(q,q),a_G) - a_E/(W*W);
00146 
00147         temp2 = x - center;
00148 
00149         // code for diagonal M
00150         for(int k=0; k<dwdM.size(); ++k){
00151           const Types::RValue dDdM = 2*M(k);
00152           dwdM(k) = -0.5*w*temp2(k)*temp2(k)*dDdM;
00153           dJ2dM(k) = 2 * gamma / center.size() * D(k) * dDdM;
00154 
00155             // declare dJ2_2dM_2 and dw_2dM_2
00156             if(meta){
00157                 const Types::RValue dDdM_2 = dDdM*dDdM;
00158                 dJ2_2dM_2(k) = 2*gamma/center.size()*(2*D(k)+dDdM_2);
00159                 dw_2dM_2(k) = 2*dwdM(k)*dwdM(k)/w - w*temp2(k)*temp2(k);
00160             }
00161         }
00162 
00163         dJdM = dJ1dw * dwdM + (w/W)*dJ2dM;
00164     
00165         // compute h
00166         const Types::RValue h = w * prec_inner_prod(z,q);
00167 
00168         // declare (dbouel)dJ1_2dw_2, (vector)dJ_2dM_2, vector)aux, (vector)b, (vector) meta_h,
00169         // exp(matrix) function
00170         if(meta){
00171             const Types::RValue dJ1_2dw_2 = -(e_cv*e_cv)/(W*W) 
00172                           + 2*e*e*h/(W*w) 
00173                                     + a_E/(W*W*W) 
00174                                     - 2/W*((-(e/W) - 2*e*prec_inner_prod(q,z))*prec_inner_prod(q,a_H))
00175                                     - 1/(W*W)*( (e_cv*e_cv) - 2*e*prec_inner_prod(q,a_H) );
00176 
00177             dJ_2dM_2 = (dw_2dM_2*dJ1dw + element_prod(dwdM,dwdM)*dJ1_2dw_2) + w/W*dJ2_2dM_2;
00178 
00179             aux = meta_alpha * transient_multiplier * element_prod(dJdM,meta_h);
00180 
00181             for(int i=0; i<aux.size(); ++i)
00182                 if(fabs(aux(i)) > 0.1)
00183                     aux(i) = 0.1 * ((aux(i)>0)?1:-1);
00184 
00185             b -= aux;
00186 
00187             for(int i=0; i<b.size(); ++i)
00188                 if(fabs(b(i)) > 10)
00189                     b(i) = 10 * ((b(i)>0)?1:-1);
00190 
00191             // probably need to define this
00192 //            alpha = exp(b);
00193 //            alpha = ublas::apply_to_all<functor::exp<Types::RValue> >( b );
00194             alpha = (ublas::apply_to_all<ublas::scalar_exp<Types::RValue> >( b ));
00195 
00196             aux = ublas::scalar_vector<Types::RValue>(alpha.size(),1.0) - (element_prod(alpha,dJ_2dM_2)*transient_multiplier);
00197 
00198             for(int i=0; i<aux.size(); ++i)
00199                 if(aux(i) < 0.0)
00200                     aux(i) = 0;
00201 
00202             meta_h = element_prod(meta_h,aux) - element_prod(alpha,dJdM)*transient_multiplier;
00203 
00204         }
00205 
00206         // update a_H, a_G
00207         H_temp =  lambda * a_H + (w*e_cv*z*transient_multiplier)/(1-h);
00208         a_H = element_prod(derivative, H_temp) + element_prod(ublas::scalar_vector<Types::RValue>(derivative.size(),1.0)-derivative,a_H);
00209         H_temp = lambda * a_G + (w*w*(e_cv*e_cv)*element_prod(z,z)*transient_multiplier)/(1-h);
00210         a_G = element_prod(derivative, H_temp) + element_prod(ublas::scalar_vector<Types::RValue>(derivative.size(),1.0)-derivative,a_G);
00211 
00212         Types::RValue maxM = 0.0;
00213         for(int i=0; i<M.size(); ++i)
00214             if(fabs(M(i)) > maxM)
00215                 maxM = fabs(M(i));
00216 
00217         for(int i=0; i<M.size(); ++i)
00218             if(alpha(i) * dJdM(i) * transient_multiplier > .1*maxM)
00219                 alpha(i) /= 2;
00220 
00221         // code for diagonal M, also need to change getActivation()
00222         M -= element_prod(alpha,dJdM) * transient_multiplier;
00223         D = element_prod(M,M);
00224 
00225     }
00226 
00227 
00228     // Check if projection needs to be added, update local ss if projection added
00229     if(localModel.updateNumProjections()){
00230         int R = a_H.size()+1;
00231         a_H.resize(R);
00232         a_H(R-1) = 0;
00233         a_G.resize(R);
00234         a_G(R-1) = 0;
00235         H_temp.resize(R);
00236     }
00237 
00238 }
00239 
00240 std::ostream& operator<<(std::ostream& out, const RFDiag& rf)
00241 {
00242     for(int i=0; i<rf.center.size(); ++i)
00243         out << rf.center(i) << " ";
00244 
00245     for(int i=0; i<rf.D.size(); ++i)
00246         for(int j=0; j<rf.D.size(); ++j)
00247             if(i==j)
00248                 out << rf.D(0) << " ";
00249             else 
00250                 out << 0.0 << " ";
00251     out << std::endl;
00252 
00253 }

Generated on Fri Jul 27 00:24:01 2007 for LWPR by  doxygen 1.5.1