PredictiveModel.cpp

/************************************************************************
* Verve                                                                 *
* Copyright (C) 2004-2006                                               *
* Tyler Streeter  tylerstreeter@gmail.com                               *
* All rights reserved.                                                  *
* Web: http://verve-agents.sourceforge.net                              *
*                                                                       *
* This library is free software; you can redistribute it and/or         *
* modify it under the terms of EITHER:                                  *
*   (1) The GNU Lesser General Public License as published by the Free  *
*       Software Foundation; either version 2.1 of the License, or (at  *
*       your option) any later version. The text of the GNU Lesser      *
*       General Public License is included with this library in the     *
*       file license-LGPL.txt.                                          *
*   (2) The BSD-style license that is included with this library in     *
*       the file license-BSD.txt.                                       *
*                                                                       *
* This library is distributed in the hope that it will be useful,       *
* but WITHOUT ANY WARRANTY; without even the implied warranty of        *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
* license-LGPL.txt and license-BSD.txt for more details.                *
************************************************************************/

#include "PredictiveModel.h"
#include "RBFPopulation.h"
#include "Neuron.h"

namespace verve
{
        PredictiveModel::PredictiveModel(const Observation& obs, 
                bool isDynamicRBFEnabled, unsigned int numActions)
        {
                // Set up the RBFInputData structure.  We must add an additional 
                // discrete input for the action input.
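                // (For example, an Observation with two discrete inputs and 
                // three possible actions gives three discrete dimensions here, 
                // the last one having three options.)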
                unsigned int* discreteNumOptionsData = 
                        new unsigned int[obs.getNumDiscreteInputs() + 1];
                unsigned int* discreteInputData = 
                        new unsigned int[obs.getNumDiscreteInputs() + 1];
                for (unsigned int i = 0; i < obs.getNumDiscreteInputs(); ++i)
                {
                        discreteNumOptionsData[i] = obs.getDiscreteInputNumOptions(i);
                        discreteInputData[i] = 0;
                }
                discreteNumOptionsData[obs.getNumDiscreteInputs()] = numActions;
                discreteInputData[obs.getNumDiscreteInputs()] = 0;

                mStateActionInputData.init(obs.getNumDiscreteInputs() + 1, 
                        discreteNumOptionsData, 
                        discreteInputData, 
                        obs.getNumContinuousInputs(), 
                        obs.getContinuousResolution(), 
                        obs.getContinuousCircularData(), 
                        obs.getContinuousInputData());

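                // mStateActionInputData keeps its own copy of this data (it is 
                // used long after this constructor returns), so the temporary 
                // arrays can be freed now.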
                delete [] discreteNumOptionsData;
                delete [] discreteInputData;

                mDiscObsTrainingData = new real[obs.getNumDiscreteInputs()];
                for (unsigned int i = 0; i < obs.getNumDiscreteInputs(); ++i)
                {
                        mDiscObsTrainingData[i] = 0;
                }

                // Create the state-action representation Population.
                mStateActionRepresentation = new RBFPopulation();
                mStateActionRepresentation->init(mStateActionInputData, 
                        isDynamicRBFEnabled);
                mAllPopulations.push_back(mStateActionRepresentation);

                // Create the discrete Observation prediction Population.
                mDiscObsPredPopulation = new Population();
                mDiscObsPredPopulation->init(obs.getNumDiscreteInputs());
                mAllPopulations.push_back(mDiscObsPredPopulation);

                // Create the continuous Observation prediction Population.
                mContObsPredPopulation = new Population();
                mContObsPredPopulation->init(obs.getNumContinuousInputs());
                mAllPopulations.push_back(mContObsPredPopulation);

                // Create the reward prediction Population.
                mRewardPredPopulation = new Population();
                mRewardPredPopulation->init(1);
                mAllPopulations.push_back(mRewardPredPopulation);

                // Create the uncertainty prediction Population.
                mUncertaintyPredPopulation = new Population();
                mUncertaintyPredPopulation->init(1);
                mAllPopulations.push_back(mUncertaintyPredPopulation);

                // Create a Projection from the state-action representation to the 
                // discrete Observation predictor.
                mStateActionRepresentation->project(mDiscObsPredPopulation, 
                        IDEAL_NOISE, 
                        mStateActionRepresentation->computeMaxActivationSum());

                // Create a Projection from the state-action representation to the 
                // continuous Observation predictor.
                mStateActionRepresentation->project(mContObsPredPopulation, 
                        IDEAL_NOISE, 
                        mStateActionRepresentation->computeMaxActivationSum());

                // Create a Projection from the state-action representation to the 
                // reward predictor.
                mStateActionRepresentation->project(mRewardPredPopulation, 
                        IDEAL_NOISE, 
                        mStateActionRepresentation->computeMaxActivationSum());

                // Create a Projection from the state-action representation to the 
                // uncertainty predictor.  Make the new weights initially positive, 
                // forcing the uncertainty predictions to be high at first.
                mStateActionRepresentation->project(mUncertaintyPredPopulation, 
                        WEIGHTS_NEAR_1, 
                        mStateActionRepresentation->computeMaxActivationSum());

                mLatestPredMSE = 0;
                mDeltaLearningTimeConstant = 0;
                mDeltaLearningFactor = 0;
        }

        PredictiveModel::~PredictiveModel()
        {
                delete [] mDiscObsTrainingData;

                // Destroy Populations, including the Neurons and Projections 
                // contained within them.
                while (!mAllPopulations.empty())
                {
                        delete mAllPopulations.back();
                        mAllPopulations.pop_back();
                }
        }

        void PredictiveModel::resetShortTermMemory()
        {
                mStateActionInputData.zeroInputData();

                mLatestPredMSE = 0;

                unsigned int size = (unsigned int)mAllPopulations.size();
                for (unsigned int i = 0; i < size; ++i)
                {
                        mAllPopulations[i]->resetShortTermMemory();
                }
        }

        void PredictiveModel::predictAndTrain(const Observation& actualPrevObs, 
                        unsigned int prevAction, const Observation& actualCurrentObs, 
                        const real actualCurrentReward, Observation& predCurrentObs, 
                        real& predCurrentReward, real& predUncertainty)
        {
                // Update the predictor Populations' outputs to represent 
                // the predicted current values.  Allow dynamic RBF creation 
                // since we're in training.
                predict(actualPrevObs, prevAction, predCurrentObs, 
                        predCurrentReward, predUncertainty, true);

                // Now we must train the predictors using the actual current 
                // values.  We need to scale each actual discrete value to 
                // be within [-1, 1] first.
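                // (With numOptions choices, index n maps to 
                // -1 + 2n / (numOptions - 1), so three options map {0, 1, 2} 
                // to {-1, 0, 1}.  This assumes at least two options per 
                // discrete input; otherwise the increment below divides by zero.)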
                unsigned int numDiscreteValues = 
                        actualCurrentObs.getNumDiscreteInputs();
                for (unsigned int i = 0; i < numDiscreteValues; ++i)
                {
                        real increment = 2 / (real)
                                (actualCurrentObs.getDiscreteInputNumOptions(i) - 1);
                        mDiscObsTrainingData[i] = -1 + increment * 
                                actualCurrentObs.getDiscreteValue(i);
                }
                real rewardPredMSE = mRewardPredPopulation->
                        trainPreDeltaRuleLinear(&actualCurrentReward, 
                        mDeltaLearningFactor);
                real discObsPredMSE = mDiscObsPredPopulation->
                        trainPreDeltaRuleLinear(mDiscObsTrainingData, 
                        mDeltaLearningFactor);
                real contObsPredMSE = mContObsPredPopulation->
                        trainPreDeltaRuleLinear(
                        actualCurrentObs.getContinuousInputData(), 
                        mDeltaLearningFactor);

                // Here we combine the MSE from all predictions into a single MSE 
                // value.
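                // This is a weighted average of the per-population MSEs, 
                // weighted by each population's neuron count (the reward 
                // predictor contributes a single neuron).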
                mLatestPredMSE = 
                        (discObsPredMSE * mDiscObsPredPopulation->getNumNeurons() + 
                        contObsPredMSE * mContObsPredPopulation->getNumNeurons() + 
                        rewardPredMSE) / (mDiscObsPredPopulation->getNumNeurons() + 
                        mContObsPredPopulation->getNumNeurons() + 1);

                // Now that we know the actual current uncertainty, we can train 
                // the uncertainty predictor.
                mUncertaintyPredPopulation->trainPreDeltaRuleLinear(
                        &mLatestPredMSE, mDeltaLearningFactor);
        }

        void PredictiveModel::predict(const Observation& actualCurrentObs, 
                unsigned int currentAction, Observation& predNextObs, 
                real& predNextReward, real& predUncertainty, 
                bool allowDynamicRBFCreation)
        {
                // Convert the data for the current Observation and action into 
                // a form the state-action Population can use.
                unsigned int numDiscInputs = actualCurrentObs.getNumDiscreteInputs();
                for (unsigned int i = 0; i < numDiscInputs; ++i)
                {
                        // Copy the discrete Observation input data.
                        mStateActionInputData.discInputData[i] = 
                                actualCurrentObs.getDiscreteValue(i);
                }

                // Copy the action.
                mStateActionInputData.discInputData[numDiscInputs] = currentAction;

                unsigned int numContInputs = 
                        actualCurrentObs.getNumContinuousInputs();
                for (unsigned int i = 0; i < numContInputs; ++i)
                {
                        mStateActionInputData.contInputData[i] = 
                                actualCurrentObs.getContinuousValue(i);
                }

                // Update the state representation using the given current 
                // Observation and action.
                mStateActionRepresentation->updateFiringRatesRBF(
                        mStateActionInputData, allowDynamicRBFCreation);

                // Update the predictor Populations.
                mDiscObsPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
                mContObsPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
                mRewardPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
                mUncertaintyPredPopulation->updateFiringRatesLinearBoundedZeroToOne();

                // Fill the predicted next Observation, reward, and uncertainty data 
                // to be returned.
                for (unsigned int i = 0; i < numDiscInputs; ++i)
                {
                        // We must convert the discrete value from [-1, 1] to its 
                        // actual range.  We must use a rounding function here to 
                        // ensure correct results.
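                        // (This inverts the training-time mapping: 
                        // index = round((rate + 1) / increment).  E.g., with 
                        // three options the increment is 1, so a firing rate 
                        // of 0.4 becomes index 1.)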
                        unsigned int numOptions = 
                                actualCurrentObs.getDiscreteInputNumOptions(i);
                        real increment = 2 / (real)(numOptions - 1);
                        int discValue = globals::roundToInt(
                                (mDiscObsPredPopulation->getNeuron(i)->
                                getFiringRate() + 1) / increment);

                        if (discValue < 0)
                        {
                                discValue = 0;
                        }
                        else if (discValue >= (int)numOptions)
                        {
                                discValue = numOptions - 1;
                        }
                        predNextObs.setDiscreteValue(i, (unsigned int)discValue);
                }

                for (unsigned int i = 0; i < numContInputs; ++i)
                {
                        predNextObs.setContinuousValue(i, 
                                mContObsPredPopulation->getNeuron(i)->getFiringRate());
                }

                predNextReward = 
                        mRewardPredPopulation->getNeuron(0)->getFiringRate();
                predUncertainty = 
                        mUncertaintyPredPopulation->getNeuron(0)->getFiringRate();
        }

        void PredictiveModel::changeStepSize(real newValue)
        {
                setDeltaLearningRate(mDeltaLearningTimeConstant, newValue);
        }

        void PredictiveModel::setDeltaLearningRate(real timeConstant, 
                real stepSize)
        {
                mDeltaLearningTimeConstant = timeConstant;
                mDeltaLearningFactor = 1 - globals::calcDecayConstant(
                        mDeltaLearningTimeConstant, stepSize);

                // The learning factor should be normalized as follows: 
                // 
                // learning factor = learning factor / # of active features
                // 
                // This method allows us to change the number of active features 
                // in the state representation without making learning unstable.  
                // Since we're using an RBF state representation, the number of 
                // active features is equal to the total sum of RBF activation.
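                // Combined with the assignment above, the final factor is 
                // (1 - decay constant) / max RBF activation sum.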
                mDeltaLearningFactor = mDeltaLearningFactor / 
                        mStateActionRepresentation->computeMaxActivationSum();
        }

        real PredictiveModel::getPredictionMSE()
        {
                return mLatestPredMSE;
        }
}
