00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "PredictiveModel.h"
00025 #include "RBFPopulation.h"
00026 #include "Neuron.h"
00027
00028 namespace verve
00029 {
00030 PredictiveModel::PredictiveModel(const Observation& obs,
00031 bool isDynamicRBFEnabled, unsigned int numActions)
00032 {
00033
00034
00035 unsigned int* discreteNumOptionsData =
00036 new unsigned int[obs.getNumDiscreteInputs() + 1];
00037 unsigned int* discreteInputData =
00038 new unsigned int[obs.getNumDiscreteInputs() + 1];
00039 for (unsigned int i = 0; i < obs.getNumDiscreteInputs(); ++i)
00040 {
00041 discreteNumOptionsData[i] = obs.getDiscreteInputNumOptions(i);
00042 discreteInputData[i] = 0;
00043 }
00044 discreteNumOptionsData[obs.getNumDiscreteInputs()] = numActions;
00045 discreteInputData[obs.getNumDiscreteInputs()] = 0;
00046
00047 mStateActionInputData.init(obs.getNumDiscreteInputs() + 1,
00048 discreteNumOptionsData,
00049 discreteInputData,
00050 obs.getNumContinuousInputs(),
00051 obs.getContinuousResolution(),
00052 obs.getContinuousCircularData(),
00053 obs.getContinuousInputData());
00054
00055 delete [] discreteNumOptionsData;
00056 delete [] discreteInputData;
00057
00058 mDiscObsTrainingData = new real[obs.getNumDiscreteInputs()];
00059 for (unsigned int i = 0; i < obs.getNumDiscreteInputs(); ++i)
00060 {
00061 mDiscObsTrainingData[i] = 0;
00062 }
00063
00064
00065 mStateActionRepresentation = new RBFPopulation();
00066 mStateActionRepresentation->init(mStateActionInputData,
00067 isDynamicRBFEnabled);
00068 mAllPopulations.push_back(mStateActionRepresentation);
00069
00070
00071 mDiscObsPredPopulation = new Population();
00072 mDiscObsPredPopulation->init(obs.getNumDiscreteInputs());
00073 mAllPopulations.push_back(mDiscObsPredPopulation);
00074
00075
00076 mContObsPredPopulation = new Population();
00077 mContObsPredPopulation->init(obs.getNumContinuousInputs());
00078 mAllPopulations.push_back(mContObsPredPopulation);
00079
00080
00081 mRewardPredPopulation = new Population();
00082 mRewardPredPopulation->init(1);
00083 mAllPopulations.push_back(mRewardPredPopulation);
00084
00085
00086 mUncertaintyPredPopulation = new Population();
00087 mUncertaintyPredPopulation->init(1);
00088 mAllPopulations.push_back(mUncertaintyPredPopulation);
00089
00090
00091
00092 mStateActionRepresentation->project(mDiscObsPredPopulation,
00093 IDEAL_NOISE,
00094 mStateActionRepresentation->computeMaxActivationSum());
00095
00096
00097
00098 mStateActionRepresentation->project(mContObsPredPopulation,
00099 IDEAL_NOISE,
00100 mStateActionRepresentation->computeMaxActivationSum());
00101
00102
00103
00104 mStateActionRepresentation->project(mRewardPredPopulation,
00105 IDEAL_NOISE,
00106 mStateActionRepresentation->computeMaxActivationSum());
00107
00108
00109
00110
00111 mStateActionRepresentation->project(mUncertaintyPredPopulation,
00112 WEIGHTS_NEAR_1,
00113 mStateActionRepresentation->computeMaxActivationSum());
00114
00115 mLatestPredMSE = 0;
00116 mDeltaLearningTimeConstant = 0;
00117 mDeltaLearningFactor = 0;
00118 }
00119
00120 PredictiveModel::~PredictiveModel()
00121 {
00122 delete [] mDiscObsTrainingData;
00123
00124
00125
00126 while (!mAllPopulations.empty())
00127 {
00128 delete mAllPopulations.back();
00129 mAllPopulations.pop_back();
00130 }
00131 }
00132
00133 void PredictiveModel::resetShortTermMemory()
00134 {
00135 mStateActionInputData.zeroInputData();
00136
00137 mLatestPredMSE = 0;
00138
00139 unsigned int size = (unsigned int)mAllPopulations.size();
00140 for (unsigned int i = 0; i < size; ++i)
00141 {
00142 mAllPopulations[i]->resetShortTermMemory();
00143 }
00144 }
00145
00146 void PredictiveModel::predictAndTrain(const Observation& actualPrevObs,
00147 unsigned int prevAction, const Observation& actualCurrentObs,
00148 const real actualCurrentReward, Observation& predCurrentObs,
00149 real& predCurrentReward, real& predUncertainty)
00150 {
00151
00152
00153
00154 predict(actualPrevObs, prevAction, predCurrentObs,
00155 predCurrentReward, predUncertainty, true);
00156
00157
00158
00159
00160 unsigned numDiscreteValues =
00161 actualCurrentObs.getNumDiscreteInputs();
00162 for (unsigned int i = 0; i < numDiscreteValues; ++i)
00163 {
00164 real increment = 2 / (real)
00165 (actualCurrentObs.getDiscreteInputNumOptions(i) - 1);
00166 mDiscObsTrainingData[i] = -1 + increment *
00167 actualCurrentObs.getDiscreteValue(i);
00168 }
00169 real rewardPredMSE = mRewardPredPopulation->
00170 trainPreDeltaRuleLinear(&actualCurrentReward,
00171 mDeltaLearningFactor);
00172 real discObsPredMSE = mDiscObsPredPopulation->
00173 trainPreDeltaRuleLinear(mDiscObsTrainingData,
00174 mDeltaLearningFactor);
00175 real contObsPredMSE = mContObsPredPopulation->
00176 trainPreDeltaRuleLinear(
00177 actualCurrentObs.getContinuousInputData(),
00178 mDeltaLearningFactor);
00179
00180
00181
00182 mLatestPredMSE =
00183 (discObsPredMSE * mDiscObsPredPopulation->getNumNeurons() +
00184 contObsPredMSE * mContObsPredPopulation->getNumNeurons() +
00185 rewardPredMSE) / (mDiscObsPredPopulation->getNumNeurons() +
00186 mContObsPredPopulation->getNumNeurons() + 1);
00187
00188
00189
00190 mUncertaintyPredPopulation->trainPreDeltaRuleLinear(
00191 &mLatestPredMSE, mDeltaLearningFactor);
00192 }
00193
00194 void PredictiveModel::predict(const Observation& actualCurrentObs,
00195 unsigned int currentAction, Observation& predNextObs,
00196 real& predNextReward, real& predUncertainty,
00197 bool allowDynamicRBFCreation)
00198 {
00199
00200
00201 unsigned int numDiscInputs = actualCurrentObs.getNumDiscreteInputs();
00202 for (unsigned int i = 0; i < numDiscInputs; ++i)
00203 {
00204
00205 mStateActionInputData.discInputData[i] =
00206 actualCurrentObs.getDiscreteValue(i);
00207 }
00208
00209
00210 mStateActionInputData.discInputData[numDiscInputs] = currentAction;
00211
00212 unsigned int numContInputs =
00213 actualCurrentObs.getNumContinuousInputs();
00214 for (unsigned int i = 0; i < numContInputs; ++i)
00215 {
00216 mStateActionInputData.contInputData[i] =
00217 actualCurrentObs.getContinuousValue(i);
00218 }
00219
00220
00221
00222 mStateActionRepresentation->updateFiringRatesRBF(
00223 mStateActionInputData, allowDynamicRBFCreation);
00224
00225
00226 mDiscObsPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
00227 mContObsPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
00228 mRewardPredPopulation->updateFiringRatesLinearBoundedNegOneToOne();
00229 mUncertaintyPredPopulation->updateFiringRatesLinearBoundedZeroToOne();
00230
00231
00232
00233 for (unsigned int i = 0; i < numDiscInputs; ++i)
00234 {
00235
00236
00237
00238 unsigned int numOptions =
00239 actualCurrentObs.getDiscreteInputNumOptions(i);
00240 real increment = 2 / (real)(numOptions - 1);
00241 int discValue = globals::roundToInt(
00242 (mDiscObsPredPopulation->getNeuron(i)->
00243 getFiringRate() + 1) / increment);
00244
00245 if (discValue < 0)
00246 {
00247 discValue = 0;
00248 }
00249 else if (discValue >= (int)numOptions)
00250 {
00251 discValue = numOptions - 1;
00252 }
00253 predNextObs.setDiscreteValue(i, (unsigned int)discValue);
00254 }
00255
00256 for (unsigned int i = 0; i < numContInputs; ++i)
00257 {
00258 predNextObs.setContinuousValue(i,
00259 mContObsPredPopulation->getNeuron(i)->getFiringRate());
00260 }
00261
00262 predNextReward =
00263 mRewardPredPopulation->getNeuron(0)->getFiringRate();
00264 predUncertainty =
00265 mUncertaintyPredPopulation->getNeuron(0)->getFiringRate();
00266 }
00267
00268 void PredictiveModel::changeStepSize(real newValue)
00269 {
00270 setDeltaLearningRate(mDeltaLearningTimeConstant, newValue);
00271 }
00272
00273 void PredictiveModel::setDeltaLearningRate(real timeConstant,
00274 real stepSize)
00275 {
00276 mDeltaLearningTimeConstant = timeConstant;
00277 mDeltaLearningFactor = 1 - globals::calcDecayConstant(
00278 mDeltaLearningTimeConstant, stepSize);
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288 mDeltaLearningFactor = mDeltaLearningFactor /
00289 mStateActionRepresentation->computeMaxActivationSum();
00290 }
00291
00292 real PredictiveModel::getPredictionMSE()
00293 {
00294 return mLatestPredMSE;
00295 }
00296 }