Verve: PredictiveModel.h Source File

00001 /************************************************************************
00002 * Verve                                                                 *
00003 * Copyright (C) 2004-2006                                               *
00004 * Tyler Streeter  tylerstreeter@gmail.com                               *
00005 * All rights reserved.                                                  *
00006 * Web: http://verve-agents.sourceforge.net                              *
00007 *                                                                       *
00008 * This library is free software; you can redistribute it and/or         *
00009 * modify it under the terms of EITHER:                                  *
00010 *   (1) The GNU Lesser General Public License as published by the Free  *
00011 *       Software Foundation; either version 2.1 of the License, or (at  *
00012 *       your option) any later version. The text of the GNU Lesser      *
00013 *       General Public License is included with this library in the     *
00014 *       file license-LGPL.txt.                                          *
00015 *   (2) The BSD-style license that is included with this library in     *
00016 *       the file license-BSD.txt.                                       *
00017 *                                                                       *
00018 * This library is distributed in the hope that it will be useful,       *
00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
00021 * license-LGPL.txt and license-BSD.txt for more details.                *
00022 ************************************************************************/
00023 
00024 #ifndef VERVE_PREDICTIVE_MODEL_H
00025 #define VERVE_PREDICTIVE_MODEL_H
00026 
00027 #include "Globals.h"
00028 #include "Observation.h"
00029 #include "RBFInputData.h"
00030 
00031 // PREDICTIVE MODEL STATE REPRESENTATION (currently, METHOD 1 is used): 
00032 // METHOD 1. predictive model uses a state-action representation 
00033 //              (takes obs and action inputs).  This is more standard.
00034 // METHOD 2. predictive model just uses a state representation 
00035 //              (takes only an obs input); since the policy eventually 
00036 //              becomes predictable, presumably the next state will 
00037 //              become predictable.
00038 
00039 namespace verve
00040 {
00041         class Population;      // forward declaration: this header only declares Population* members
00042         class RBFPopulation;   // forward declaration: this header only declares an RBFPopulation* member
00043 
00049         class PredictiveModel
00050         {
                      // Predictive model interface. Going by the signatures below, a
                      // call takes an Observation plus an action index and yields a
                      // predicted observation, a predicted reward and an uncertainty
                      // value. The file-level note states that the state-action
                      // representation (METHOD 1: obs and action inputs) is the one
                      // currently used.
                      //
                      // All member functions other than the constructor are declared
                      // virtual, so the class can serve as a polymorphic base.
                      //
                      // NOTE(review): the comments in this class are derived from the
                      // declarations only; the implementation is not visible here.
                      // Confirm behavioural details against PredictiveModel.cpp.
00051         public:
                      // Constructor. Takes a reference Observation, a flag named for
                      // dynamic RBF support, and the number of actions.
                      // NOTE(review): how `obs` is used (e.g. for sizing the
                      // populations) is not visible in this header -- confirm.
00055                 VERVE_DECL PredictiveModel(const Observation& obs, 
00056                         bool isDynamicRBFEnabled, unsigned int numActions);
00057 
                      // Virtual destructor, so deleting a derived object through a
                      // PredictiveModel* runs the derived destructor.
                      // NOTE(review): the class holds raw pointer members and declares
                      // no copy constructor or copy assignment operator. If the
                      // destructor frees those pointers, copying an instance would be
                      // unsafe -- confirm ownership in the .cpp.
00058                 VERVE_DECL virtual ~PredictiveModel();
00059 
                      // Resets the model's short-term memory.
                      // NOTE(review): which state counts as "short-term" is defined by
                      // the implementation, not by this header.
00061                 VERVE_DECL virtual void VERVE_CALL resetShortTermMemory();
00062 
                      // Predicts and trains in one call.
                      // Inputs (const references / by value): the actual previous
                      // observation, the previous action index, the actual current
                      // observation and the actual current reward.
                      // Non-const reference parameters: predCurrentObs,
                      // predCurrentReward and predUncertainty -- presumably outputs
                      // filled in by the call; confirm in the .cpp.
00069                 VERVE_DECL virtual void VERVE_CALL predictAndTrain(
00070                         const Observation& actualPrevObs, 
00071                         unsigned int prevAction, const Observation& actualCurrentObs, 
00072                         const real actualCurrentReward, Observation& predCurrentObs, 
00073                         real& predCurrentReward, real& predUncertainty);
00074 
                      // Prediction from the current observation and action index.
                      // Non-const reference parameters: predNextObs, predNextReward
                      // and predUncertainty -- presumably outputs; confirm in the .cpp.
                      // allowDynamicRBFCreation is a caller-supplied flag.
                      // NOTE(review): its exact effect, and whether this call performs
                      // any training, are not visible here.
00078                 VERVE_DECL virtual void VERVE_CALL predict(
00079                         const Observation& actualCurrentObs, 
00080                         unsigned int currentAction, Observation& predNextObs, 
00081                         real& predNextReward, real& predUncertainty, 
00082                         bool allowDynamicRBFCreation);
00083 
                      // Takes a new step size value.
                      // NOTE(review): which internal components the new value is
                      // applied to is not visible in this header.
00085                 VERVE_DECL virtual void VERVE_CALL changeStepSize(real newValue);
00086 
                      // Takes a time constant and a step size.
                      // NOTE(review): presumably updates mDeltaLearningTimeConstant
                      // and mDeltaLearningFactor below -- confirm in the .cpp; units
                      // and valid ranges are not stated here.
00094                 VERVE_DECL virtual void VERVE_CALL setDeltaLearningRate(
00095                         real timeConstant, real stepSize);
00096 
                      // Returns a `real`; by its name, a prediction mean squared error.
                      // NOTE(review): presumably returns mLatestPredMSE -- confirm.
                      // The function is not const-qualified, so it cannot be called
                      // through a const PredictiveModel reference.
00099                 VERVE_DECL virtual real VERVE_CALL getPredictionMSE();
00100 
00101         protected:
                      // Input data object for the state-action RBF representation
                      // (held by value).
00104                 RBFInputData mStateActionInputData;
00105 
                      // Raw pointer to `real`.
                      // NOTE(review): presumably an array of training targets for the
                      // discrete observation predictions; length and ownership are not
                      // visible here.
00110                 real* mDiscObsTrainingData;
00111 
                      // Pointer to the RBF population named as the state-action
                      // representation.
00113                 RBFPopulation* mStateActionRepresentation;
00114 
                      // The four Population pointers below are named after what they
                      // predict: discrete observation values, continuous observation
                      // values, reward and uncertainty.
                      // NOTE(review): allocation and ownership are not visible here.
00116                 Population* mDiscObsPredPopulation;
00117 
00119                 Population* mContObsPredPopulation;
00120 
00122                 Population* mRewardPredPopulation;
00123 
00125                 Population* mUncertaintyPredPopulation;
00126 
                      // Vector of Population pointers.
                      // NOTE(review): presumably a convenience list of the populations
                      // above for bulk operations -- confirm. This header does not
                      // include <vector> itself; std::vector is presumably made
                      // available through one of the project headers included above.
00128                 std::vector<Population*> mAllPopulations;
00129 
                      // NOTE(review): presumably the most recently computed prediction
                      // MSE, as reported by getPredictionMSE() -- confirm.
00131                 real mLatestPredMSE;
00132 
                      // Named after the parameters of setDeltaLearningRate().
                      // NOTE(review): how the factor is derived from the time constant
                      // and step size is not visible here.
00134                 real mDeltaLearningTimeConstant;
00135 
00137                 real mDeltaLearningFactor;
00138         };
00139 }
00140 
00141 #endif