RLModule.h

Go to the documentation of this file.
00001 /************************************************************************
00002 * Verve                                                                 *
00003 * Copyright (C) 2004-2006                                               *
00004 * Tyler Streeter  tylerstreeter@gmail.com                               *
00005 * All rights reserved.                                                  *
00006 * Web: http://verve-agents.sourceforge.net                              *
00007 *                                                                       *
00008 * This library is free software; you can redistribute it and/or         *
00009 * modify it under the terms of EITHER:                                  *
00010 *   (1) The GNU Lesser General Public License as published by the Free  *
00011 *       Software Foundation; either version 2.1 of the License, or (at  *
00012 *       your option) any later version. The text of the GNU Lesser      *
00013 *       General Public License is included with this library in the     *
00014 *       file license-LGPL.txt.                                          *
00015 *   (2) The BSD-style license that is included with this library in     *
00016 *       the file license-BSD.txt.                                       *
00017 *                                                                       *
00018 * This library is distributed in the hope that it will be useful,       *
00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
00021 * license-LGPL.txt and license-BSD.txt for more details.                *
00022 ************************************************************************/
00023 
00024 #ifndef VERVE_RL_MODULE_H
00025 #define VERVE_RL_MODULE_H
00026 
00027 #include "Globals.h"
00028 #include "RBFInputData.h"
00029 #include "ActiveTDConnectionList.h"
00030 
00031 namespace verve
00032 {
00033         class Observation;
00034         class Population;
00035         class Projection;
00036         class RBFPopulation;
00037         class WinnerTakeAllPopulation;
00038 
00041         class RLModule
00042         {
                      // RLModule: declares a polymorphic class (virtual destructor and
                      // virtual public methods) that consumes Observation objects and
                      // returns unsigned int results from its update calls.  It holds
                      // pointers to a state-representation RBFPopulation, an "actor"
                      // WinnerTakeAllPopulation and a "critic" Population, plus scalar
                      // learning parameters.  Only declarations are visible here; the
                      // behaviour lives in the implementation file.
                      // NOTE(review): the member names (TD error, actor/critic,
                      // eligibility trace) suggest a temporal-difference actor-critic
                      // learner -- confirm against the implementation.
                      // NOTE(review): the class declares a destructor and stores raw
                      // pointers but declares no copy constructor/assignment operator;
                      // whether copying is safe depends on ownership, which is not
                      // visible in this header.
00043         public:
                      /// Constructs the module from an observation, a flag named
                      /// isDynamicRBFEnabled and the number of actions.
                      /// NOTE(review): presumably `obs` is only used to size/configure
                      /// the state representation -- confirm.
00047                 VERVE_DECL RLModule(const Observation& obs, 
00048                         bool isDynamicRBFEnabled, unsigned int numActions);
00049 
                      /// Virtual destructor, so instances may be destroyed through a
                      /// base-class pointer.
00050                 VERVE_DECL virtual ~RLModule();
00051 
                      /// NOTE(review): presumably clears transient (per-episode) state
                      /// while keeping learned parameters -- confirm.
00053                 VERVE_DECL virtual void VERVE_CALL resetShortTermMemory();
00054 
                      /// Takes the current observation and a scalar reinforcement value
                      /// and returns an unsigned int.
                      /// NOTE(review): the return value is presumably the index of the
                      /// selected action (0 .. numActions-1), and this call presumably
                      /// also performs learning -- confirm.
00060                 VERVE_DECL virtual unsigned int VERVE_CALL update(
00061                         const Observation& obs, real reinforcement);
00062 
                      /// Same return type as update() but takes no reinforcement value.
                      /// NOTE(review): presumably selects an action without training
                      /// the value function -- confirm.
00065                 VERVE_DECL virtual unsigned int VERVE_CALL updatePolicyOnly(
00066                         const Observation& obs);
00067 
                      /// Notifies the module of a new step size.
                      /// NOTE(review): presumably recomputes step-size-dependent
                      /// factors (discount, learning factor) -- confirm.
00069                 VERVE_DECL virtual void VERVE_CALL changeStepSize(real newValue);
00070 
                      /// Sets the eligibility-trace time constant; the step size is
                      /// passed alongside it.
                      /// NOTE(review): units of both arguments are not shown here.
00074                 VERVE_DECL virtual void VERVE_CALL setETraceTimeConstant(
00075                         real timeConstant, real stepSize);
00076 
                      /// Sets the TD discount time constant; the step size is passed
                      /// alongside it.
                      /// NOTE(review): presumably also updates mTDDiscountFactor --
                      /// confirm.
00080                 VERVE_DECL virtual void VERVE_CALL setTDDiscountTimeConstant(
00081                         real timeConstant, real stepSize);
00082 
                      /// Sets the learning-rate parameters: a value-function time
                      /// constant, a policy learning multiplier and the step size.
                      /// NOTE(review): presumably the policy rate is the value-function
                      /// rate scaled by the multiplier -- confirm.
00090                 VERVE_DECL virtual void VERVE_CALL setTDLearningRate(
00091                         real valueFunctionTimeConstant, real policyLearningMultiplier, 
00092                         real stepSize);
00093 
                      /// Returns a real value.
                      /// NOTE(review): presumably the stored mTDError -- confirm.  Not
                      /// declared const, so it cannot be called on a const RLModule.
00095                 VERVE_DECL virtual real VERVE_CALL getTDError();
00096 
                      /// Resets the module's state using the given observation.
                      /// NOTE(review): exactly what is reset is not visible here.
00103                 VERVE_DECL virtual void VERVE_CALL resetState(const Observation& obs);
00104 
                      /// Returns a real value computed for the given observation.
                      /// NOTE(review): presumably the critic's value estimate for `obs`;
                      /// whether this call mutates internal state is not visible here
                      /// (the method is non-const).
00108                 VERVE_DECL virtual real VERVE_CALL computeValueEstimation(
00109                         const Observation& obs);
00110 
                      /// Saves value data using the given resolution for continuous
                      /// inputs.  `filename` defaults to the empty string; the meaning
                      /// of an empty filename is not shown in this header.
                      /// Because the function is virtual, the default argument used at
                      /// a call site is chosen by the static type of the object
                      /// expression, not by the overrider.
00122                 VERVE_DECL virtual void VERVE_CALL saveValueData(
00123                         unsigned int continuousResolution, 
00124                         const std::string& filename = "");
00125 
                      /// Saves state RBF data.  `filename` defaults to the empty
                      /// string; the meaning of an empty filename is not shown in this
                      /// header.
00132                 VERVE_DECL virtual void VERVE_CALL saveStateRBFData(
00133                         const std::string& filename = "");
00134 
00135         protected:
                      /// NOTE(review): presumably refreshes the two ActiveTDConnectionList
                      /// members below -- confirm.
00137                 void updateActiveTDConnectionList();
00138 
                      /// NOTE(review): presumably applies the TD learning rule to the
                      /// active connections -- confirm.
00140                 void trainTDRule();
00141 
                      /// Returns a real value.
                      /// NOTE(review): presumably the critic population's output.
00144                 real updateCriticOutput();
00145 
                      /// Returns an unsigned int.
                      /// NOTE(review): presumably the index of the winning action.
00147                 unsigned int updateActorOutput();
00148 
                      /// Most recent RBF input data, stored by value.
00153                 RBFInputData mLatestInputData;
00154 
                      /// Pointer to the state-representation population.  Ownership is
                      /// not visible in this header.
00156                 RBFPopulation* mStateRepresentation;
00157 
                      /// Pointer to the actor population.  Ownership is not visible in
                      /// this header.
00159                 WinnerTakeAllPopulation* mActorPopulation;
00160 
                      /// Pointer to the critic population.  Ownership is not visible in
                      /// this header.
00162                 Population* mCriticPopulation;
00163 
                      /// Collection of Population pointers.
                      /// NOTE(review): presumably contains the three populations above.
00165                 std::vector<Population*> mAllPopulations;
00166 
                      /// Active TD connections associated with the value function.
00168                 ActiveTDConnectionList mActiveValueFunctionTDConnections;
00169 
                      /// Active TD connections associated with the policy.
00171                 ActiveTDConnectionList mActivePolicyTDConnections;
00172 
                      /// Flag marking the first step.
                      /// NOTE(review): when it is set and cleared is not visible here.
00176                 bool mFirstStep;
00177 
                      /// Stored TD error value.
00179                 real mTDError;
00180 
                      /// Previous value estimation.
00183                 real mOldValueEstimation;
00184 
                      /// Newest value estimation.
00187                 real mNewValueEstimation;
00188 
                      /// Eligibility-trace time constant.
00191                 real mETraceTimeConstant;
00192 
                      /// TD discount time constant.
00194                 real mTDDiscountTimeConstant;
00195 
                      /// TD discount factor.
                      /// NOTE(review): presumably derived from the time constant and
                      /// the step size -- confirm.
00198                 real mTDDiscountFactor;
00199 
                      /// Value-function learning time constant.
00202                 real mValueFunctionLearningTimeConstant;
00203 
                      /// Value-function learning factor.
                      /// NOTE(review): presumably derived from the time constant and
                      /// the step size -- confirm.
00206                 real mValueFunctionLearningFactor;
00207 
                      /// Multiplier applied to policy learning.
00210                 real mPolicyLearningMultiplier;
00211         };
00212 }
00213 
00214 #endif

Generated on Tue Jan 24 21:46:37 2006 for Verve by  doxygen 1.4.6-NO