/************************************************************************
* Verve                                                                 *
* Copyright (C) 2004-2006                                               *
* Tyler Streeter tylerstreeter@gmail.com                                *
* All rights reserved.                                                  *
* Web: http://verve-agents.sourceforge.net                              *
*                                                                       *
* This library is free software; you can redistribute it and/or         *
* modify it under the terms of EITHER:                                  *
*   (1) The GNU Lesser General Public License as published by the Free  *
*       Software Foundation; either version 2.1 of the License, or (at  *
*       your option) any later version. The text of the GNU Lesser      *
*       General Public License is included with this library in the     *
*       file license-LGPL.txt.                                          *
*   (2) The BSD-style license that is included with this library in     *
*       the file license-BSD.txt.                                       *
*                                                                       *
* This library is distributed in the hope that it will be useful,       *
* but WITHOUT ANY WARRANTY; without even the implied warranty of        *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
* license-LGPL.txt and license-BSD.txt for more details.                *
* 00022 ************************************************************************/ 00023 00024 #ifndef VERVE_RL_MODULE_H 00025 #define VERVE_RL_MODULE_H 00026 00027 #include "Globals.h" 00028 #include "RBFInputData.h" 00029 #include "ActiveTDConnectionList.h" 00030 00031 namespace verve 00032 { 00033 class Observation; 00034 class Population; 00035 class Projection; 00036 class RBFPopulation; 00037 class WinnerTakeAllPopulation; 00038 00041 class RLModule 00042 { 00043 public: 00047 VERVE_DECL RLModule(const Observation& obs, 00048 bool isDynamicRBFEnabled, unsigned int numActions); 00049 00050 VERVE_DECL virtual ~RLModule(); 00051 00053 VERVE_DECL virtual void VERVE_CALL resetShortTermMemory(); 00054 00060 VERVE_DECL virtual unsigned int VERVE_CALL update( 00061 const Observation& obs, real reinforcement); 00062 00065 VERVE_DECL virtual unsigned int VERVE_CALL updatePolicyOnly( 00066 const Observation& obs); 00067 00069 VERVE_DECL virtual void VERVE_CALL changeStepSize(real newValue); 00070 00074 VERVE_DECL virtual void VERVE_CALL setETraceTimeConstant( 00075 real timeConstant, real stepSize); 00076 00080 VERVE_DECL virtual void VERVE_CALL setTDDiscountTimeConstant( 00081 real timeConstant, real stepSize); 00082 00090 VERVE_DECL virtual void VERVE_CALL setTDLearningRate( 00091 real valueFunctionTimeConstant, real policyLearningMultiplier, 00092 real stepSize); 00093 00095 VERVE_DECL virtual real VERVE_CALL getTDError(); 00096 00103 VERVE_DECL virtual void VERVE_CALL resetState(const Observation& obs); 00104 00108 VERVE_DECL virtual real VERVE_CALL computeValueEstimation( 00109 const Observation& obs); 00110 00122 VERVE_DECL virtual void VERVE_CALL saveValueData( 00123 unsigned int continuousResolution, 00124 const std::string& filename = ""); 00125 00132 VERVE_DECL virtual void VERVE_CALL saveStateRBFData( 00133 const std::string& filename = ""); 00134 00135 protected: 00137 void updateActiveTDConnectionList(); 00138 00140 void trainTDRule(); 00141 00144 real 
updateCriticOutput(); 00145 00147 unsigned int updateActorOutput(); 00148 00153 RBFInputData mLatestInputData; 00154 00156 RBFPopulation* mStateRepresentation; 00157 00159 WinnerTakeAllPopulation* mActorPopulation; 00160 00162 Population* mCriticPopulation; 00163 00165 std::vector<Population*> mAllPopulations; 00166 00168 ActiveTDConnectionList mActiveValueFunctionTDConnections; 00169 00171 ActiveTDConnectionList mActivePolicyTDConnections; 00172 00176 bool mFirstStep; 00177 00179 real mTDError; 00180 00183 real mOldValueEstimation; 00184 00187 real mNewValueEstimation; 00188 00191 real mETraceTimeConstant; 00192 00194 real mTDDiscountTimeConstant; 00195 00198 real mTDDiscountFactor; 00199 00202 real mValueFunctionLearningTimeConstant; 00203 00206 real mValueFunctionLearningFactor; 00207 00210 real mPolicyLearningMultiplier; 00211 }; 00212 } 00213 00214 #endif