```cpp
#include <iostream>
#include <vector>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <algorithm>  // std::max_element

const int GRID_SIZE = 5;
const double ALPHA = 0.1;    // Learning rate
const double GAMMA = 0.9;    // Discount factor
const double EPSILON = 0.1;  // Exploration rate
const int NUM_EPISODES = 1000;

enum Actions { UP, DOWN, LEFT, RIGHT, NUM_ACTIONS };

struct State {
    int x, y;
};

class QLearningAgent {
public:
    QLearningAgent() {
        // Initialize Q-table with zeros
        qTable.resize(GRID_SIZE * GRID_SIZE, std::vector<double>(NUM_ACTIONS, 0.0));
        std::srand(static_cast<unsigned>(std::time(nullptr)));
    }

    // Choose action based on epsilon-greedy policy
    int chooseAction(const State& state) {
        if (static_cast<double>(std::rand()) / RAND_MAX < EPSILON) {
            // Explore: choose a random action
            return std::rand() % NUM_ACTIONS;
        } else {
            // Exploit: choose the best action based on Q-table
            int stateIndex = getStateIndex(state);
            double maxQ = qTable[stateIndex][0];
            int bestAction = 0;
            for (int a = 1; a < NUM_ACTIONS; ++a) {
                if (qTable[stateIndex][a] > maxQ) {
                    maxQ = qTable[stateIndex][a];
                    bestAction = a;
                }
            }
            return bestAction;
        }
    }

    // Update Q-table based on the agent's experience
    void updateQTable(const State& state, int action, double reward, const State& nextState) {
        int stateIndex = getStateIndex(state);
        int nextStateIndex = getStateIndex(nextState);
        double maxNextQ = *std::max_element(qTable[nextStateIndex].begin(), qTable[nextStateIndex].end());
        qTable[stateIndex][action] += ALPHA * (reward + GAMMA * maxNextQ - qTable[stateIndex][action]);
    }

    // Read-only access to a learned Q-value (used when printing the trained table)
    double getQValue(const State& state, int action) const {
        return qTable[getStateIndex(state)][action];
    }

private:
    // Convert state to a unique index
    int getStateIndex(const State& state) const {
        return state.y * GRID_SIZE + state.x;
    }

    std::vector<std::vector<double>> qTable;
};

void printGrid(const State& agentPos, const State& goalPos) {
    for (int y = 0; y < GRID_SIZE; ++y) {
        for (int x = 0; x < GRID_SIZE; ++x) {
            if (x == agentPos.x && y == agentPos.y) {
                std::cout << "A ";
            } else if (x == goalPos.x && y == goalPos.y) {
                std::cout << "G ";
            } else {
                std::cout << ". ";
            }
        }
        std::cout << "\n";
    }
}

int main() {
    QLearningAgent agent;
    State goalPos = {GRID_SIZE - 1, GRID_SIZE - 1};
    double reward = 10.0;
    double stepReward = -1.0;

    for (int episode = 0; episode < NUM_EPISODES; ++episode) {
        State agentPos = {0, 0};  // Start position

        while (agentPos.x != goalPos.x || agentPos.y != goalPos.y) {
            int action = agent.chooseAction(agentPos);

            // Move the agent based on the action
            State nextPos = agentPos;
            switch (action) {
                case UP:    if (nextPos.y > 0) --nextPos.y; break;
                case DOWN:  if (nextPos.y < GRID_SIZE - 1) ++nextPos.y; break;
                case LEFT:  if (nextPos.x > 0) --nextPos.x; break;
                case RIGHT: if (nextPos.x < GRID_SIZE - 1) ++nextPos.x; break;
            }

            double rewardValue = (nextPos.x == goalPos.x && nextPos.y == goalPos.y) ? reward : stepReward;
            agent.updateQTable(agentPos, action, rewardValue, nextPos);
            agentPos = nextPos;
        }
    }

    // Note: only the Q-values for the UP action are printed, one per cell
    std::cout << "Trained Q-table:\n";
    for (int y = 0; y < GRID_SIZE; ++y) {
        for (int x = 0; x < GRID_SIZE; ++x) {
            std::cout << std::fixed << std::setprecision(2)
                      << agent.getQValue({x, y}, UP) << " ";
        }
        std::cout << "\n";
    }

    std::cout << "Final grid:\n";
    printGrid({GRID_SIZE - 1, GRID_SIZE - 1}, goalPos);

    return 0;
}
```
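The listing uses `std::srand`/`std::rand` to keep the focus on the Q-learning logic itself. If reproducible seeding or better-quality randomness is wanted, the epsilon-greedy draw in `chooseAction` could be based on `<random>` instead. The sketch below is only illustrative; the `EpsilonGreedySampler` name is made up here, and it assumes the `EPSILON` and `NUM_ACTIONS` constants from the listing are in scope.

```cpp
// Illustrative alternative to the rand()-based draws in chooseAction, using <random>.
#include <random>

struct EpsilonGreedySampler {
    std::mt19937 rng{std::random_device{}()};               // seeded Mersenne Twister engine
    std::uniform_real_distribution<double> coin{0.0, 1.0};  // uniform draw in [0, 1)
    std::uniform_int_distribution<int> randomAction{0, NUM_ACTIONS - 1};

    // True with probability EPSILON: the caller should take a random (exploratory) action
    bool shouldExplore() { return coin(rng) < EPSILON; }

    // Uniformly random action for the exploration branch
    int sampleRandomAction() { return randomAction(rng); }
};
```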
Explanation

- Constants:
  - `GRID_SIZE`: Size of the grid (5×5).
  - `ALPHA`: Learning rate, which controls how much new information overrides old information.
  - `GAMMA`: Discount factor, which models the importance of future rewards.
  - `EPSILON`: Exploration rate for the epsilon-greedy policy.
  - `NUM_EPISODES`: Number of training episodes for the Q-learning algorithm.
- Class `QLearningAgent`:
  - Attributes:
    - `qTable`: A 2D vector representing the Q-table, indexed by state and action.
  - Methods:
    - `chooseAction(const State& state)`: Chooses an action based on the epsilon-greedy policy.
    - `updateQTable(const State& state, int action, double reward, const State& nextState)`: Updates the Q-table based on the agent's experience (the update rule is written out after this list).
    - `getQValue(const State& state, int action)`: Read-only access to a learned Q-value, used when printing the trained table.
- Function `printGrid(const State& agentPos, const State& goalPos)`:
  - Purpose: Prints the grid showing the positions of the agent (A) and the goal (G).
- Main Function:
  - Setup: Initializes the Q-learning agent, sets the goal position, and defines the goal and step rewards.
  - Training Loop:
    - The agent starts at the initial position in each episode and learns to navigate to the goal.
    - It chooses actions, moves, and updates the Q-table after every step.
  - Final Output:
    - Prints the trained Q-table (the UP-action values for each cell).
    - Displays the final grid with the agent at the goal position.
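For reference, the update performed in `updateQTable` is the standard tabular Q-learning rule, with `ALPHA` as the learning rate and `GAMMA` as the discount factor:

$$Q(s, a) \leftarrow Q(s, a) + \alpha \bigl[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \bigr]$$

This corresponds line-for-line to `qTable[stateIndex][action] += ALPHA * (reward + GAMMA * maxNextQ - qTable[stateIndex][action]);` in the code above.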
Usage
- Reinforcement Learning Simulation: Demonstrates a basic implementation of Q-learning for an agent navigating a grid.
- Agent Training: Shows how an agent can learn from exploration and exploitation to reach a goal.
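To illustrate the second point: once training has finished, the learned behavior can be read back out of the Q-table by always taking the action with the highest Q-value. The helper below is a minimal sketch rather than part of the original program; the `followGreedyPolicy` name is made up here, and it assumes the `State`, `Actions`, `GRID_SIZE`, and `QLearningAgent::getQValue` definitions from the listing above are in scope.

```cpp
#include <iostream>

// Follow the greedy (argmax) action from each cell until the goal is reached
// or a step limit is hit (the limit guards against untrained or cyclic tables).
void followGreedyPolicy(const QLearningAgent& agent, State start, State goal) {
    State pos = start;
    int steps = 0;
    const int maxSteps = GRID_SIZE * GRID_SIZE;

    while ((pos.x != goal.x || pos.y != goal.y) && steps < maxSteps) {
        // Pick the action with the highest learned Q-value (pure exploitation)
        int bestAction = UP;
        double bestQ = agent.getQValue(pos, UP);
        for (int a = DOWN; a < NUM_ACTIONS; ++a) {
            double q = agent.getQValue(pos, a);
            if (q > bestQ) { bestQ = q; bestAction = a; }
        }

        // Apply the same bounded moves used during training
        switch (bestAction) {
            case UP:    if (pos.y > 0) --pos.y; break;
            case DOWN:  if (pos.y < GRID_SIZE - 1) ++pos.y; break;
            case LEFT:  if (pos.x > 0) --pos.x; break;
            case RIGHT: if (pos.x < GRID_SIZE - 1) ++pos.x; break;
        }
        std::cout << "Step " << ++steps << ": (" << pos.x << ", " << pos.y << ")\n";
    }
}
```

Calling `followGreedyPolicy(agent, {0, 0}, goalPos)` at the end of `main` would print the path the trained agent takes from the start cell to the goal.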