#include <iostream>
#include <vector>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <algorithm> // std::max_element, std::max
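// Tabular Q-learning on a small grid world: the agent starts at (0, 0)
// and learns to reach the goal in the opposite corner with an epsilon-greedy policy.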
const int GRID_SIZE = 5;
const double ALPHA = 0.1; // Learning rate
const double GAMMA = 0.9; // Discount factor
const double EPSILON = 0.1; // Exploration rate
const int NUM_EPISODES = 1000;
enum Actions { UP, DOWN, LEFT, RIGHT, NUM_ACTIONS };
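// Agent position on the grid (x = column, y = row)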
struct State {
int x, y;
};
class QLearningAgent {
public:
QLearningAgent() {
// Initialize Q-table with zeros
qTable.resize(GRID_SIZE * GRID_SIZE, std::vector<double>(NUM_ACTIONS, 0.0));
std::srand(static_cast<unsigned>(std::time(nullptr)));
}
// Choose action based on epsilon-greedy policy
int chooseAction(const State& state) {
if (static_cast<double>(std::rand()) / RAND_MAX < EPSILON) {
// Explore: choose a random action
return std::rand() % NUM_ACTIONS;
} else {
// Exploit: choose the best action based on Q-table
int stateIndex = getStateIndex(state);
double maxQ = qTable[stateIndex][0];
int bestAction = 0;
for (int a = 1; a < NUM_ACTIONS; ++a) {
if (qTable[stateIndex][a] > maxQ) {
maxQ = qTable[stateIndex][a];
bestAction = a;
}
}
return bestAction;
}
}
// Update Q-table based on the agent's experience
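// Q(s,a) <- Q(s,a) + ALPHA * (reward + GAMMA * max_a' Q(s',a') - Q(s,a))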
void updateQTable(const State& state, int action, double reward, const State& nextState) {
int stateIndex = getStateIndex(state);
int nextStateIndex = getStateIndex(nextState);
double maxNextQ = *std::max_element(qTable[nextStateIndex].begin(), qTable[nextStateIndex].end());
qTable[stateIndex][action] += ALPHA * (reward + GAMMA * maxNextQ - qTable[stateIndex][action]);
}
// Read-only access to a learned Q-value (used when printing the trained table)
double getQ(const State& state, int action) const {
return qTable[getStateIndex(state)][action];
}
private:
// Convert state to a unique index
int getStateIndex(const State& state) const {
return state.y * GRID_SIZE + state.x;
}
std::vector<std::vector<double>> qTable;
};
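// Render the grid, marking the agent with 'A' and the goal with 'G'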
void printGrid(const State& agentPos, const State& goalPos) {
for (int y = 0; y < GRID_SIZE; ++y) {
for (int x = 0; x < GRID_SIZE; ++x) {
if (x == agentPos.x && y == agentPos.y) {
std::cout << "A ";
} else if (x == goalPos.x && y == goalPos.y) {
std::cout << "G ";
} else {
std::cout << ". ";
}
}
std::cout << "\n";
}
}
int main() {
QLearningAgent agent;
State goalPos = {GRID_SIZE - 1, GRID_SIZE - 1};
const double goalReward = 10.0; // Reward for reaching the goal
const double stepReward = -1.0; // Small penalty for every other step
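// Training loop: each episode starts at (0, 0) and runs until the goal is reached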
for (int episode = 0; episode < NUM_EPISODES; ++episode) {
State agentPos = {0, 0}; // Start position
while (agentPos.x != goalPos.x || agentPos.y != goalPos.y) {
int action = agent.chooseAction(agentPos);
// Move the agent based on the action
State nextPos = agentPos;
switch (action) {
case UP: if (nextPos.y > 0) --nextPos.y; break;
case DOWN: if (nextPos.y < GRID_SIZE - 1) ++nextPos.y; break;
case LEFT: if (nextPos.x > 0) --nextPos.x; break;
case RIGHT: if (nextPos.x < GRID_SIZE - 1) ++nextPos.x; break;
}
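// Reaching the goal earns the goal reward; any other move incurs the step penalty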
double rewardValue = (nextPos.x == goalPos.x && nextPos.y == goalPos.y) ? goalReward : stepReward;
agent.updateQTable(agentPos, action, rewardValue, nextPos);
agentPos = nextPos;
}
}
std::cout << "Trained Q-table:\n";
for (int y = 0; y < GRID_SIZE; ++y) {
for (int x = 0; x < GRID_SIZE; ++x) {
std::cout << std::fixed << std::setprecision(2) << agent.qTable[y * GRID_SIZE + x][UP] << " ";
}
std::cout << "\n";
}
std::cout << "Final grid:\n";
printGrid({GRID_SIZE - 1, GRID_SIZE - 1}, goalPos);
return 0;
}