1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
#include <iostream> #include <vector> #include <cmath> #include <algorithm> // Define a structure to hold data points struct DataPoint { std::vector<double> features; // Features of the data point int label; // Class label }; // Function to calculate Euclidean distance between two data points double euclideanDistance(const DataPoint& a, const DataPoint& b) { double sum = 0.0; for (size_t i = 0; i < a.features.size(); ++i) { double diff = a.features[i] - b.features[i]; sum += diff * diff; } return std::sqrt(sum); } // Function to perform K-Nearest Neighbors classification int knn(const std::vector<DataPoint>& trainingData, const DataPoint& testPoint, int k) { std::vector<std::pair<double, int>> distances; // Calculate distance from testPoint to each point in trainingData for (const auto& trainPoint : trainingData) { double dist = euclideanDistance(testPoint, trainPoint); distances.emplace_back(dist, trainPoint.label); } // Sort distances by the first element (distance) std::sort(distances.begin(), distances.end()); // Count the labels of the k nearest neighbors std::vector<int> labelCount(10, 0); // Assuming labels are in the range [0, 9] for (int i = 0; i < k; ++i) { labelCount[distances[i].second]++; } // Find the most frequent label int maxCount = 0; int predictedLabel = -1; for (int i = 0; i < labelCount.size(); ++i) { if (labelCount[i] > maxCount) { maxCount = labelCount[i]; predictedLabel = i; } } return predictedLabel; } int main() { // Sample training data std::vector<DataPoint> trainingData = { {{1.0, 2.0}, 0}, {{2.0, 3.0}, 0}, {{3.0, 4.0}, 1}, {{5.0, 5.0}, 1} }; // Sample test data DataPoint testPoint = {{4.0, 4.0}, -1}; // -1 indicates that we don't know the label // Number of neighbors to consider int k = 3; // Perform KNN classification int predictedLabel = knn(trainingData, testPoint, k); // Display the result std::cout << "Predicted label for test point: " << predictedLabel << "\n"; return 0; } |
Explanation
- DataPoint Structure:
  - Represents a data point with a vector of `features` and a `label`.
- euclideanDistance Function:
  - Computes the Euclidean distance between two `DataPoint` objects based on their feature vectors.
- knn Function:
  - Performs K-Nearest Neighbors classification.
  - Calculates the distance between the test point and each training data point.
  - Sorts the distances and selects the `k` nearest neighbors.
  - Counts the occurrences of each label among the nearest neighbors and returns the most frequent label as the predicted label.
- main Function:
  - Defines a set of sample training data and a test point.
  - Specifies the number of neighbors `k` to consider.
  - Calls the `knn` function to classify the test point.
  - Displays the predicted label for the test point.