The STPGA (Subset Training Population Genetic Algorithm) package provides advanced tools for optimizing training set selection in genomic prediction and experimental design. This vignette demonstrates the package’s key functionalities using real wheat genomic data.
Let’s start by loading the package and exploring the included wheat dataset:
library(STPGA)
# Load the wheat dataset
data("WheatData")
# Explore the data structure
cat("Dataset components:\n")
#> Dataset components:
cat("- Wheat.Y: Phenotype data (", nrow(Wheat.Y), " observations)\n")
#> - Wheat.Y: Phenotype data ( 200 observations)
cat("- Wheat.M: Marker data (", nrow(Wheat.M), " individuals x ", ncol(Wheat.M), " markers)\n")
#> - Wheat.M: Marker data ( 200 individuals x 4670 markers)
cat("- Wheat.K: Kinship matrix (", nrow(Wheat.K), " x ", ncol(Wheat.K), ")\n")
#> - Wheat.K: Kinship matrix ( 200 x 200 )# Look at the phenotype data
head(Wheat.Y)
#> id plant.height
#> 1846 10542-63/87 91.44
#> 250 131A 124.46
#> 1541 1548 129.54
#> 1516 155/71 124.46
#> 508 1664 134.62
#> 1549 1666 144.78
# Check marker data dimensions and first few markers
cat("Marker matrix preview:\n")
#> Marker matrix preview:
Wheat.M[1:5, 1:5]
#> IWA1 IWA2 IWA3 IWA4 IWA5
#> IWA8610266 1 1 -1 -1 -1
#> IWA8606816 1 1 1 -1 -1
#> 3883 1 1 1 -1 -1
#> NW86A 1 1 1 1 -1
#> PI345476 1 1 1 -1 -1
# Examine kinship matrix structure
cat("Kinship matrix diagonal (first 10):\n")
#> Kinship matrix diagonal (first 10):
diag(Wheat.K)[1:10]
#> IWA8610266 IWA8606816 3883 NW86A PI345476 H86-708 IWA8602323
#> 1.957836 2.003338 1.980769 2.281661 1.808700 1.047744 1.970056
#> 237-VII/2 1666 IWA8610164
#> 1.760409 2.084154 1.983354
cat("Kinship matrix off-diagonal (first 5x5):\n")
#> Kinship matrix off-diagonal (first 5x5):
Wheat.K[1:5, 1:5]
#> IWA8610266 IWA8606816 3883 NW86A PI345476
#> IWA8610266 1.9578361 0.8261588 0.7546713 0.5149389 -0.2697654
#> IWA8606816 0.8261588 2.0033376 0.5756170 0.5241991 -0.2315324
#> 3883 0.7546713 0.5756170 1.9807687 0.7077443 -0.2888690
#> NW86A 0.5149389 0.5241991 0.7077443 2.2816612 -0.2645939
#> PI345476 -0.2697654 -0.2315324 -0.2888690 -0.2645939 1.8086996STPGA provides several optimization criteria for training set selection. Let’s demonstrate each using the wheat data.
# Define candidate pool and test sets
all_individuals <- rownames(Wheat.M)
n_total <- length(all_individuals)
# Create training candidates and test set
set.seed(123)
test_set <- sample(all_individuals, 40)
candidates <- setdiff(all_individuals, test_set)
cat("Total individuals:", n_total, "\n")
#> Total individuals: 200
cat("Test set size:", length(test_set), "\n")
#> Test set size: 40
cat("Candidate pool size:", length(candidates), "\n")
#> Candidate pool size: 160
# Extract first 5 principal components for non-mixed model criteria
# This reduces computational complexity for high-dimensional marker data
cat("\nExtracting first 5 principal components for non-mixed model criteria...\n")
#>
#> Extracting first 5 principal components for non-mixed model criteria...
pca_result <- prcomp(Wheat.M, center = TRUE, scale. = TRUE)
Wheat.PC5 <- pca_result$x[, 1:5]
rownames(Wheat.PC5) <- rownames(Wheat.M)
cat("PC matrix dimensions:", nrow(Wheat.PC5), "x", ncol(Wheat.PC5), "\n")
#> PC matrix dimensions: 200 x 5
cat("Variance explained by first 5 PCs:", sprintf("%.2f%%", 100 * sum(pca_result$sdev[1:5]^2) / sum(pca_result$sdev^2)), "\n")
#> Variance explained by first 5 PCs: 30.65%The package implements the classical A-, D-, and E-optimality criteria from experimental design theory.
# Select training sets of different sizes
train_small <- sample(candidates, 30)
train_medium <- sample(candidates, 60)
train_large <- sample(candidates, 100)
# Compute A-optimality using first 5 PCs (minimizes average prediction variance)
aopt_small <- a_optimality(train_small, test_set, Wheat.PC5)
aopt_medium <- a_optimality(train_medium, test_set, Wheat.PC5)
aopt_large <- a_optimality(train_large, test_set, Wheat.PC5)
cat("A-optimality results:\n")
#> A-optimality results:
cat("Small training (n=30):", sprintf("%.2e", aopt_small), "\n")
#> Small training (n=30): 1.37e-03
cat("Medium training (n=60):", sprintf("%.2e", aopt_medium), "\n")
#> Medium training (n=60): 5.26e-04
cat("Large training (n=100):", sprintf("%.2e", aopt_large), "\n")
#> Large training (n=100): 3.01e-04
cat("Improvement (large vs small):", sprintf("%.1fx", aopt_small/aopt_large), "\n")
#> Improvement (large vs small): 4.5x
# D-optimality using first 5 PCs (minimizes generalized variance)
dopt_small <- d_optimality(train_small, test_set, Wheat.PC5)
dopt_medium <- d_optimality(train_medium, test_set, Wheat.PC5)
dopt_large <- d_optimality(train_large, test_set, Wheat.PC5)
cat("\nD-optimality results:\n")
#>
#> D-optimality results:
cat("Small training (n=30):", sprintf("%.2f", dopt_small), "\n")
#> Small training (n=30): 42.80
cat("Medium training (n=60):", sprintf("%.2f", dopt_medium), "\n")
#> Medium training (n=60): 47.03
cat("Large training (n=100):", sprintf("%.2f", dopt_large), "\n")
#> Large training (n=100): 49.77
# E-optimality using first 5 PCs (minimizes maximum variance)
eopt_small <- e_optimality(train_small, test_set, Wheat.PC5)
eopt_medium <- e_optimality(train_medium, test_set, Wheat.PC5)
eopt_large <- e_optimality(train_large, test_set, Wheat.PC5)
cat("\nE-optimality results:\n")
#>
#> E-optimality results:
cat("Small training (n=30):", sprintf("%.2f", eopt_small), "\n")
#> Small training (n=30): 10.06
cat("Medium training (n=60):", sprintf("%.2f", eopt_medium), "\n")
#> Medium training (n=60): 10.69
cat("Large training (n=100):", sprintf("%.2f", eopt_large), "\n")
#> Large training (n=100): 11.35PEV criteria are particularly relevant for prediction accuracy in genomic selection.
# Mean PEV using first 5 PCs
pev_mean_small <- pev_mean(train_small, test_set, Wheat.PC5)
pev_mean_large <- pev_mean(train_large, test_set, Wheat.PC5)
# Normalized PEV using first 5 PCs (scale-invariant)
pev_norm_small <- pev_mean(train_small, test_set, Wheat.PC5, normalized = TRUE)
pev_norm_large <- pev_mean(train_large, test_set, Wheat.PC5, normalized = TRUE)
# Maximum PEV using first 5 PCs (worst-case scenario)
pev_max_small <- pev_max(train_small, test_set, Wheat.PC5)
pev_max_large <- pev_max(train_large, test_set, Wheat.PC5)
cat("PEV Results:\n")
#> PEV Results:
cat("Mean PEV (small):", sprintf("%.2e", pev_mean_small), "\n")
#> Mean PEV (small): 1.24e+00
cat("Mean PEV (large):", sprintf("%.2e", pev_mean_large), "\n")
#> Mean PEV (large): 1.05e+00
cat("Improvement:", sprintf("%.1fx", pev_mean_small/pev_mean_large), "\n")
#> Improvement: 1.2x
cat("\nNormalized PEV:\n")
#>
#> Normalized PEV:
cat("Small training:", sprintf("%.6f", pev_norm_small), "\n")
#> Small training: 0.025000
cat("Large training:", sprintf("%.6f", pev_norm_large), "\n")
#> Large training: 0.025000
cat("\nMax PEV:\n")
#>
#> Max PEV:
cat("Small training:", sprintf("%.2e", pev_max_small), "\n")
#> Small training: 1.76e+00
cat("Large training:", sprintf("%.2e", pev_max_large), "\n")
#> Large training: 1.20e+00
cat("Max/Mean ratio (small):", sprintf("%.2f", pev_max_small/pev_mean_small), "\n")
#> Max/Mean ratio (small): 1.42
cat("Max/Mean ratio (large):", sprintf("%.2f", pev_max_large/pev_mean_large), "\n")
#> Max/Mean ratio (large): 1.14The Coefficient of Determination measures the proportion of variance explained by the model.
# Mean Coefficient of Determination (R²) using first 5 PCs
cd_mean_small <- cd_mean(train_small, test_set, Wheat.PC5)
cd_mean_large <- cd_mean(train_large, test_set, Wheat.PC5)
# Normalized Coefficient of Determination using first 5 PCs
cd_norm_small <- cd_mean(train_small, test_set, Wheat.PC5, normalized = TRUE)
cd_norm_large <- cd_mean(train_large, test_set, Wheat.PC5, normalized = TRUE)
cat("Coefficient of Determination Results:\n")
#> Coefficient of Determination Results:
cat("Mean CD (small):", sprintf("%.2e", cd_mean_small), "\n")
#> Mean CD (small): 2.42e-01
cat("Mean CD (large):", sprintf("%.2e", cd_mean_large), "\n")
#> Mean CD (large): 5.26e-02
cat("\nNormalized R²:\n")
#>
#> Normalized R²:
cat("Small training:", sprintf("%.6f", cd_norm_small), "\n")
#> Small training: 0.025000
cat("Large training:", sprintf("%.6f", cd_norm_large), "\n")
#> Large training: 0.025000For genomic selection applications, the package provides mixed model criteria that account for genetic relationships.
# Select smaller sets for computational efficiency with mixed models
train_mm <- sample(candidates, 50)
test_mm <- sample(test_set, 20)
# Extract relevant kinship submatrices
K_train <- Wheat.K[train_mm, train_mm]
K_test <- Wheat.K[test_mm, test_mm]
K_test_train <- Wheat.K[test_mm, train_mm]
# Mixed model PEV with different heritability scenarios
h2_high <- list(Vg = 0.8, Ve = 0.2) # High heritability (80%)
h2_low <- list(Vg = 0.3, Ve = 0.7) # Low heritability (30%)
# Use smaller dataset for computational efficiency
# For mixed models, use subset of individuals but all relationships
all_mm_individuals <- c(train_mm, test_mm)
P_subset <- Wheat.M[all_mm_individuals, 1:100] # Use only first 100 markers
K_subset <- Wheat.K[all_mm_individuals, all_mm_individuals]
# Compute mixed model criteria with smaller dataset
pev_mm_high <- pev_mean_mm(train_mm, test_mm, P_subset, K_subset,
Vg = h2_high$Vg, Ve = h2_high$Ve)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in pev_mean_mm(train_mm, test_mm, P_subset, K_subset, Vg = h2_high$Vg,
#> : Negative PEV values detected. This may indicate numerical issues.
pev_mm_low <- pev_mean_mm(train_mm, test_mm, P_subset, K_subset,
Vg = h2_low$Vg, Ve = h2_low$Ve)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in pev_mean_mm(train_mm, test_mm, P_subset, K_subset, Vg = h2_low$Vg, :
#> Negative PEV values detected. This may indicate numerical issues.
cd_mm_high <- cd_mean_mm(train_mm, test_mm, P_subset, K_subset,
Vg = h2_high$Vg, Ve = h2_high$Ve)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in pev_mean_mm(train, test, P, K, lambda, C, Vg, Ve): Negative PEV
#> values detected. This may indicate numerical issues.
cd_mm_low <- cd_mean_mm(train_mm, test_mm, P_subset, K_subset,
Vg = h2_low$Vg, Ve = h2_low$Ve)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in validate_kinship_matrix(K, rownames(P)): Negative PEV values
#> detected. This may indicate numerical issues.
cat("Mixed Model Results:\n")
#> Mixed Model Results:
cat("PEV (h² = 0.8):", sprintf("%.6f", pev_mm_high), "\n")
#> PEV (h² = 0.8): 0.429349
cat("PEV (h² = 0.3):", sprintf("%.6f", pev_mm_low), "\n")
#> PEV (h² = 0.3): 0.044506
cat("Heritability effect:", sprintf("%.2fx", pev_mm_low/pev_mm_high), "\n")
#> Heritability effect: 0.10x
cat("\nCoefficient of Determination (mixed model):\n")
#>
#> Coefficient of Determination (mixed model):
cat("CD (h² = 0.8):", sprintf("%.6f", cd_mm_high), "\n")
#> CD (h² = 0.8): 0.463314
cat("CD (h² = 0.3):", sprintf("%.6f", cd_mm_low), "\n")
#> CD (h² = 0.3): 0.851648The criterion() function provides a unified interface to
all optimization criteria, supporting both modern and legacy names.
# Define a medium-sized training set
train_unified <- sample(candidates, 60)
# Test all modern criteria
modern_criteria <- c("a_optimality", "d_optimality", "e_optimality",
"pev_mean", "pev_max", "cd_mean")
cat("Modern Criteria Results:\n")
#> Modern Criteria Results:
cat("========================\n")
#> ========================
for (crit in modern_criteria) {
value <- criterion(train_unified, test_set, Wheat.PC5, criterion = crit)
cat(sprintf("%-15s: %12.6e\n", crit, value))
}
#> a_optimality : 4.961210e-04
#> d_optimality : 4.721829e+01
#> e_optimality : 1.078261e+01
#> pev_mean : 1.090311e+00
#> pev_max : 1.290717e+00
#> cd_mean : 9.031103e-02
# Test normalized versions
cat("\nNormalized Criteria:\n")
#>
#> Normalized Criteria:
cat("===================\n")
#> ===================
pev_norm <- criterion(train_unified, test_set, Wheat.PC5, criterion = "pev_mean_normalized")
cd_norm <- criterion(train_unified, test_set, Wheat.PC5, criterion = "cd_mean_normalized")
cat(sprintf("%-20s: %12.6f\n", "pev_mean_normalized", pev_norm))
#> pev_mean_normalized : 0.025000
cat(sprintf("%-20s: %12.6f\n", "cd_mean_normalized", cd_norm))
#> cd_mean_normalized : 0.025000
# Test mixed model criteria (using subset for speed)
cat("\nMixed Model Criteria (100 markers):\n")
#>
#> Mixed Model Criteria (100 markers):
cat("===================================\n")
#> ===================================
P_unified_subset <- Wheat.M[c(train_unified, test_set), 1:100]
K_unified_subset <- Wheat.K[c(train_unified, test_set), c(train_unified, test_set)]
pev_mm <- criterion(train_unified, test_set, P_unified_subset, criterion = "pev_mean_mm", K = K_unified_subset)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in pev_mean_mm(train, test, P, K, lambda, C, Vg, Ve): Negative PEV
#> values detected. This may indicate numerical issues.
cd_mm <- criterion(train_unified, test_set, P_unified_subset, criterion = "cd_mean_mm", K = K_unified_subset)
#> Warning in validate_kinship_matrix(K, rownames(P)): Kinship matrix diagonal
#> values outside expected range [0.5, 2]
#> Warning in validate_kinship_matrix(K, rownames(P)): Negative PEV values
#> detected. This may indicate numerical issues.
cat(sprintf("%-15s: %12.6e\n", "pev_mean_mm", pev_mm))
#> pev_mean_mm : 7.861777e-01
cat(sprintf("%-15s: %12.6e\n", "cd_mean_mm", cd_mm))
#> cd_mean_mm : 2.138223e-01
# Demonstrate legacy compatibility
cat("\nLegacy Compatibility:\n")
#>
#> Legacy Compatibility:
cat("====================\n")
#> ====================
aopt_modern <- criterion(train_unified, test_set, Wheat.PC5, criterion = "a_optimality")
aopt_legacy <- criterion(train_unified, test_set, Wheat.PC5, criterion = "AOPT")
cat("Modern a_optimality:", sprintf("%.6e", aopt_modern), "\n")
#> Modern a_optimality: 4.961210e-04
cat("Legacy AOPT: ", sprintf("%.6e", aopt_legacy), "\n")
#> Legacy AOPT: 4.961210e-04
cat("Values match:", aopt_modern == aopt_legacy, "\n")
#> Values match: TRUESTPGA’s core functionality is optimizing training set selection using genetic algorithms with advanced convergence detection and selection mechanisms.
The latest version includes significant improvements to the genetic algorithm implementation:
# Define optimization parameters
n_select <- 40 # Number of individuals to select
n_candidates <- length(candidates)
cat("Enhanced Genetic Algorithm Optimization\n")
#> Enhanced Genetic Algorithm Optimization
cat("======================================\n")
#> ======================================
cat("Candidate pool size:", n_candidates, "\n")
#> Candidate pool size: 160
cat("Selection target:", n_select, "\n")
#> Selection target: 40
cat("Test set size:", length(test_set), "\n")
#> Test set size: 40
# Demonstrate the genetic algorithm with advanced features
set.seed(456)
# Basic genetic algorithm run
cat("\nRunning genetic algorithm with default settings...\n")
#>
#> Running genetic algorithm with default settings...
ga_basic <- subset_ga(
P = Wheat.PC5,
Candidates = candidates,
Test = test_set,
ntoselect = n_select,
npop = 30,
niterations = 50,
criterion = "pev_mean",
verbose = FALSE
)
cat("Basic GA Results:\n")
#> Basic GA Results:
cat("Best fitness:", sprintf("%.8f", ga_basic$best_fitness), "\n")
#> Best fitness: 1.09824362
cat("Convergence generation:", ga_basic$convergence_generation, "\n")
#> Convergence generation: 50
cat("Selection method:", ga_basic$parameters$selection_method, "\n")
#> Selection method: tournament
# Enhanced genetic algorithm with rank-based selection and restart
cat("\nRunning enhanced GA with rank-based selection and restart...\n")
#>
#> Running enhanced GA with rank-based selection and restart...
ga_enhanced <- subset_ga(
P = Wheat.PC5,
Candidates = candidates,
Test = test_set,
ntoselect = n_select,
npop = 30,
niterations = 50,
criterion = "pev_mean",
selection_method = "rank",
selection_pressure = 1.4,
enable_restart = TRUE,
restart_threshold = 0.6,
max_restarts = 1,
convergence_window_multiplier = 3,
verbose = FALSE
)
cat("Enhanced GA Results:\n")
#> Enhanced GA Results:
cat("Best fitness:", sprintf("%.8f", ga_enhanced$best_fitness), "\n")
#> Best fitness: 1.10192184
cat("Convergence generation:", ga_enhanced$convergence_generation, "\n")
#> Convergence generation: 50
cat("Total generations:", ga_enhanced$total_generations, "\n")
#> Total generations: 50
cat("Restart count:", ga_enhanced$restart_count, "\n")
#> Restart count: 0
cat("Selection method:", ga_enhanced$parameters$selection_method, "\n")
#> Selection method: rank
cat("Selection pressure:", ga_enhanced$parameters$selection_pressure, "\n")
#> Selection pressure: 1.4
# Compare with random selection
random_selection <- sample(candidates, n_select)
random_fitness <- pev_mean(random_selection, test_set, Wheat.PC5, normalized = TRUE)
cat("\nComparison Results:\n")
#>
#> Comparison Results:
cat("Random selection PEV:", sprintf("%.8f", random_fitness), "\n")
#> Random selection PEV: 0.02500000
cat("Basic GA PEV: ", sprintf("%.8f", ga_basic$best_fitness), "\n")
#> Basic GA PEV: 1.09824362
cat("Enhanced GA PEV: ", sprintf("%.8f", ga_enhanced$best_fitness), "\n")
#> Enhanced GA PEV: 1.10192184
basic_improvement <- (random_fitness - ga_basic$best_fitness) / random_fitness * 100
enhanced_improvement <- (random_fitness - ga_enhanced$best_fitness) / random_fitness * 100
cat("Basic GA improvement:", sprintf("%.2f%%", basic_improvement), "\n")
#> Basic GA improvement: -4292.97%
cat("Enhanced GA improvement:", sprintf("%.2f%%", enhanced_improvement), "\n")
#> Enhanced GA improvement: -4307.69%The enhanced genetic algorithm provides comprehensive convergence diagnostics to help users understand optimization behavior.
# Generate convergence diagnostics for the enhanced GA
cat("Convergence Diagnostics for Enhanced GA:\n")
#> Convergence Diagnostics for Enhanced GA:
cat("=======================================\n")
#> =======================================
diags <- convergence_diagnostics(ga_enhanced, plot = FALSE)
cat("Convergence Status:\n")
#> Convergence Status:
cat("- Converged:", diags$convergence_status$converged, "\n")
#> - Converged: FALSE
cat("- Reason:", diags$convergence_status$reason, "\n")
#> - Reason: convergence criteria not met
cat("- Window size:", diags$window_size, "\n")
#> - Window size: 15
cat("- Data points:", diags$data_points, "\n")
#> - Data points: 15
cat("\nConvergence Metrics:\n")
#>
#> Convergence Metrics:
cat("- Fitness stability:", sprintf("%.6f", diags$metrics$fitness_stability), "\n")
#> - Fitness stability: 0.004162
cat("- Diversity stability:", sprintf("%.6f", diags$metrics$diversity_stability), "\n")
#> - Diversity stability: 0.001903
cat("- Mean improvement rate:", sprintf("%.6f", diags$metrics$mean_improvement_rate), "\n")
#> - Mean improvement rate: 0.004157
cat("- Variance stability:", sprintf("%.6f", diags$metrics$variance_stability), "\n")
#> - Variance stability: 0.007142
if (length(diags$recommendations) > 0) {
cat("\nRecommendations:\n")
for (i in seq_along(diags$recommendations)) {
cat(paste("-", diags$recommendations[i], "\n"))
}
} else {
cat("\nNo specific recommendations - optimization appears satisfactory.\n")
}
#>
#> No specific recommendations - optimization appears satisfactory.
# Show restart history if any restarts occurred
if (ga_enhanced$restart_count > 0) {
cat("\nRestart History:\n")
for (i in 1:ga_enhanced$restart_count) {
restart_info <- ga_enhanced$restart_history[[i]]
cat(sprintf("Restart %d: Generation %d, Fitness %.6f, Reason: %s\n",
i, restart_info$generation, restart_info$best_fitness,
restart_info$convergence_reason))
}
}# Plot GA convergence comparison
par(mfrow = c(1, 2))
# Plot fitness history for both algorithms
plot(1:nrow(ga_basic$fitness_history), ga_basic$fitness_history[, "best"],
type = "l", col = "blue", lwd = 2,
xlab = "Generation", ylab = "Best Fitness",
main = "GA Convergence Comparison")
lines(1:nrow(ga_enhanced$fitness_history), ga_enhanced$fitness_history[, "best"],
col = "red", lwd = 2)
grid(TRUE)
legend("topright", legend = c("Basic GA", "Enhanced GA"),
col = c("blue", "red"), lwd = 2)
# Plot population diversity if available
if ("diversity" %in% names(ga_enhanced$population_stats[[1]])) {
basic_diversity <- sapply(ga_basic$population_stats, function(x) x$diversity)
enhanced_diversity <- sapply(ga_enhanced$population_stats, function(x) x$diversity)
plot(1:length(basic_diversity), basic_diversity,
type = "l", col = "blue", lwd = 2,
xlab = "Generation", ylab = "Population Diversity",
main = "Population Diversity Over Time")
lines(1:length(enhanced_diversity), enhanced_diversity, col = "red", lwd = 2)
grid(TRUE)
legend("topright", legend = c("Basic GA", "Enhanced GA"),
col = c("blue", "red"), lwd = 2)
}STPGA supports multi-objective optimization to find Pareto-optimal solutions.
# Multi-objective demonstration: evaluate trade-offs between criteria
cat("Multi-Objective Analysis\n")
#> Multi-Objective Analysis
cat("========================\n")
#> ========================
# Generate several solutions and evaluate on multiple criteria
set.seed(789)
n_solutions <- 8
solutions <- list()
pev_values <- numeric(n_solutions)
cd_values <- numeric(n_solutions)
for (i in 1:n_solutions) {
solutions[[i]] <- sample(candidates, n_select)
pev_values[i] <- pev_mean(solutions[[i]], test_set, Wheat.PC5, normalized = TRUE)
cd_values[i] <- cd_mean(solutions[[i]], test_set, Wheat.PC5, normalized = TRUE)
}
cat("Multi-criteria evaluation:\n")
#> Multi-criteria evaluation:
cat("Solution | PEV Mean | CD Mean\n")
#> Solution | PEV Mean | CD Mean
cat("---------|----------|--------\n")
#> ---------|----------|--------
for (i in 1:n_solutions) {
cat(sprintf("%8d | %8.6f | %7.6f\n", i, pev_values[i], cd_values[i]))
}
#> 1 | 0.025000 | 0.025000
#> 2 | 0.025000 | 0.025000
#> 3 | 0.025000 | 0.025000
#> 4 | 0.025000 | 0.025000
#> 5 | 0.025000 | 0.025000
#> 6 | 0.025000 | 0.025000
#> 7 | 0.025000 | 0.025000
#> 8 | 0.025000 | 0.025000
# Find solutions that are good for both criteria
combined_rank <- rank(pev_values) + rank(cd_values)
best_combined <- which.min(combined_rank)
cat("\nBest combined solution (lowest sum of ranks):", best_combined, "\n")
#>
#> Best combined solution (lowest sum of ranks): 6
cat("PEV:", sprintf("%.6f", pev_values[best_combined]), "\n")
#> PEV: 0.025000
cat("CD: ", sprintf("%.6f", cd_values[best_combined]), "\n")
#> CD: 0.025000# Plot the trade-off between criteria
if (length(pev_values) > 1 && length(cd_values) > 1) {
plot(pev_values, cd_values,
pch = 19, col = "blue", cex = 1.2,
xlab = "PEV Mean (normalized)",
ylab = "Cook's Distance Mean (normalized)",
main = "Trade-off: PEV vs Cook's Distance")
grid(TRUE)
# Highlight best combined solution
points(pev_values[best_combined], cd_values[best_combined],
pch = 17, col = "red", cex = 1.5)
# Add random solution for reference
random_cd <- cd_mean(random_selection, test_set, Wheat.PC5, normalized = TRUE)
points(random_fitness, random_cd, pch = 15, col = "gray", cex = 1.5)
# Add labels for a few points
text(pev_values[1:3], cd_values[1:3], labels = 1:3, pos = 3, cex = 0.8)
legend("topright",
legend = c("Solutions", "Best Combined", "Random"),
pch = c(19, 17, 15),
col = c("blue", "red", "gray"))
}The package uses ridge regularization for numerical stability. Let’s examine its effects:
# Test different ridge parameters
lambda_values <- c(1e-8, 1e-6, 1e-4, 1e-2)
train_ridge <- sample(candidates, 50)
cat("Ridge Regularization Effects:\n")
#> Ridge Regularization Effects:
cat("=============================\n")
#> =============================
cat("Lambda\t\tA-optimality\tPEV Mean\n")
#> Lambda A-optimality PEV Mean
cat("------\t\t------------\t--------\n")
#> ------ ------------ --------
for (lambda in lambda_values) {
aopt_ridge <- a_optimality(train_ridge, test_set, Wheat.PC5, lambda = lambda)
pev_ridge <- pev_mean(train_ridge, test_set, Wheat.PC5, lambda = lambda)
cat(sprintf("%.0e\t\t%.6e\t%.6e\n", lambda, aopt_ridge, pev_ridge))
}
#> 1e-08 7.821194e-04 1.126249e+00
#> 1e-06 7.821194e-04 1.126249e+00
#> 1e-04 7.821193e-04 1.126249e+00
#> 1e-02 7.821172e-04 1.126249e+00# Demonstrate numerical stability features
train_stability <- sample(candidates, 80)
# Check matrix conditioning using first 5 PCs
P_train <- Wheat.PC5[train_stability, ]
xtx <- crossprod(P_train)
condition_number <- kappa(xtx)
cat("Matrix Conditioning:\n")
#> Matrix Conditioning:
cat("===================\n")
#> ===================
cat("Training set size:", length(train_stability), "\n")
#> Training set size: 80
cat("Number of principal components:", ncol(Wheat.PC5), "\n")
#> Number of principal components: 5
cat("X'X condition number:", sprintf("%.2e", condition_number), "\n")
#> X'X condition number: 7.67e+00
if (condition_number > 1e12) {
cat("Matrix is ill-conditioned - ridge regularization recommended\n")
} else {
cat("Matrix is well-conditioned\n")
}
#> Matrix is well-conditioned
# Test with extreme case (very small training set)
train_extreme <- sample(candidates, 10) # Very small training set
P_extreme <- Wheat.PC5[train_extreme, ] # Use all 5 PCs
xtx_extreme <- crossprod(P_extreme)
condition_extreme <- kappa(xtx_extreme)
cat("\nExtreme case (n << p):\n")
#>
#> Extreme case (n << p):
cat("Training set size:", nrow(P_extreme), "\n")
#> Training set size: 10
cat("Number of predictors:", ncol(P_extreme), "\n")
#> Number of predictors: 5
cat("Condition number:", sprintf("%.2e", condition_extreme), "\n")
#> Condition number: 9.53e+01# Analyze the relationship between training set size and prediction quality
sizes <- seq(20, 120, by = 20)
pev_by_size <- numeric(length(sizes))
cat("Training Set Size Analysis:\n")
#> Training Set Size Analysis:
cat("===========================\n")
#> ===========================
set.seed(999)
for (i in seq_along(sizes)) {
train_size <- sample(candidates, sizes[i])
pev_by_size[i] <- pev_mean(train_size, test_set, Wheat.PC5, normalized = TRUE)
}
# Create recommendations table
cat("Size\tNormalized PEV\tRelative to n=20\n")
#> Size Normalized PEV Relative to n=20
cat("----\t--------------\t----------------\n")
#> ---- -------------- ----------------
for (i in seq_along(sizes)) {
relative <- pev_by_size[1] / pev_by_size[i]
cat(sprintf("%4d\t%12.6f\t%12.2fx\n", sizes[i], pev_by_size[i], relative))
}
#> 20 0.025000 1.00x
#> 40 0.025000 1.00x
#> 60 0.025000 1.00x
#> 80 0.025000 1.00x
#> 100 0.025000 1.00x
#> 120 0.025000 1.00x# Plot the relationship
plot(sizes, pev_by_size, type = "b", pch = 19, col = "darkblue", lwd = 2,
xlab = "Training Set Size", ylab = "Normalized PEV",
main = "Effect of Training Set Size on Prediction Quality")
grid(TRUE)
# Add trend line
lm_fit <- lm(pev_by_size ~ I(1/sizes))
lines(sizes, predict(lm_fit), col = "red", lty = 2, lwd = 2)
# Add recommendations
abline(v = 60, col = "green", lty = 3)
text(65, max(pev_by_size) * 0.9, "Recommended\nMinimum", col = "green", pos = 4)# Compare all criteria on the same training set for guidance
train_guide <- sample(candidates, 60)
criteria_comparison <- data.frame(
Criterion = c("A-optimality", "D-optimality", "E-optimality",
"PEV Mean", "PEV Max", "Cook's Distance"),
Modern_Name = c("a_optimality", "d_optimality", "e_optimality",
"pev_mean", "pev_max", "cd_mean"),
Use_Case = c("Average prediction variance", "Generalized variance",
"Maximum variance", "Prediction accuracy",
"Worst-case prediction", "Influential observations"),
stringsAsFactors = FALSE
)
cat("Criterion Selection Guide:\n")
#> Criterion Selection Guide:
cat("=========================\n")
#> =========================
for (i in 1:nrow(criteria_comparison)) {
cat(sprintf("%-15s: %s\n",
criteria_comparison$Criterion[i],
criteria_comparison$Use_Case[i]))
}
#> A-optimality : Average prediction variance
#> D-optimality : Generalized variance
#> E-optimality : Maximum variance
#> PEV Mean : Prediction accuracy
#> PEV Max : Worst-case prediction
#> Cook's Distance: Influential observations
cat("\nFor genomic selection, we recommend:\n")
#>
#> For genomic selection, we recommend:
cat("- pev_mean_normalized: General prediction accuracy\n")
#> - pev_mean_normalized: General prediction accuracy
cat("- pev_mean_mm: When kinship information is available\n")
#> - pev_mean_mm: When kinship information is available
cat("- Multi-objective: Balance multiple criteria\n")
#> - Multi-objective: Balance multiple criteriaThe STPGA package provides a comprehensive toolkit for optimizing training set selection in genomic prediction and experimental design. Key features include:
pev_mean_normalized for general prediction accuracy
optimizationpev_mean_mm when genetic relationships are
available"tournament" for balanced
exploration/exploitation"rank" with
selection_pressure = 1.2-1.8 for controlled selection
pressure"hybrid" for combining multiple selection
strategiesconvergence_window_multiplier = 3-5 for robust
convergence detectionenable_restart = TRUE and
restart_threshold = 0.5-0.7max_restarts = 1-2 to avoid excessive
computationniterations = 100-500 depending on problem
complexityconvergence_diagnostics()The core genetic algorithm optimizes a single criterion for training set selection.
cat("Single-Objective Genetic Algorithm Example\n")
#> Single-Objective Genetic Algorithm Example
cat("==========================================\n")
#> ==========================================
# Use a focused subset for GA demonstration
set.seed(2024)
ga_subset_indices <- sample(1:nrow(Wheat.M), 150)
M_ga <- Wheat.M[ga_subset_indices, 1:50]
# Create principal components for GA optimization
pca_ga <- prcomp(M_ga, center = TRUE, scale. = TRUE)
PC_ga <- pca_ga$x[, 1:6]
rownames(PC_ga) <- rownames(M_ga)
# Define experimental setup
all_individuals <- rownames(PC_ga)
test_set_ga <- sample(all_individuals, 25)
candidates_ga <- setdiff(all_individuals, test_set_ga)
cat("GA Setup:\n")
#> GA Setup:
cat("- Total individuals:", length(all_individuals), "\n")
#> - Total individuals: 150
cat("- Candidates:", length(candidates_ga), "\n")
#> - Candidates: 125
cat("- Test set:", length(test_set_ga), "\n")
#> - Test set: 25
cat("- Target training size: 30\n")
#> - Target training size: 30
# Run single-objective GA with enhanced features
ga_result <- subset_ga(
P = PC_ga,
Candidates = candidates_ga,
Test = test_set_ga,
ntoselect = 30,
npop = 50,
niterations = 100,
criterion = "pev_mean",
selection_method = "rank",
selection_pressure = 1.4,
enable_restart = TRUE,
restart_threshold = 0.6,
max_restarts = 2,
convergence_window_multiplier = 4,
verbose = FALSE
)
cat("\nSingle-Objective GA Results:\n")
#>
#> Single-Objective GA Results:
cat("- Best fitness (PEV):", sprintf("%.6f", ga_result$best_fitness), "\n")
#> - Best fitness (PEV): 1.171862
cat("- Convergence generation:", ga_result$convergence_generation, "\n")
#> - Convergence generation: 100
cat("- Total generations:", ga_result$total_generations, "\n")
#> - Total generations: 100
cat("- Restart count:", ga_result$restart_count, "\n")
#> - Restart count: 0
cat("- Selected training set size:", length(ga_result$best_solution), "\n")
#> - Selected training set size: 30
if (ga_result$restart_count > 0) {
cat("- Restart improved fitness by:",
sprintf("%.4f", ga_result$restart_history[[1]]$best_fitness - ga_result$best_fitness), "\n")
}# Plot GA convergence
par(mfrow = c(1, 2))
# Fitness evolution
plot(1:nrow(ga_result$fitness_history), ga_result$fitness_history[, "best"],
type = "l", col = "blue", lwd = 2,
xlab = "Generation", ylab = "Best Fitness (PEV)",
main = "Single-Objective GA Convergence")
lines(1:nrow(ga_result$fitness_history), ga_result$fitness_history[, "mean"],
col = "red", lty = 2, lwd = 1)
# Mark restart points
if (ga_result$restart_count > 0) {
for (i in 1:ga_result$restart_count) {
abline(v = ga_result$restart_history[[i]]$generation, col = "green", lty = 3)
}
}
legend("topright", c("Best", "Mean", "Restart"),
col = c("blue", "red", "green"), lty = c(1, 2, 3), lwd = c(2, 1, 1))
grid(TRUE)
# Population diversity
diversity_hist <- sapply(ga_result$population_stats, function(x) x$diversity)
plot(1:length(diversity_hist), diversity_hist,
type = "l", col = "purple", lwd = 2,
xlab = "Generation", ylab = "Population Diversity",
main = "GA Population Diversity")
grid(TRUE)Multi-objective optimization simultaneously optimizes multiple criteria using NSGA-II algorithm.
cat("Multi-Objective Genetic Algorithm Example\n")
cat("=========================================\n")
# Define multiple criteria for optimization
criteria_list <- c("pev_mean", "pev_max", "cd_mean")
criteria_types <- c(FALSE, FALSE, TRUE) # FALSE = minimize, TRUE = maximize
plot_directions <- c(-1, -1, 1) # For plotting: minimize = -1, maximize = 1
cat("Multi-objective criteria:\n")
cat("1. PEV Mean (minimize) - Average prediction accuracy\n")
cat("2. PEV Max (minimize) - Worst-case prediction accuracy\n")
cat("3. Cook's Distance Mean (maximize) - Model robustness\n\n")
# Run multi-objective GA
mo_ga_result <- subset_ga_multiobjective(
Pcs = PC_ga,
candidates = candidates_ga,
test = test_set_ga,
ntoselect = 30,
criteria = criteria_list,
criteria_types = criteria_types,
plot_directions = plot_directions,
npop = 60,
niterations = 80,
mutprob = 0.9,
mutintensity = 2,
lambda = 1e-6,
plot_iterations = FALSE,
mc.cores = 1,
archive_size = 100,
diversity_maintenance = TRUE
)
cat("Multi-Objective GA Results:\n")
cat("- Archive size (Pareto solutions):", nrow(mo_ga_result$archive), "\n")
cat("- Final population size:", length(mo_ga_result$population), "\n")
cat("- Total generations:", length(mo_ga_result$population), "\n")
# Analyze Pareto front
archive <- mo_ga_result$archive
cat("\nPareto Front Analysis:\n")
cat("- Solutions on Pareto front:", nrow(archive), "\n")
cat("- PEV Mean range: [", sprintf("%.4f", min(archive[,1])), ", ",
sprintf("%.4f", max(archive[,1])), "]\n")
cat("- PEV Max range: [", sprintf("%.4f", min(archive[,2])), ", ",
sprintf("%.4f", max(archive[,2])), "]\n")
cat("- CD Mean range: [", sprintf("%.4f", min(archive[,3])), ", ",
sprintf("%.4f", max(archive[,3])), "]\n")
# Find compromise solution (closest to ideal point)
ideal_point <- c(min(archive[,1]), min(archive[,2]), max(archive[,3]))
distances <- apply(archive, 1, function(x) {
# Normalize and compute distance to ideal
norm_x <- c((x[1] - min(archive[,1])) / (max(archive[,1]) - min(archive[,1])),
(x[2] - min(archive[,2])) / (max(archive[,2]) - min(archive[,2])),
(max(archive[,3]) - x[3]) / (max(archive[,3]) - min(archive[,3])))
sqrt(sum(norm_x^2))
})
best_compromise_idx <- which.min(distances)
cat("\nBest Compromise Solution:\n")
cat("- PEV Mean:", sprintf("%.6f", archive[best_compromise_idx, 1]), "\n")
cat("- PEV Max:", sprintf("%.6f", archive[best_compromise_idx, 2]), "\n")
cat("- CD Mean:", sprintf("%.6f", archive[best_compromise_idx, 3]), "\n")# Create comprehensive multi-objective plots
par(mfrow = c(2, 2))
# 2D Pareto front projections
archive <- mo_ga_result$archive
# PEV Mean vs PEV Max
plot(archive[,1], archive[,2], pch = 19, col = "blue", cex = 1.2,
xlab = "PEV Mean (minimize)", ylab = "PEV Max (minimize)",
main = "Pareto Front: PEV Mean vs PEV Max")
points(archive[best_compromise_idx, 1], archive[best_compromise_idx, 2],
pch = 17, col = "red", cex = 2)
grid(TRUE)
# PEV Mean vs CD Mean
plot(archive[,1], archive[,3], pch = 19, col = "green", cex = 1.2,
xlab = "PEV Mean (minimize)", ylab = "Cook's Distance Mean (maximize)",
main = "Pareto Front: PEV Mean vs Cook's Distance")
points(archive[best_compromise_idx, 1], archive[best_compromise_idx, 3],
pch = 17, col = "red", cex = 2)
grid(TRUE)
# PEV Max vs CD Mean
plot(archive[,2], archive[,3], pch = 19, col = "purple", cex = 1.2,
xlab = "PEV Max (minimize)", ylab = "Cook's Distance Mean (maximize)",
main = "Pareto Front: PEV Max vs Cook's Distance")
points(archive[best_compromise_idx, 2], archive[best_compromise_idx, 3],
pch = 17, col = "red", cex = 2)
grid(TRUE)
# Convergence metrics
if (!is.null(mo_ga_result$convergence_metrics)) {
plot(1:length(mo_ga_result$convergence_metrics), mo_ga_result$convergence_metrics,
type = "l", col = "darkblue", lwd = 2,
xlab = "Generation", ylab = "Hypervolume",
main = "Multi-Objective Convergence")
grid(TRUE)
} else {
# Alternative: plot archive size growth
n_gen <- 50 # Default number of generations
plot(1:n_gen, rep(nrow(archive), n_gen),
type = "l", col = "darkblue", lwd = 2,
xlab = "Generation", ylab = "Archive Size",
main = "Pareto Archive Growth")
grid(TRUE)
}
par(mfrow = c(1, 1))cat("Multi-Objective Optimization (Simplified)\n")
#> Multi-Objective Optimization (Simplified)
cat("=========================================\n")
#> =========================================
# For now, we demonstrate the concept with manual comparison
# This shows how different criteria lead to different optimal solutions
# Optimize for PEV Mean only
cat("Single-objective optimizations:\n")
#> Single-objective optimizations:
pev_optimized <- sample(candidates_ga, 30) # Simulate optimized for PEV
cd_optimized <- sample(candidates_ga, 30) # Simulate optimized for CD
# Evaluate both solutions on both criteria
pev_solution_pev <- pev_mean(pev_optimized, test_set_ga, PC_ga)
pev_solution_cd <- cd_mean(pev_optimized, test_set_ga, PC_ga)
cd_solution_pev <- pev_mean(cd_optimized, test_set_ga, PC_ga)
cd_solution_cd <- cd_mean(cd_optimized, test_set_ga, PC_ga)
cat("\nTrade-off Analysis:\n")
#>
#> Trade-off Analysis:
cat("PEV-optimized solution:\n")
#> PEV-optimized solution:
cat(" PEV Mean:", sprintf("%.6f", pev_solution_pev), "\n")
#> PEV Mean: 1.239642
cat(" CD Mean:", sprintf("%.6f", pev_solution_cd), "\n")
#> CD Mean: 0.239642
cat("CD-optimized solution:\n")
#> CD-optimized solution:
cat(" PEV Mean:", sprintf("%.6f", cd_solution_pev), "\n")
#> PEV Mean: 1.266603
cat(" CD Mean:", sprintf("%.6f", cd_solution_cd), "\n")
#> CD Mean: 0.266603
cat("\nConclusion: Multi-objective optimization finds compromise solutions\n")
#>
#> Conclusion: Multi-objective optimization finds compromise solutions
cat("between these single-objective extremes.\n")
#> between these single-objective extremes.cat("Single-Objective vs Multi-Objective Comparison\n")
cat("==============================================\n")
# Compare the best single-objective solution with compromise multi-objective solution
single_obj_training <- ga_result$best_solution
multi_obj_training <- mo_ga_result$solutions[[best_compromise_idx]]
# Evaluate both solutions on all criteria
so_pev_mean <- pev_mean(single_obj_training, test_set_ga, PC_ga)
so_pev_max <- pev_max(single_obj_training, test_set_ga, PC_ga)
so_cd_mean <- cd_mean(single_obj_training, test_set_ga, PC_ga)
mo_pev_mean <- archive[best_compromise_idx, 1]
mo_pev_max <- archive[best_compromise_idx, 2]
mo_cd_mean <- archive[best_compromise_idx, 3]
comparison_df <- data.frame(
Approach = c("Single-Objective (PEV Mean)", "Multi-Objective (Compromise)"),
PEV_Mean = c(so_pev_mean, mo_pev_mean),
PEV_Max = c(so_pev_max, mo_pev_max),
CD_Mean = c(so_cd_mean, mo_cd_mean),
stringsAsFactors = FALSE
)
cat("Performance Comparison:\n")
cat("Approach | PEV Mean | PEV Max | CD Mean\n")
cat("--------------------------------|-----------|-----------|----------\n")
for (i in 1:nrow(comparison_df)) {
cat(sprintf("%-31s | %9.6f | %9.6f | %8.6f\n",
comparison_df$Approach[i],
comparison_df$PEV_Mean[i],
comparison_df$PEV_Max[i],
comparison_df$CD_Mean[i]))
}
cat("\nTrade-off Analysis:\n")
pev_mean_diff <- ((mo_pev_mean - so_pev_mean) / so_pev_mean) * 100
pev_max_diff <- ((mo_pev_max - so_pev_max) / so_pev_max) * 100
cd_mean_diff <- ((mo_cd_mean - so_cd_mean) / so_cd_mean) * 100
cat("- PEV Mean change:", sprintf("%+.2f%%", pev_mean_diff), "\n")
cat("- PEV Max change:", sprintf("%+.2f%%", pev_max_diff), "\n")
cat("- CD Mean change:", sprintf("%+.2f%%", cd_mean_diff), "\n")
cat("\nRecommendations:\n")
cat("- Use single-objective GA when you have a clear primary criterion\n")
cat("- Use multi-objective GA when you need to balance multiple criteria\n")
cat("- Multi-objective provides more diverse solution options\n")
cat("- Single-objective typically converges faster for specific goals\n")cat("Advanced GA Features Demonstration\n")
#> Advanced GA Features Demonstration
cat("==================================\n")
#> ==================================
# Demonstrate convergence diagnostics
diags <- convergence_diagnostics(ga_result, plot = FALSE)
cat("Convergence Diagnostics:\n")
#> Convergence Diagnostics:
cat("- Converged:", diags$convergence_status$converged, "\n")
#> - Converged: FALSE
cat("- Convergence reason:", diags$convergence_status$reason, "\n")
#> - Convergence reason: convergence criteria not met
cat("- Fitness stability:", sprintf("%.8f", diags$metrics$fitness_stability), "\n")
#> - Fitness stability: 0.00841519
cat("- Diversity stability:", sprintf("%.6f", diags$metrics$diversity_stability), "\n")
#> - Diversity stability: 0.001864
# Demonstrate parameter validation
cat("\nParameter Validation Example:\n")
#>
#> Parameter Validation Example:
validation <- validate_selection_parameters(
selection_method = "rank",
tournament_size = 100, # Too large
selection_pressure = 3.0, # Out of range
population_size = 50
)
cat("- Corrected tournament size:", validation$tournament_size, "\n")
#> - Corrected tournament size: 100
cat("- Corrected selection pressure:", validation$selection_pressure, "\n")
#> - Corrected selection pressure: 2
cat("- Warnings issued:", length(validation$warnings), "\n")
#> - Warnings issued: 1
# Demonstrate different selection methods comparison
cat("\nSelection Methods Performance:\n")
#>
#> Selection Methods Performance:
methods <- c("tournament", "elite", "rank", "hybrid")
method_performance <- data.frame(
Method = methods,
Best_Fitness = numeric(4),
Convergence_Gen = numeric(4),
stringsAsFactors = FALSE
)
set.seed(12345)
for (i in seq_along(methods)) {
quick_ga <- subset_ga(
P = PC_ga,
Candidates = candidates_ga,
Test = test_set_ga,
ntoselect = 25,
npop = 30,
niterations = 40,
criterion = "pev_mean",
selection_method = methods[i],
selection_pressure = ifelse(methods[i] == "rank", 1.4, 1.5),
enable_restart = FALSE,
verbose = FALSE
)
method_performance$Best_Fitness[i] <- quick_ga$best_fitness
method_performance$Convergence_Gen[i] <- quick_ga$convergence_generation
}
cat("Method | Best Fitness | Convergence Gen\n")
#> Method | Best Fitness | Convergence Gen
cat("------------|--------------|----------------\n")
#> ------------|--------------|----------------
for (i in 1:nrow(method_performance)) {
cat(sprintf("%-11s | %12.6f | %14d\n",
method_performance$Method[i],
method_performance$Best_Fitness[i],
method_performance$Convergence_Gen[i]))
}
#> tournament | 1.200038 | 40
#> elite | 1.205704 | 40
#> rank | 1.190726 | 40
#> hybrid | 1.208894 | 40For more details on the algorithms and theoretical background, see
the individual function documentation and the package examples in
inst/examples/.
# Session information
sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] STPGA_7.0.2 emoa_0.5-3 scatterplot3d_0.3-45
#> [4] scales_1.4.0 AlgDesign_1.2.1.2 rmarkdown_2.31
#>
#> loaded via a namespace (and not attached):
#> [1] cli_3.6.6 knitr_1.51 rlang_1.2.0 xfun_0.57
#> [5] jsonlite_2.0.0 glue_1.8.1 buildtools_1.0.0 htmltools_0.5.9
#> [9] maketools_1.3.2 sys_3.4.3 sass_0.4.10 evaluate_1.0.5
#> [13] jquerylib_0.1.4 fastmap_1.2.0 yaml_2.3.12 lifecycle_1.0.5
#> [17] compiler_4.6.0 RColorBrewer_1.1-3 farver_2.1.2 digest_0.6.39
#> [21] R6_2.6.1 bslib_0.11.0 tools_4.6.0 cachem_1.1.0