OpenMP should work OK for your application, but to make best use of it you have to inform it of a few things, and it does not appear to me that you did this. To be specific, you have to let it know which variables are local to each thread and which ones are shared. This is done to a certain degree automatically, but I usually obtain better results when I do it myself. For your code, I would guess it needs to look something like this:
/*
 * Parallelize over the XCO grid: each iteration i is an independent
 * simulation, so iterations can run concurrently.
 *
 * With default(none) every variable referenced inside the region must be
 * given an explicit data-sharing attribute.  Variables declared INSIDE the
 * region (XCO, run, j, t, k, r, r1, co2_count, n2_count, sum_coverage,
 * sum_co2_rate, sum_n2_rate, data_points, current_coverage) are
 * automatically private and must NOT appear in these clauses; the
 * worksharing loop index i is likewise always private.
 *
 * shared  : read-only inputs plus the output arrays indexed by i -- each
 *           thread writes a distinct element, so there is no race.
 * private : per-thread scratch declared outside the region.
 *
 * NOTE(review): private(lattice) assumes lattice is a true array; if it is
 * a pointer to heap memory, each thread must instead allocate its own
 * buffer -- confirm against the declaration.
 * NOTE(review): private(local_rng) gives each thread an UNINITIALIZED RNG
 * state; seed it per thread inside the region (e.g. using
 * omp_get_thread_num()) or every thread's random stream is undefined.
 */
#pragma omp parallel for default(none) \
    shared(XCO_values, num_simulations, num_runs, \
           co_coverages_mean, co_coverages_std_dev) \
    private(lattice, local_rng, co_coverages, o_coverages, n_coverages, \
            co2_rates, n2_rates)
for (int i = 0; i < num_simulations; i++) {
    /* CO mole fraction for this simulation, evenly spaced on [0, 1].
     * Assumes num_simulations > 1 (otherwise this divides by zero). */
    double XCO = i / (double)(num_simulations - 1);
    XCO_values[i] = XCO;

    for (int run = 0; run < num_runs; run++) {
        /* Start each run from an empty 64x64 lattice. */
        for (int j = 0; j < 64 * 64; j++) {
            lattice[j] = EMPTY;
        }

        int co2_count = 0, n2_count = 0;

        /* Equilibration: 30000 Monte Carlo sweeps, results discarded. */
        for (int t = 0; t < 30000; t++) {
            for (int k = 0; k < 64 * 64; k++) {
                /* ldexp(u32, -32) maps a 32-bit draw to uniform [0, 1). */
                double r = ldexp(pcg32_random_r(&local_rng), -32);
                if (r < XCO) {
                    double r1 = ldexp(pcg32_random_r(&local_rng), -32);
                    add_CO(lattice, &co2_count, COADSORPTION_PROBABILITY, r1);
                } else {
                    add_NO_dissociate(lattice, &co2_count, &n2_count);
                }
            }
        }

        /* BUG FIX: discard the reaction counts accumulated during
         * equilibration.  Without this reset, the first 10-sweep
         * measurement window below includes 30000 sweeps' worth of
         * warm-up events and wildly overestimates the initial rates. */
        co2_count = 0;
        n2_count = 0;

        Coverage sum_coverage = {0};
        double sum_co2_rate = 0, sum_n2_rate = 0;
        int data_points = 0;

        /* Production: 50000 sweeps, sampling coverages and rates every
         * 10 sweeps. */
        for (int t = 0; t < 50000; t++) {
            for (int k = 0; k < 64 * 64; k++) {
                double r = ldexp(pcg32_random_r(&local_rng), -32);
                if (r < XCO) {
                    double r1 = ldexp(pcg32_random_r(&local_rng), -32);
                    add_CO(lattice, &co2_count, COADSORPTION_PROBABILITY, r1);
                } else {
                    add_NO_dissociate(lattice, &co2_count, &n2_count);
                }
            }
            if ((t + 1) % 10 == 0) {
                Coverage current_coverage = calculate_coverage(lattice);
                sum_coverage.CO_coverage += current_coverage.CO_coverage;
                sum_coverage.O_coverage  += current_coverage.O_coverage;
                sum_coverage.N_coverage  += current_coverage.N_coverage;
                /* Rate = events per site per sweep, averaged over the
                 * 10-sweep window on the 64x64 lattice. */
                sum_co2_rate += (double)co2_count / (10 * 64 * 64);
                sum_n2_rate  += (double)n2_count / (10 * 64 * 64);
                co2_count = 0;
                n2_count = 0;
                data_points++;
            }
        }

        /* Per-run time averages (data_points == 5000 here, never zero). */
        co_coverages[run] = sum_coverage.CO_coverage / data_points;
        o_coverages[run]  = sum_coverage.O_coverage / data_points;
        n_coverages[run]  = sum_coverage.N_coverage / data_points;
        co2_rates[run]    = sum_co2_rate / data_points;
        n2_rates[run]     = sum_n2_rate / data_points;
    }

    /* Reduce over runs; each thread writes only its own index i. */
    co_coverages_mean[i]    = mean(co_coverages, num_runs);
    co_coverages_std_dev[i] = std_dev(co_coverages, num_runs);
}
I am not sure I covered them all, but I am close. As I mentioned, a private variable means each thread gets its own copy, and a shared variable means it is shared between all threads. The thing is, OpenMP does not protect shared variables for you — if a variable that should be per-thread is left shared, the threads will race on it and corrupt each other's results, and even when writes happen to land on distinct elements, contention on shared cache lines (false sharing) can hurt performance. This is why it is important to flag which variables are private to a thread and which ones are shared between threads.