Rebuilding doesn't help and there is no pragma, but I have the source. The kernel declarations are:
KERNEL FireLayer(CUDA_FLOATING_TYPE * inputs, CUDA_FLOATING_TYPE * weights, CUDA_FLOATING_TYPE * m, int mOffset, int totalNeuronsWithSelectiveActivation, CUDA_FLOATING_TYPE * outputs);
KERNEL FireOutputLayer(CUDA_FLOATING_TYPE * inputs, CUDA_FLOATING_TYPE * weights, CUDA_FLOATING_TYPE * m, int mOffset, int totalNeuronsWithSelectiveActivation, CUDA_FLOATING_TYPE * desiredOutputs, CUDA_FLOATING_TYPE * outputs, CUDA_FLOATING_TYPE * localGradient, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * localGradientSpaceNet);
void KernelFireLayer(cudaStream_t stream, dim3 & gridSize, int blockSize, CUDA_FLOATING_TYPE * inputs, CUDA_FLOATING_TYPE * weights, CUDA_FLOATING_TYPE * m, int mOffset, int totalNeuronsWithSelectiveActivation, CUDA_FLOATING_TYPE * outputs, int numInputs);
void KernelFireOutputLayer(cudaStream_t stream, dim3 & gridSize, int blockSize, CUDA_FLOATING_TYPE * inputs, CUDA_FLOATING_TYPE * weights, CUDA_FLOATING_TYPE * m, int mOffset, int totalNeuronsWithSelectiveActivation, CUDA_FLOATING_TYPE * desiredOutputs, CUDA_FLOATING_TYPE * outputs, CUDA_FLOATING_TYPE * localGradient, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * localGradientSpaceNet, int numInputs);
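For context, KERNEL comes from CudaDefinitions.h; I assume it simply expands to the __global__ qualifier, so FireLayer and FireOutputLayer are device kernels while the Kernel* functions are ordinary host wrappers that launch them. My guess at the macro (not verified against the actual header):
// assumed definition -- the real CudaDefinitions.h may differ
#define KERNEL __global__ void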
and the linker errors are:
1>CudaMultipleBackPropagation.obj : error LNK2019: unresolved external symbol "void __cdecl FireLayer__entry(float *,float *,float *,int,int,float *)" (?FireLayer__entry@@YAXPAM00HH0@Z) referenced in function "public: void __thiscall CudaMultipleBackPropagation::DeviceLayer::Fire(int)" (?Fire@DeviceLayer@CudaMultipleBackPropagation@@QAEXH@Z)
1>CudaMultipleBackPropagation.obj : error LNK2019: unresolved external symbol "void __cdecl KernelFireLayer(int,struct dim3 &,int,float *,float *,float *,int,int,float *,int)" (?KernelFireLayer@@YAXHAAUdim3@@HPAM11HH1H@Z) referenced in function "public: void __thiscall CudaMultipleBackPropagation::DeviceLayer::Fire(int)" (?Fire@DeviceLayer@CudaMultipleBackPropagation@@QAEXH@Z)
So the linker complains about symbols referenced from CudaMultipleBackPropagation::DeviceLayer::Fire(int). Fire is declared like this:
void Fire(cudaStream_t stream);
(apparently cudaStream_t is just a typedef for int in this CUDA version, which is why the mangled names show Fire(int) and KernelFireLayer(int, struct dim3 &, ...)). Fire then dispatches to FireLayer. Does that mean the arguments of CudaMultipleBackPropagation::DeviceLayer::Fire (i.e. cudaStream_t stream) don't map to the arguments of FireLayer? Why does compilation pass, then? And what does the __entry suffix after FireLayer mean?
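For reference, here is roughly the shape I would expect the KernelFireLayer wrapper to have in the .cu file that is supposed to define it (a sketch under my own assumptions, not the project's actual code; in particular the shared-memory size and the use of numInputs are guesses):
// Sketch only: assumed shape of the host wrapper that must live in a
// CUDA (.cu) translation unit, since the <<< >>> syntax is only understood by nvcc.
void KernelFireLayer(cudaStream_t stream, dim3 & gridSize, int blockSize,
                     CUDA_FLOATING_TYPE * inputs, CUDA_FLOATING_TYPE * weights,
                     CUDA_FLOATING_TYPE * m, int mOffset,
                     int totalNeuronsWithSelectiveActivation,
                     CUDA_FLOATING_TYPE * outputs, int numInputs)
{
    // numInputs is unused in this sketch; the real wrapper presumably needs it
    // for its reduction / shared-memory setup.
    (void) numInputs;
    int sharedMem = blockSize * sizeof(CUDA_FLOATING_TYPE);
    FireLayer<<<gridSize, blockSize, sharedMem, stream>>>(
        inputs, weights, m, mOffset, totalNeuronsWithSelectiveActivation, outputs);
}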
The whole class (header first, then the implementation):
#include "../cuda.h"
#include "../MultipleBackPropagation.h"
#include "../../Common/CUDA/CudaDefinitions.h"
#include "../../Common/CUDA/Arrays/DeviceArray.h"
#include "../../Common/CUDA/Arrays/HostArray.h"
class CudaMultipleBackPropagation {
private:
class DeviceLayer {
friend class CudaMultipleBackPropagation;
private:
static int neuronsWithSelectiveActivation;
int patterns;
int neurons;
int inputs;
int inputsWithoutBias;
int connections;
DeviceArray<CUDA_FLOATING_TYPE> weights;
DeviceArray<CUDA_FLOATING_TYPE> bestWeights;
DeviceArray<CUDA_FLOATING_TYPE> learnRate;
DeviceArray<CUDA_FLOATING_TYPE> lastDelta;
DeviceArray<CUDA_FLOATING_TYPE> lastDeltaWithoutLearningMomentum;
DeviceArray<CUDA_FLOATING_TYPE> outputs;
DeviceArray<CUDA_FLOATING_TYPE> localGradient;
CUDA_FLOATING_TYPE * inputValues;
CUDA_FLOATING_TYPE * desOutputs;
CUDA_FLOATING_TYPE * m;
int mOffset;
CUDA_FLOATING_TYPE * lgSpaceNet;
CUDA_FLOATING_TYPE * rms;
dim3 dimNeuronsPatterns;
dim3 dimInputsNeurons;
dim3 dimOutputsNeurons;
int inputsBlockSize;
int sharedMemFire;
int sharedMemGradients;
bool isOutputLayer;
public:
DeviceLayer(HostArray<CUDA_FLOATING_TYPE> & hweights, HostArray<CUDA_FLOATING_TYPE> & hlearnRate, HostArray<CUDA_FLOATING_TYPE> & hlastDelta, HostArray<CUDA_FLOATING_TYPE> & hlastDeltaWithoutLearningMomentum, DeviceArray<CUDA_FLOATING_TYPE> * layerInputs, int inputs, int neurons, int nextLayerNeurons, int patterns, CUDA_FLOATING_TYPE * m, int mOffset, CUDA_FLOATING_TYPE * lgSpaceNet) : weights(hweights), learnRate(hlearnRate), lastDelta(hlastDelta), lastDeltaWithoutLearningMomentum(hlastDeltaWithoutLearningMomentum), outputs(neurons * patterns), localGradient(neurons * patterns), dimNeuronsPatterns(neurons, patterns), dimInputsNeurons(inputs, neurons), bestWeights(hweights.Lenght()), dimOutputsNeurons(nextLayerNeurons, neurons) {
connections = hweights.Lenght();
this->m = m;
this->mOffset = mOffset;
this->lgSpaceNet = lgSpaceNet;
this->inputs = inputs;
this->neurons = neurons;
this->patterns = patterns;
inputsWithoutBias = inputs - 1;
inputsBlockSize = 1;
while(inputsBlockSize < MAX_THREADS_PER_BLOCK && inputsBlockSize < inputs) inputsBlockSize <<= 1;
sharedMemFire = weights.Lenght() * sizeof(CUDA_FLOATING_TYPE);
sharedMemGradients = (nextLayerNeurons * (neurons + 1)) * sizeof(CUDA_FLOATING_TYPE);
inputValues = layerInputs->Pointer();
desOutputs = rms = NULL;
isOutputLayer = false;
}
void DefineOutputLayer(CudaMultipleBackPropagation * cmbp) {
isOutputLayer = true;
desOutputs = cmbp->d_desOutputs->Pointer();
rms = cmbp->d_rms->Pointer();
sharedMemFire += neurons * sizeof(CUDA_FLOATING_TYPE);
}
void Fire(cudaStream_t stream);
void CalculateLocalGradient(cudaStream_t stream, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * bestRMS, CUDA_FLOATING_TYPE rmsGrowToApplyRobustLearning, DeviceLayer * nextLayer);
void CorrectWeights(cudaStream_t stream, int patternsBlockSize, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * bestRMS, CUDA_FLOATING_TYPE rmsGrowToApplyRobustLearning, CUDA_FLOATING_TYPE robustFactor, CUDA_FLOATING_TYPE momentum);
};
List<DeviceLayer> layersSpaceNetwork;
List<DeviceLayer> layers;
Pointer< DeviceArray<CUDA_FLOATING_TYPE> > d_inputs;
Pointer< DeviceArray<CUDA_FLOATING_TYPE> > d_desOutputs;
Pointer< DeviceArray<CUDA_FLOATING_TYPE> > d_rms;
Pointer< DeviceArray<CUDA_FLOATING_TYPE> > d_bestRMS;
DeviceArray<CUDA_FLOATING_TYPE> d_rmsOut;
CUDA_FLOATING_TYPE * rms;
Pointer< DeviceArray<int> > d_numberWeightsLayer;
Pointer< DeviceArray<CUDA_FLOATING_TYPE *> > d_weightsLayers;
Pointer< DeviceArray<CUDA_FLOATING_TYPE *> > d_bestWeightsLayers;
Pointer< DeviceArray<CUDA_FLOATING_TYPE *> > d_learnRatesLayers;
Pointer< DeviceArray<CUDA_FLOATING_TYPE *> > d_lastDeltaLayers;
Pointer< DeviceArray<CUDA_FLOATING_TYPE *> > d_lastDeltaWithoutLMlayers;
cudaStream_t streamKernels;
cudaStream_t streamRMS;
int layersRobustTraining;
int maxNumberWeigths;
int patternsBlockSize;
CUDA_FLOATING_TYPE numberPatternsNeurons;
void CreateDeviceLayers(List<Layer> & hostLayers, List<DeviceLayer> & deviceLayers, int patterns, int * neuronsWithSelectiveActivation);
void CopyLayersToHost(List<DeviceLayer> & deviceLayers, List<Layer> & hostLayers);
public:
CudaMultipleBackPropagation(Pointer <MultipleBackPropagation> & mbp, Matrix<double> & trainInputPatterns, Matrix<double> & trainDesiredOutputPatterns);
~CudaMultipleBackPropagation();
void Train(double momentum, double spaceMomentum, bool robustLearning, double rmsGrowToApplyRobustLearning, double robustFactor);
CUDA_FLOATING_TYPE GetRMS() {
return *rms;
}
void CopyNetworkHost(Pointer <MultipleBackPropagation> & mbp);
};
#endif
#include "CudaMultipleBackPropagation.h"
#include "MBPkernels.h"
int CudaMultipleBackPropagation::DeviceLayer::neuronsWithSelectiveActivation = 0;
void CudaMultipleBackPropagation::DeviceLayer::Fire(cudaStream_t stream) {
if (isOutputLayer) {
if(connections > MAX_THREADS_PER_BLOCK) {
KernelFireOutputLayer(stream, dimNeuronsPatterns, inputsBlockSize, inputValues, weights.Pointer(), m, mOffset, neuronsWithSelectiveActivation, desOutputs, outputs.Pointer(), localGradient.Pointer(), rms, lgSpaceNet, inputsWithoutBias);
} else {
FireOutputLayer<<<patterns, dimInputsNeurons, sharedMemFire, stream>>>(inputValues, weights.Pointer(), m, mOffset, neuronsWithSelectiveActivation, desOutputs, outputs.Pointer(), localGradient.Pointer(), rms, lgSpaceNet);
}
} else {
if(connections > MAX_THREADS_PER_BLOCK) {
KernelFireLayer(stream, dimNeuronsPatterns, inputsBlockSize, inputValues, weights.Pointer(), m, mOffset, neuronsWithSelectiveActivation, outputs.Pointer(), inputsWithoutBias);
} else {
FireLayer<<<patterns, dimInputsNeurons, sharedMemFire, stream>>>(inputValues, weights.Pointer(), m, mOffset, neuronsWithSelectiveActivation, outputs.Pointer());
}
}
}
void CudaMultipleBackPropagation::DeviceLayer::CalculateLocalGradient(cudaStream_t stream, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * bestRMS, CUDA_FLOATING_TYPE rmsGrowToApplyRobustLearning, DeviceLayer * nextLayer) {
::CalculateLocalGradient<<<patterns, dimOutputsNeurons, sharedMemGradients, stream>>>(rms, bestRMS, rmsGrowToApplyRobustLearning, outputs.Pointer(), nextLayer->weights.Pointer(), m, mOffset, neuronsWithSelectiveActivation, nextLayer->localGradient.Pointer(), localGradient.Pointer(), lgSpaceNet);
}
void CudaMultipleBackPropagation::DeviceLayer::CorrectWeights(cudaStream_t stream, int patternsBlockSize, CUDA_FLOATING_TYPE * rms, CUDA_FLOATING_TYPE * bestRMS, CUDA_FLOATING_TYPE rmsGrowToApplyRobustLearning, CUDA_FLOATING_TYPE robustFactor, CUDA_FLOATING_TYPE momentum) {
KernelCorrectLayerWeights(stream, dimInputsNeurons, patternsBlockSize, rms, bestRMS, rmsGrowToApplyRobustLearning, inputValues, localGradient.Pointer(), weights.Pointer(), learnRate.Pointer(), lastDeltaWithoutLearningMomentum.Pointer(), lastDelta.Pointer(), (CUDA_FLOATING_TYPE) Connection::u, (CUDA_FLOATING_TYPE) Connection::d, robustFactor, momentum, patterns);
}
void CudaMultipleBackPropagation::CreateDeviceLayers(List<Layer> & hostLayers, List<DeviceLayer> & deviceLayers, int patterns, int * neuronsWithSelectiveActivation) {
Layer * l = hostLayers.First();
int inputsWithoutBias = l->neurons.Lenght();
DeviceArray<CUDA_FLOATING_TYPE> * layerInputs = d_inputs;
DeviceLayer * outputLayerSpaceNetwork = layersSpaceNetwork.Last();
CUDA_FLOATING_TYPE * m = (neuronsWithSelectiveActivation == NULL) ? NULL : outputLayerSpaceNetwork->outputs.Pointer();
CUDA_FLOATING_TYPE * lgSpaceNet = (neuronsWithSelectiveActivation == NULL) ? NULL : outputLayerSpaceNetwork->localGradient.Pointer();
int mOffset = 0;
Layer * nextLayer = hostLayers.Next();
for (int ln = 1; (l = nextLayer) != NULL; ln++) {
int neurons = l->neurons.Lenght();
int inputs = inputsWithoutBias + 1;
int connections = inputs * neurons;
if (connections > maxNumberWeigths) maxNumberWeigths = connections;
HostArray<CUDA_FLOATING_TYPE> weights(connections);
HostArray<CUDA_FLOATING_TYPE> learningRate(connections);
HostArray<CUDA_FLOATING_TYPE> lDelta(connections);
HostArray<CUDA_FLOATING_TYPE> lastDeltaWithoutLearningMomentum(connections);
int w = 0;
for(NeuronWithInputConnections * n = static_cast<NeuronWithInputConnections *> (l->neurons.First()); n != NULL; n = static_cast<NeuronWithInputConnections *> (l->neurons.Next())) {
for(Connection * c = n->inputs.First(); c != NULL; c = n->inputs.Next()) {
weights[w] = (CUDA_FLOATING_TYPE) c->weight;
learningRate[w] = (CUDA_FLOATING_TYPE) c->learningRate;
lDelta[w] = (CUDA_FLOATING_TYPE) c->delta;
lastDeltaWithoutLearningMomentum[w] = (CUDA_FLOATING_TYPE) c->lastDeltaWithoutLearningMomentum;
w++;
}
}
int numberNeuronsWithSelectiveActivation = (m == NULL) ? 0 : neuronsWithSelectiveActivation[ln];
CUDA_FLOATING_TYPE * ml = (numberNeuronsWithSelectiveActivation) ? m : NULL;
CUDA_FLOATING_TYPE * lgSpaceNetl = (numberNeuronsWithSelectiveActivation) ? lgSpaceNet : NULL;
nextLayer = hostLayers.Next();
int nextLayerNeurons = (nextLayer == NULL) ? 0 : nextLayer->neurons.Lenght();
DeviceLayer * dl = new DeviceLayer(weights, learningRate, lDelta, lastDeltaWithoutLearningMomentum, layerInputs, inputs, neurons, nextLayerNeurons, patterns, ml, mOffset, lgSpaceNetl);
deviceLayers.Add(dl);
mOffset += numberNeuronsWithSelectiveActivation;
layerInputs = &(dl->outputs);
inputsWithoutBias = neurons;
}
}
CudaMultipleBackPropagation::CudaMultipleBackPropagation(Pointer <MultipleBackPropagation> & mbp, Matrix<double> & trainInputPatterns, Matrix<double> & trainDesiredOutputPatterns) : d_rmsOut(1) {
int patterns = trainInputPatterns.Rows();
int ninputs = mbp->Inputs();
int noutputs = mbp->Outputs();
HostArray<CUDA_FLOATING_TYPE> inputs(ninputs * patterns);
HostArray<CUDA_FLOATING_TYPE> desiredOutputs(noutputs * patterns);
for(int p = 0; p < patterns; p++) {
for (int i = 0; i < ninputs; i++) inputs[p * ninputs + i] = (CUDA_FLOATING_TYPE) trainInputPatterns[p][i];
for (int o = 0; o < noutputs; o++) desiredOutputs[p * noutputs + o] = (CUDA_FLOATING_TYPE) trainDesiredOutputPatterns[p][o];
}
d_inputs = new DeviceArray<CUDA_FLOATING_TYPE>(inputs);
d_desOutputs = new DeviceArray<CUDA_FLOATING_TYPE>(desiredOutputs);
maxNumberWeigths = 0;
int * neuronsWithSelectiveActivation = NULL;
if (!mbp->spaceNetwork.IsNull()) {
CreateDeviceLayers(mbp->spaceNetwork->layers, layersSpaceNetwork, patterns, NULL);
neuronsWithSelectiveActivation = mbp->neuronsWithSelectiveActivation.Pointer();
DeviceLayer::neuronsWithSelectiveActivation = layersSpaceNetwork.Last()->neurons;
}
CreateDeviceLayers(mbp->layers, layers, patterns, neuronsWithSelectiveActivation);
DeviceLayer * dlOut = layers.Last();
layersRobustTraining = layersSpaceNetwork.Lenght() + layers.Lenght();
HostArray<int> numberWeightsLayer(layersRobustTraining);
HostArray<CUDA_FLOATING_TYPE *> weightsLayers(layersRobustTraining);
HostArray<CUDA_FLOATING_TYPE *> bestWeightsLayers(layersRobustTraining);
HostArray<CUDA_FLOATING_TYPE *> learnRatesLayers(layersRobustTraining);
HostArray<CUDA_FLOATING_TYPE *> lastDeltaLayers(layersRobustTraining);
HostArray<CUDA_FLOATING_TYPE *> lastDeltaWithoutLMlayers(layersRobustTraining);
int ll = 0;
for(DeviceLayer * l = layersSpaceNetwork.First(); l != NULL; l = layersSpaceNetwork.Next()) {
numberWeightsLayer[ll] = l->connections;
weightsLayers[ll] = l->weights.Pointer();
bestWeightsLayers[ll] = l->bestWeights.Pointer();
learnRatesLayers[ll] = l->learnRate.Pointer();
lastDeltaLayers[ll] = l->lastDelta.Pointer();
lastDeltaWithoutLMlayers[ll] = l->lastDeltaWithoutLearningMomentum.Pointer();
ll++;
}
for(DeviceLayer * l = layers.First(); l != NULL; l = layers.Next()) {
numberWeightsLayer[ll] = l->connections;
weightsLayers[ll] = l->weights.Pointer();
bestWeightsLayers[ll] = l->bestWeights.Pointer();
learnRatesLayers[ll] = l->learnRate.Pointer();
lastDeltaLayers[ll] = l->lastDelta.Pointer();
lastDeltaWithoutLMlayers[ll] = l->lastDeltaWithoutLearningMomentum.Pointer();
ll++;
}
d_numberWeightsLayer = new DeviceArray<int>(numberWeightsLayer);
d_weightsLayers = new DeviceArray<CUDA_FLOATING_TYPE *>(weightsLayers);
d_bestWeightsLayers = new DeviceArray<CUDA_FLOATING_TYPE *>(bestWeightsLayers);
d_learnRatesLayers = new DeviceArray<CUDA_FLOATING_TYPE *>(learnRatesLayers);
d_lastDeltaLayers = new DeviceArray<CUDA_FLOATING_TYPE *>(lastDeltaLayers);
d_lastDeltaWithoutLMlayers = new DeviceArray<CUDA_FLOATING_TYPE *>(lastDeltaWithoutLMlayers);
int sizeRMSvector = (dlOut->connections > MAX_THREADS_PER_BLOCK) ? patterns * dlOut->neurons : patterns;
d_rms = new DeviceArray<CUDA_FLOATING_TYPE>(sizeRMSvector);
dlOut->DefineOutputLayer(this);
HostArray<CUDA_FLOATING_TYPE> h_bestRMS(1);
h_bestRMS[0] = (patterns * CUDA_VALUE(3.0));
d_bestRMS = new DeviceArray<CUDA_FLOATING_TYPE>(h_bestRMS);
cudaMallocHost((void**) &rms, sizeof(CUDA_FLOATING_TYPE));
*rms = CUDA_VALUE(1.0);
patternsBlockSize = 1;
while(patternsBlockSize < MAX_THREADS_PER_BLOCK && patternsBlockSize < patterns) patternsBlockSize <<= 1;
numberPatternsNeurons = (CUDA_FLOATING_TYPE) patterns * (CUDA_FLOATING_TYPE) dlOut->neurons;
cudaStreamCreate(&streamKernels);
cudaStreamCreate(&streamRMS);
}
CudaMultipleBackPropagation::~CudaMultipleBackPropagation() {
cudaStreamDestroy(streamKernels);
cudaStreamDestroy(streamRMS);
*rms = CUDA_VALUE(1.0);
cudaFreeHost(rms);
}
void CudaMultipleBackPropagation::Train(double momentum, double spaceMomentum, bool robustLearning, double rmsGrowToApplyRobustLearning, double robustFactor) {
for(DeviceLayer * l = layersSpaceNetwork.First(); l != NULL; l = layersSpaceNetwork.Next()) l->Fire(streamKernels);
for(DeviceLayer * l = layers.First(); l != NULL; l = layers.Next()) l->Fire(streamKernels);
if (robustLearning) {
KernelCalculateRMS(streamKernels, patternsBlockSize, d_rms->Pointer(), d_rmsOut.Pointer(), d_rms->Lenght(), numberPatternsNeurons);
if (cudaStreamQuery(streamRMS) == cudaSuccess) cudaMemcpyAsync(rms, d_rmsOut.Pointer(), sizeof(CUDA_FLOATING_TYPE), cudaMemcpyDeviceToHost, streamRMS);
RobustLearning<<<1, maxNumberWeigths, 0, streamKernels>>>(d_rmsOut.Pointer(), d_bestRMS->Pointer(), (CUDA_FLOATING_TYPE) rmsGrowToApplyRobustLearning, layersRobustTraining, d_numberWeightsLayer->Pointer(), d_weightsLayers->Pointer(), d_bestWeightsLayers->Pointer(), d_learnRatesLayers->Pointer(), robustFactor, d_lastDeltaWithoutLMlayers->Pointer(), d_lastDeltaLayers->Pointer());
} else {
if (cudaStreamQuery(streamRMS) == cudaSuccess) {
KernelCalculateRMS(streamRMS, patternsBlockSize, d_rms->Pointer(), d_rmsOut.Pointer(), d_rms->Lenght(), numberPatternsNeurons);
cudaMemcpyAsync(rms, d_rmsOut.Pointer(), sizeof(CUDA_FLOATING_TYPE), cudaMemcpyDeviceToHost, streamRMS);
}
}
CUDA_FLOATING_TYPE * rms = (robustLearning) ? d_rmsOut.Pointer() : NULL;
CUDA_FLOATING_TYPE * bestRMS = (robustLearning) ? d_bestRMS->Pointer() : NULL;
DeviceLayer * nextLayer = layers.Last();
for(DeviceLayer * l = layers.Previous(); l != NULL; l = layers.Previous()) {
l->CalculateLocalGradient(streamKernels, rms, bestRMS, (CUDA_FLOATING_TYPE) rmsGrowToApplyRobustLearning, nextLayer);
nextLayer = l;
}
nextLayer = layersSpaceNetwork.Last();
for(DeviceLayer * l = layersSpaceNetwork.Previous(); l != NULL; l = layersSpaceNetwork.Previous()) {
l->CalculateLocalGradient(streamKernels, rms, bestRMS, (CUDA_FLOATING_TYPE) rmsGrowToApplyRobustLearning, nextLayer);
nextLayer = l;
}
for(DeviceLayer * l = layers.Last(); l != NULL; l = layers.Previous()) l->CorrectWeights(streamKernels, patternsBlockSize, rms, bestRMS, rmsGrowToApplyRobustLearning, robustFactor, momentum);
for(DeviceLayer * l = layersSpaceNetwork.Last(); l != NULL; l = layersSpaceNetwork.Previous()) l->CorrectWeights(streamKernels, patternsBlockSize, rms, bestRMS, rmsGrowToApplyRobustLearning, robustFactor, spaceMomentum);
}
void CudaMultipleBackPropagation::CopyLayersToHost(List<DeviceLayer> & deviceLayers, List<Layer> & hostLayers) {
hostLayers.First();
for(DeviceLayer * l = deviceLayers.First(); l != NULL; l = layers.Next()) {
Layer * hl = hostLayers.Next();
HostArray<CUDA_FLOATING_TYPE> dweights(l->weights);
HostArray<CUDA_FLOATING_TYPE> dlearnRate(l->learnRate);
HostArray<CUDA_FLOATING_TYPE> dlastDelta(l->lastDelta);
HostArray<CUDA_FLOATING_TYPE> dlastDeltaWithoutLearningMomentum(l->lastDeltaWithoutLearningMomentum);
int w = 0;
for(NeuronWithInputConnections * n = static_cast<NeuronWithInputConnections *> (hl->neurons.First()); n != NULL; n = static_cast<NeuronWithInputConnections *> (hl->neurons.Next())) {
for(Connection * c = n->inputs.First(); c != NULL; c = n->inputs.Next()) {
c->weight = dweights[w];
c->learningRate = dlearnRate[w];
c->delta = dlastDelta[w];
c->lastDeltaWithoutLearningMomentum = dlastDeltaWithoutLearningMomentum[w];
w++;
}
}
}
}
void CudaMultipleBackPropagation::CopyNetworkHost(Pointer <MultipleBackPropagation> & mbp) {
if (!mbp->spaceNetwork.IsNull()) CopyLayersToHost(layersSpaceNetwork, mbp->spaceNetwork->layers);
CopyLayersToHost(layers, mbp->layers);
}