Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
using typename TimeFrame<NLayers>::ROFMaskTableN;
using typename TimeFrame<NLayers>::TrackingTopologyN;
using typename TimeFrame<NLayers>::TrackSeedN;
static constexpr int MaxTransitions = TrackingTopologyN::MaxTransitions;
static constexpr int MaxLinks = TrackingTopologyN::MaxLinks;
static constexpr int MaxCells = TrackingTopologyN::MaxCells;
static constexpr int MaxStreams = MaxCells > NLayers ? MaxCells : NLayers;

Expand Down Expand Up @@ -157,7 +157,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
void setDevicePropagator(const o2::base::PropagatorImpl<float>* p) final { this->mPropagatorDevice = p; }

// Host-specific getters
gsl::span<int> getNTracklets() { return {mNTracklets.data(), static_cast<gsl::span<int>::size_type>(this->mTrackingTopologyView.nTransitions)}; }
gsl::span<int> getNTracklets() { return {mNTracklets.data(), static_cast<gsl::span<int>::size_type>(this->mTrackingTopologyView.nLinks)}; }
gsl::span<int> getNCells() { return {mNCells.data(), static_cast<gsl::span<int>::size_type>(this->mTrackingTopologyView.nCells)}; }
auto& getArrayNCells() { return mNCells; }
gsl::span<int> getNNeighbours() { return {mNNeighbours.data(), static_cast<gsl::span<int>::size_type>(this->mTrackingTopologyView.nCells)}; }
Expand All @@ -179,7 +179,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
void allocMem(void**, size_t, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on default stream

// Host-available device buffer sizes
std::array<int, MaxTransitions> mNTracklets{};
std::array<int, MaxLinks> mNTracklets{};
std::array<int, MaxCells> mNCells{};
std::array<int, MaxCells> mNNeighbours{};

Expand All @@ -205,8 +205,8 @@ class TimeFrameGPU : public TimeFrame<NLayers>
const int** mClustersIndexTablesDeviceArray;
uint8_t** mUsedClustersDeviceArray;
const int** mROFramesClustersDeviceArray;
std::array<Tracklet*, MaxTransitions> mTrackletsDevice{};
std::array<int*, MaxTransitions> mTrackletsLUTDevice{};
std::array<Tracklet*, MaxLinks> mTrackletsDevice{};
std::array<int*, MaxLinks> mTrackletsLUTDevice{};
std::array<int*, MaxCells> mCellsLUTDevice{};
std::array<int*, MaxCells> mNeighboursLUTDevice{};

Expand Down Expand Up @@ -258,7 +258,7 @@ inline std::vector<unsigned int> TimeFrameGPU<NLayers>::getClusterSizes()
template <int NLayers>
inline size_t TimeFrameGPU<NLayers>::getNumberOfTracklets() const
{
return std::accumulate(mNTracklets.begin(), mNTracklets.begin() + this->mTrackingTopologyView.nTransitions, 0);
return std::accumulate(mNTracklets.begin(), mNTracklets.begin() + this->mTrackingTopologyView.nLinks, 0);
}

template <int NLayers>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ExternalAllocator;
template <int NLayers>
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
const typename ROFMaskTable<NLayers>::View& rofMask,
const int transitionId,
const int linkId,
const int fromLayer,
const int toLayer,
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
Expand All @@ -58,20 +58,20 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
const bool selectUPCVertices,
const float NSigmaCut,
const typename TrackingTopology<NLayers>::View topology,
bounded_vector<float>& transitionPhiCuts,
bounded_vector<float>& linkPhiCuts,
const float resolutionPV,
std::array<float, NLayers>& minR,
std::array<float, NLayers>& maxR,
bounded_vector<float>& resolutions,
std::vector<float>& radii,
bounded_vector<float>& transitionMSAngles,
bounded_vector<float>& linkMSAngles,
o2::its::ExternalAllocator* alloc,
gpu::Streams& streams);

template <int NLayers>
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
const typename ROFMaskTable<NLayers>::View& rofMask,
const int transitionId,
const int linkId,
const int fromLayer,
const int toLayer,
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
Expand All @@ -92,13 +92,13 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
const bool selectUPCVertices,
const float NSigmaCut,
const typename TrackingTopology<NLayers>::View topology,
bounded_vector<float>& transitionPhiCuts,
bounded_vector<float>& linkPhiCuts,
const float resolutionPV,
std::array<float, NLayers>& minR,
std::array<float, NLayers>& maxR,
bounded_vector<float>& resolutions,
std::vector<float>& radii,
bounded_vector<float>& transitionMSAngles,
bounded_vector<float>& linkMSAngles,
o2::its::ExternalAllocator* alloc,
gpu::Streams& streams);

Expand Down
42 changes: 21 additions & 21 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -335,25 +335,25 @@ void TimeFrameGPU<NLayers>::loadTrackingTopologies()
GPUTimer timer("initialising device views of TrackingTopology");
const auto& hostTopologies = this->getTrackerTopologies();
mDeviceTrackerTopologyViews.resize(hostTopologies.size());
using LayerTransition = typename TrackingTopologyN::LayerTransition;
using LayerLink = typename TrackingTopologyN::LayerLink;
using CellTopology = typename TrackingTopologyN::CellTopology;
using Range = typename TrackingTopologyN::Range;
using Id = typename TrackingTopologyN::Id;
for (size_t iteration = 0; iteration < hostTopologies.size(); ++iteration) {
const auto& topology = hostTopologies[iteration];
LayerTransition* dTransitions{nullptr};
LayerLink* dLinks{nullptr};
CellTopology* dCells{nullptr};
Range* dCellsByFirstTransitionIndex{nullptr};
Id* dCellsByFirstTransition{nullptr};
allocMem(reinterpret_cast<void**>(&dTransitions), topology.getNTransitions() * sizeof(LayerTransition), this->hasFrameworkAllocator());
Range* dCellsByFirstLinkIndex{nullptr};
Id* dCellsByFirstLink{nullptr};
allocMem(reinterpret_cast<void**>(&dLinks), topology.getNLinks() * sizeof(LayerLink), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&dCells), topology.getNCells() * sizeof(CellTopology), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&dCellsByFirstTransitionIndex), topology.getNTransitions() * sizeof(Range), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&dCellsByFirstTransition), topology.getNCellsByFirstTransition() * sizeof(Id), this->hasFrameworkAllocator());
GPUChkErrS(cudaMemcpy(dTransitions, topology.getTransitions().data(), topology.getNTransitions() * sizeof(LayerTransition), cudaMemcpyHostToDevice));
allocMem(reinterpret_cast<void**>(&dCellsByFirstLinkIndex), topology.getNLinks() * sizeof(Range), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&dCellsByFirstLink), topology.getNCellsByFirstLink() * sizeof(Id), this->hasFrameworkAllocator());
GPUChkErrS(cudaMemcpy(dLinks, topology.getLinks().data(), topology.getNLinks() * sizeof(LayerLink), cudaMemcpyHostToDevice));
GPUChkErrS(cudaMemcpy(dCells, topology.getCells().data(), topology.getNCells() * sizeof(CellTopology), cudaMemcpyHostToDevice));
GPUChkErrS(cudaMemcpy(dCellsByFirstTransitionIndex, topology.getCellsByFirstTransitionIndex().data(), topology.getNTransitions() * sizeof(Range), cudaMemcpyHostToDevice));
GPUChkErrS(cudaMemcpy(dCellsByFirstTransition, topology.getCellsByFirstTransition().data(), topology.getNCellsByFirstTransition() * sizeof(Id), cudaMemcpyHostToDevice));
mDeviceTrackerTopologyViews[iteration] = topology.getDeviceView(dTransitions, dCells, dCellsByFirstTransitionIndex, dCellsByFirstTransition);
GPUChkErrS(cudaMemcpy(dCellsByFirstLinkIndex, topology.getCellsByFirstLinkIndex().data(), topology.getNLinks() * sizeof(Range), cudaMemcpyHostToDevice));
GPUChkErrS(cudaMemcpy(dCellsByFirstLink, topology.getCellsByFirstLink().data(), topology.getNCellsByFirstLink() * sizeof(Id), cudaMemcpyHostToDevice));
mDeviceTrackerTopologyViews[iteration] = topology.getDeviceView(dLinks, dCells, dCellsByFirstLinkIndex, dCellsByFirstLink);
}
if (!mDeviceTrackerTopologyViews.empty()) {
mDeviceTrackingTopologyView = mDeviceTrackerTopologyViews.front();
Expand All @@ -380,15 +380,15 @@ template <int NLayers>
void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray()
{
{
allocMem(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), MaxTransitions * sizeof(int*), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), MaxLinks * sizeof(int*), this->hasFrameworkAllocator());
}
}

template <int NLayers>
void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(bool allocate, const int layer)
{
GPUTimer timer(mGpuStreams[layer], "creating tracklets LUTs", layer);
const int fromLayer = this->mTrackingTopologyView.getTransition(layer).fromLayer;
const int fromLayer = this->mTrackingTopologyView.getLink(layer).fromLayer;
const int ncls = this->mClusters[fromLayer].size() + 1;
if (allocate || mTrackletsLUTDevice[layer] == nullptr) {
GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB);
Expand All @@ -403,7 +403,7 @@ void TimeFrameGPU<NLayers>::createTrackletsBuffersArray()
{
{
GPUTimer timer("creating tracklet buffers array");
allocMem(reinterpret_cast<void**>(&mTrackletsDeviceArray), MaxTransitions * sizeof(Tracklet*), this->hasFrameworkAllocator());
allocMem(reinterpret_cast<void**>(&mTrackletsDeviceArray), MaxLinks * sizeof(Tracklet*), this->hasFrameworkAllocator());
}
}

Expand All @@ -412,7 +412,7 @@ void TimeFrameGPU<NLayers>::createTrackletsBuffers(const int layer)
{
GPUTimer timer(mGpuStreams[layer], "creating tracklet buffers", layer);
mNTracklets[layer] = 0;
const int fromLayer = this->mTrackingTopologyView.getTransition(layer).fromLayer;
const int fromLayer = this->mTrackingTopologyView.getLink(layer).fromLayer;
GPUChkErrS(cudaMemcpyAsync(&mNTracklets[layer], mTrackletsLUTDevice[layer] + this->mClusters[fromLayer].size(), sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
mGpuStreams[layer].sync(); // ensure number of tracklets is correct
GPULog("gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {:.2f} MB.", mNTracklets[layer], layer, mNTracklets[layer] * sizeof(Tracklet) / constants::MB);
Expand Down Expand Up @@ -491,10 +491,10 @@ template <int NLayers>
void TimeFrameGPU<NLayers>::createCellsLUTDevice(const int layer)
{
GPUTimer timer(mGpuStreams[layer], "creating cells LUTs", layer);
const int firstTransition = this->mTrackingTopologyView.getCell(layer).firstTransition;
GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[firstTransition] + 1, layer, (mNTracklets[firstTransition] + 1) * sizeof(int) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[layer]), (mNTracklets[firstTransition] + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[layer], 0, (mNTracklets[firstTransition] + 1) * sizeof(int), mGpuStreams[layer].get()));
const int firstLink = this->mTrackingTopologyView.getCell(layer).firstLink;
GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[firstLink] + 1, layer, (mNTracklets[firstLink] + 1) * sizeof(int) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[layer]), (mNTracklets[firstLink] + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[layer], 0, (mNTracklets[firstLink] + 1) * sizeof(int), mGpuStreams[layer].get()));
GPUChkErrS(cudaMemcpyAsync(&mCellsLUTDeviceArray[layer], &mCellsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
}

Expand All @@ -515,8 +515,8 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
{
GPUTimer timer(mGpuStreams[layer], "creating cells buffers");
mNCells[layer] = 0;
const int firstTransition = this->mTrackingTopologyView.getCell(layer).firstTransition;
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[firstTransition], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
const int firstLink = this->mTrackingTopologyView.getCell(layer).firstLink;
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[firstLink], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
mGpuStreams[layer].sync(); // ensure number of cells is correct
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
Expand Down
Loading