Skip to content

Commit d256e71

Browse files
authored
fix the SYCL DCT sample (#2693)
Because the MatrixMultiply function is called both on pointers to the global address space and on pointers to the private address space, it cannot take a multi_ptr fixed to the global address space as a function argument; it now takes plain pointer parameters instead. This fixes incorrect execution (page faults) on some GPUs.
1 parent 9f95f5d commit d256e71

File tree

1 file changed

+17
-16
lines changed
  • DirectProgramming/C++SYCL/SpectralMethods/DiscreteCosineTransform/src

1 file changed

+17
-16
lines changed

DirectProgramming/C++SYCL/SpectralMethods/DiscreteCosineTransform/src/DCT.cpp

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,8 @@ void MatrixTranspose(float x[block_size], float xinv[block_size]) {
4747
}
4848

4949
// Multiply two matrices x and y and write output to xy
50-
SYCL_EXTERNAL void MatrixMultiply(multi_ptr<const float, access::address_space::global_space, (sycl::access::decorated)2> x,
51-
multi_ptr<const float, access::address_space::global_space, (sycl::access::decorated)2> y,
50+
SYCL_EXTERNAL void MatrixMultiply(const float x[block_size],
51+
const float y[block_size],
5252
float xy[block_size]) {
5353
for (int i = 0; i < block_dims; ++i) {
5454
for (int j = 0; j < block_dims; ++j) {
@@ -61,7 +61,8 @@ SYCL_EXTERNAL void MatrixMultiply(multi_ptr<const float, access::address_space::
6161
}
6262

6363
// Processes an individual 8x8 subset of image data
64-
SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::global_space, sycl::access::decorated::no> indataset, rgb* outdataset,
64+
SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::global_space, sycl::access::decorated::no> indataset,
65+
multi_ptr<rgb, access::address_space::global_space, sycl::access::decorated::no> outdataset,
6566
multi_ptr<const float, access::address_space::global_space, sycl::access::decorated::no> dct,
6667
multi_ptr<const float, access::address_space::global_space, sycl::access::decorated::no> dctinv,
6768
int start_index, int width) {
@@ -111,8 +112,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
111112

112113
// Computation of the discrete cosine transform of the image section of size
113114
// 8x8 for red values
114-
MatrixMultiply(dct, red_input, temp);
115-
MatrixMultiply(temp, dctinv, interim);
115+
MatrixMultiply(dct.get(), red_input, temp);
116+
MatrixMultiply(temp, dctinv.get(), interim);
116117

117118
// Computation of quantization phase using the quantization matrix
118119
for (int i = 0; i < block_size; ++i)
@@ -123,8 +124,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
123124
interim[i] = sycl::floor((interim[i] * quant[i]) + 0.5f);
124125

125126
// Computation of Inverse Discrete Cosine Transform (IDCT)
126-
MatrixMultiply(dctinv, interim, temp);
127-
MatrixMultiply(temp, dct, product);
127+
MatrixMultiply(dctinv.get(), interim, temp);
128+
MatrixMultiply(temp, dct.get(), product);
128129

129130
// Translating the pixels values from [-128, 127] range to [0, 255] range
130131
// and writing to output image data
@@ -146,8 +147,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
146147

147148
// Computation of the discrete cosine transform of the image section of size
148149
// 8x8 for blue values
149-
MatrixMultiply(dct, blue_input, temp);
150-
MatrixMultiply(temp, dctinv, interim);
150+
MatrixMultiply(dct.get(), blue_input, temp);
151+
MatrixMultiply(temp, dctinv.get(), interim);
151152

152153
// Computation of quantization phase using the quantization matrix
153154
for (int i = 0; i < block_size; ++i)
@@ -158,8 +159,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
158159
interim[i] = sycl::floor((interim[i] * quant[i]) + 0.5f);
159160

160161
// Computation of Inverse Discrete Cosine Transform (IDCT)
161-
MatrixMultiply(dctinv, interim, temp);
162-
MatrixMultiply(temp, dct, product);
162+
MatrixMultiply(dctinv.get(), interim, temp);
163+
MatrixMultiply(temp, dct.get(), product);
163164

164165
// Translating the pixels values from [-128, 127] range to [0, 255] range
165166
// and writing to output image data
@@ -181,8 +182,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
181182

182183
// Computation of the discrete cosine transform of the image section of size
183184
// 8x8 for green values
184-
MatrixMultiply(dct, green_input, temp);
185-
MatrixMultiply(temp, dctinv, interim);
185+
MatrixMultiply(dct.get(), green_input, temp);
186+
MatrixMultiply(temp, dctinv.get(), interim);
186187

187188
// Computation of quantization phase using the quantization matrix
188189
for (int i = 0; i < block_size; ++i)
@@ -193,8 +194,8 @@ SYCL_EXTERNAL void ProcessBlock(multi_ptr<const rgb, access::address_space::glob
193194
interim[i] = sycl::floor((interim[i] * quant[i]) + 0.5f);
194195

195196
// Computation of Inverse Discrete Cosine Transform (IDCT)
196-
MatrixMultiply(dctinv, interim, temp);
197-
MatrixMultiply(temp, dct, product);
197+
MatrixMultiply(dctinv.get(), interim, temp);
198+
MatrixMultiply(temp, dct.get(), product);
198199

199200
// Translating the pixels values from [-128, 127] range to [0, 255] range
200201
// and writing to output image data
@@ -235,7 +236,7 @@ void ProcessImage(rgb* indataset, rgb* outdataset, int width, int height) {
235236
h.parallel_for(
236237
range<2>(width / block_dims, height / block_dims), [=](auto idx) {
237238
int start_index = idx[0] * block_dims + idx[1] * block_dims * width;
238-
ProcessBlock(i_acc.get_multi_ptr<sycl::access::decorated::no>(), o_acc.get_multi_ptr<sycl::access::decorated::no>().get(),
239+
ProcessBlock(i_acc.get_multi_ptr<sycl::access::decorated::no>(), o_acc.get_multi_ptr<sycl::access::decorated::no>(),
239240
d_acc.get_multi_ptr<sycl::access::decorated::no>(), di_acc.get_multi_ptr<sycl::access::decorated::no>(), start_index,
240241
width);
241242
});

0 commit comments

Comments
 (0)