Skip to content

Commit 3fa9c4a

Browse files
authored
[SYCL][UR][L0 v2] fix use after free on queue (#18101)
1 parent 40e089a commit 3fa9c4a

File tree

10 files changed

+139
-92
lines changed

10 files changed

+139
-92
lines changed

unified-runtime/scripts/templates/queue_api.hpp.mako

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ from templates import helper as th
2929
struct ur_queue_t_ {
3030
virtual ~ur_queue_t_();
3131

32-
virtual void deferEventFree(ur_event_handle_t hEvent) = 0;
33-
3432
%for obj in th.get_queue_related_functions(specs, n, tags):
3533
%if not 'Release' in obj['name'] and not 'Retain' in obj['name']:
3634
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;

unified-runtime/source/adapters/level_zero/v2/event.cpp

Lines changed: 38 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,21 @@ uint64_t event_profiling_data_t::getEventEndTimestamp() {
6060
return adjustedEventEndTimestamp;
6161
}
6262

63+
void event_profiling_data_t::reset() {
64+
// This ensures that the event is consider as not timestamped.
65+
// We can't touch the recordEventEndTimestamp
66+
// as it may still be overwritten by the driver.
67+
// In case event is resued and recordStartTimestamp
68+
// is called again, adjustedEventEndTimestamp will always be updated correctly
69+
// to the new value as we wait for the event to be signaled.
70+
// If the event is reused on another queue, this means that the original
71+
// queue must have been destroyed (and the even pool released back to the
72+
// context) and the timstamp is already wrriten, so there's no race-condition
73+
// possible.
74+
adjustedEventStartTimestamp = 0;
75+
adjustedEventEndTimestamp = 0;
76+
}
77+
6378
void event_profiling_data_t::recordStartTimestamp(ur_device_handle_t hDevice) {
6479
zeTimerResolution = hDevice->ZeDeviceProperties->timerResolution;
6580
timestampMaxValue = hDevice->getTimestampMask();
@@ -98,16 +113,22 @@ void ur_event_handle_t_::resetQueueAndCommand(ur_queue_t_ *hQueue,
98113
ur_command_t commandType) {
99114
this->hQueue = hQueue;
100115
this->commandType = commandType;
101-
profilingData = event_profiling_data_t(getZeEvent());
116+
117+
if (hQueue) {
118+
UR_CALL_THROWS(hQueue->queueGetInfo(UR_QUEUE_INFO_DEVICE, sizeof(hDevice),
119+
reinterpret_cast<void *>(&hDevice),
120+
nullptr));
121+
} else {
122+
hDevice = nullptr;
123+
}
124+
125+
profilingData.reset();
102126
}
103127

104128
void ur_event_handle_t_::recordStartTimestamp() {
105-
assert(hQueue); // queue must be set before calling this
106-
107-
ur_device_handle_t hDevice;
108-
UR_CALL_THROWS(hQueue->queueGetInfo(UR_QUEUE_INFO_DEVICE, sizeof(hDevice),
109-
reinterpret_cast<void *>(&hDevice),
110-
nullptr));
129+
// queue and device must be set before calling this
130+
assert(hQueue);
131+
assert(hDevice);
111132

112133
profilingData.recordStartTimestamp(hDevice);
113134
}
@@ -141,33 +162,17 @@ ur_result_t ur_event_handle_t_::retain() {
141162
return UR_RESULT_SUCCESS;
142163
}
143164

144-
ur_result_t ur_event_handle_t_::releaseDeferred() {
145-
assert(zeEventQueryStatus(getZeEvent()) == ZE_RESULT_SUCCESS);
146-
assert(RefCount.load() == 0);
147-
148-
return this->forceRelease();
149-
}
150-
151165
ur_result_t ur_event_handle_t_::release() {
152166
if (!RefCount.decrementAndTest())
153167
return UR_RESULT_SUCCESS;
154168

155-
// Need to take a lock before checking if the event is timestamped.
156-
std::unique_lock<ur_shared_mutex> lock(Mutex);
157-
158-
if (isTimestamped() && !getEventEndTimestamp()) {
159-
// L0 will write end timestamp to this event some time in the future,
160-
// so we can't release it yet.
161-
assert(hQueue);
162-
hQueue->deferEventFree(this);
163-
return UR_RESULT_SUCCESS;
169+
if (event_pool) {
170+
event_pool->free(this);
171+
} else {
172+
std::get<v2::raii::ze_event_handle_t>(hZeEvent).release();
173+
delete this;
164174
}
165-
166-
// Need to unlock now, as forceRelease might deallocate memory backing
167-
// the Mutex.
168-
lock.unlock();
169-
170-
return this->forceRelease();
175+
return UR_RESULT_SUCCESS;
171176
}
172177

173178
bool ur_event_handle_t_::isTimestamped() const {
@@ -189,6 +194,8 @@ ur_context_handle_t ur_event_handle_t_::getContext() const { return hContext; }
189194

190195
ur_command_t ur_event_handle_t_::getCommandType() const { return commandType; }
191196

197+
ur_device_handle_t ur_event_handle_t_::getDevice() const { return hDevice; }
198+
192199
ur_event_handle_t_::ur_event_handle_t_(
193200
ur_context_handle_t hContext,
194201
v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool)
@@ -209,16 +216,6 @@ ur_event_handle_t_::ur_event_handle_t_(
209216
,
210217
nullptr) {}
211218

212-
ur_result_t ur_event_handle_t_::forceRelease() {
213-
if (event_pool) {
214-
event_pool->free(this);
215-
} else {
216-
std::get<v2::raii::ze_event_handle_t>(hZeEvent).release();
217-
delete this;
218-
}
219-
return UR_RESULT_SUCCESS;
220-
}
221-
222219
namespace ur::level_zero {
223220
ur_result_t urEventRetain(ur_event_handle_t hEvent) try {
224221
return hEvent->retain();
@@ -323,19 +320,14 @@ ur_result_t urEventGetProfilingInfo(
323320
}
324321
}
325322

326-
auto hQueue = hEvent->getQueue();
327-
if (!hQueue) {
323+
auto hDevice = hEvent->getDevice();
324+
if (!hDevice) {
328325
// no command has been enqueued with this event yet
329326
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
330327
}
331328

332329
ze_kernel_timestamp_result_t tsResult;
333330

334-
ur_device_handle_t hDevice;
335-
UR_CALL_THROWS(hQueue->queueGetInfo(UR_QUEUE_INFO_DEVICE, sizeof(hDevice),
336-
reinterpret_cast<void *>(&hDevice),
337-
nullptr));
338-
339331
auto zeTimerResolution = hDevice->ZeDeviceProperties->timerResolution;
340332
auto timestampMaxValue = hDevice->getTimestampMask();
341333

unified-runtime/source/adapters/level_zero/v2/event.hpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ struct event_profiling_data_t {
3535
bool recordingStarted() const;
3636
bool recordingEnded() const;
3737

38+
// clear the profiling data, allowing the event to be reused
39+
// for a new command
40+
void reset();
41+
3842
private:
3943
ze_event_handle_t hZeEvent;
4044

@@ -64,9 +68,6 @@ struct ur_event_handle_t_ : ur_object {
6468
// Set the queue and command that this event is associated with
6569
void resetQueueAndCommand(ur_queue_t_ *hQueue, ur_command_t commandType);
6670

67-
// releases event immediately
68-
ur_result_t forceRelease();
69-
7071
void reset();
7172
ze_event_handle_t getZeEvent() const;
7273

@@ -95,6 +96,9 @@ struct ur_event_handle_t_ : ur_object {
9596
// Get the type of the command that this event is associated with
9697
ur_command_t getCommandType() const;
9798

99+
// Get the device associated with this event
100+
ur_device_handle_t getDevice() const;
101+
98102
// Record the start timestamp of the event, to be obtained by
99103
// urEventGetProfilingInfo. resetQueueAndCommand should be
100104
// called before this.
@@ -122,6 +126,7 @@ struct ur_event_handle_t_ : ur_object {
122126
// commands
123127
ur_queue_t_ *hQueue = nullptr;
124128
ur_command_t commandType = UR_COMMAND_FORCE_UINT32;
129+
ur_device_handle_t hDevice = nullptr;
125130

126131
v2::event_flags_t flags;
127132
event_profiling_data_t profilingData;

unified-runtime/source/adapters/level_zero/v2/queue_api.hpp

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,6 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
138138
return UR_RESULT_SUCCESS;
139139
}
140140

141-
void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) {
142-
auto commandListLocked = commandListManager.lock();
143-
deferredEvents.push_back(hEvent);
144-
}
145-
146141
ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle(
147142
ur_queue_native_desc_t * /*pDesc*/, ur_native_handle_t *phNativeQueue) {
148143
*phNativeQueue = reinterpret_cast<ur_native_handle_t>(
@@ -160,12 +155,6 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() {
160155
ZE2UR_CALL(zeCommandListHostSynchronize,
161156
(commandListLocked->getZeCommandList(), UINT64_MAX));
162157

163-
// Free deferred events
164-
for (auto &hEvent : deferredEvents) {
165-
UR_CALL(hEvent->releaseDeferred());
166-
}
167-
deferredEvents.clear();
168-
169158
// Free deferred kernels
170159
for (auto &hKernel : submittedKernels) {
171160
UR_CALL(hKernel->release());

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
3434
ur_queue_flags_t flags;
3535

3636
lockable<ur_command_list_manager> commandListManager;
37-
std::vector<ur_event_handle_t> deferredEvents;
3837
std::vector<ur_kernel_handle_t> submittedKernels;
3938

4039
wait_list_view
@@ -46,8 +45,6 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
4645
ur_event_handle_t *hUserEvent,
4746
ur_command_t commandType);
4847

49-
void deferEventFree(ur_event_handle_t hEvent) override;
50-
5148
ur_result_t enqueueGenericFillUnlocked(
5249
ur_mem_buffer_t *hBuffer, size_t offset, size_t patternSize,
5350
const void *pPattern, size_t size, uint32_t numEventsInWaitList,

unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -272,39 +272,18 @@ TEST_P(EventPoolTestWithQueue, WithTimestamp) {
272272
&hDevice, nullptr));
273273

274274
ur_event_handle_t first;
275-
ze_event_handle_t zeFirst;
276275
{
277276
ASSERT_SUCCESS(
278277
urEnqueueTimestampRecordingExp(queue, false, 1, &hEvent, &first));
279-
zeFirst = first->getZeEvent();
280-
281278
urEventRelease(first); // should not actually release the event until
282279
// recording is completed
283280
}
284281
ur_event_handle_t second;
285-
ze_event_handle_t zeSecond;
286-
{
287-
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &second));
288-
zeSecond = second->getZeEvent();
289-
ASSERT_SUCCESS(urEventRelease(second));
290-
}
291-
ASSERT_NE(first, second);
292-
ASSERT_NE(zeFirst, zeSecond);
282+
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &second));
283+
// even if the event is reused, it should not be timestamped anymore
284+
ASSERT_FALSE(second->isTimestamped());
285+
ASSERT_SUCCESS(urEventRelease(second));
293286

294287
ASSERT_EQ(zeEventHostSignal(zeEvent.get()), ZE_RESULT_SUCCESS);
295-
296288
ASSERT_SUCCESS(urQueueFinish(queue));
297-
298-
// Now, the first event should be avilable for reuse
299-
ur_event_handle_t third;
300-
ze_event_handle_t zeThird;
301-
{
302-
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &third));
303-
zeThird = third->getZeEvent();
304-
ASSERT_SUCCESS(urEventRelease(third));
305-
306-
ASSERT_FALSE(third->isTimestamped());
307-
}
308-
ASSERT_EQ(first, third);
309-
ASSERT_EQ(zeFirst, zeThird);
310289
}

unified-runtime/test/adapters/level_zero/ze_helpers.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,18 @@
1010
#include <ur_api.h>
1111
#include <uur/fixtures.h>
1212

13+
static bool ze_initialized = false;
14+
1315
std::unique_ptr<_ze_event_handle_t, std::function<void(ze_event_handle_t)>>
1416
createZeEvent(ur_context_handle_t hContext, ur_device_handle_t hDevice) {
17+
if (!ze_initialized) {
18+
ze_result_t result = zeInit(ZE_INIT_FLAG_GPU_ONLY);
19+
if (result != ZE_RESULT_SUCCESS) {
20+
return nullptr;
21+
}
22+
ze_initialized = true;
23+
}
24+
1525
ze_event_pool_desc_t desc;
1626
desc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
1727
desc.pNext = nullptr;

unified-runtime/test/conformance/enqueue/urEnqueueTimestampRecording.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,61 @@ TEST_P(urEnqueueTimestampRecordingExpTest, SuccessBlocking) {
6666
ASSERT_SUCCESS(urEventRelease(event));
6767
}
6868

69+
TEST_P(urEnqueueTimestampRecordingExpTest,
70+
ReleaseEventWhileTimestampWritePending) {
71+
void *ptr;
72+
ASSERT_SUCCESS(
73+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
74+
75+
// Enqueue an operation to keep the device busy
76+
uint8_t pattern = 0xFF;
77+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
78+
1024 * 1024, 0, nullptr, nullptr));
79+
80+
ur_event_handle_t event1 = nullptr;
81+
ASSERT_SUCCESS(
82+
urEnqueueTimestampRecordingExp(queue, false, 0, nullptr, &event1));
83+
ASSERT_SUCCESS(urEventRelease(event1));
84+
85+
ur_event_handle_t event2 = nullptr;
86+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
87+
1024 * 1024, 0, nullptr, &event2));
88+
89+
// Make sure the new event does not contain profiling info (in case it's reused
90+
// by the adapter)
91+
ASSERT_EQ(urEventGetProfilingInfo(event2, UR_PROFILING_INFO_COMMAND_QUEUED,
92+
sizeof(uint64_t), nullptr, nullptr),
93+
UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE);
94+
ASSERT_SUCCESS(urEventRelease(event2));
95+
ASSERT_SUCCESS(urUSMFree(context, ptr));
96+
}
97+
98+
TEST_P(urEnqueueTimestampRecordingExpTest, ReleaseEventAfterQueueRelease) {
99+
void *ptr;
100+
ASSERT_SUCCESS(
101+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
102+
103+
// Enqueue an operation to keep the device busy
104+
uint8_t pattern = 0xFF;
105+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
106+
1024 * 1024, 0, nullptr, nullptr));
107+
108+
ur_event_handle_t event1 = nullptr;
109+
ASSERT_SUCCESS(
110+
urEnqueueTimestampRecordingExp(queue, false, 0, nullptr, &event1));
111+
112+
ASSERT_SUCCESS(urQueueRelease(queue));
113+
queue = nullptr;
114+
115+
uint64_t queuedTime = 0;
116+
ASSERT_SUCCESS(
117+
urEventGetProfilingInfo(event1, UR_PROFILING_INFO_COMMAND_QUEUED,
118+
sizeof(uint64_t), &queuedTime, nullptr));
119+
120+
ASSERT_SUCCESS(urEventRelease(event1));
121+
ASSERT_SUCCESS(urUSMFree(context, ptr));
122+
}
123+
69124
TEST_P(urEnqueueTimestampRecordingExpTest, InvalidNullHandleQueue) {
70125
ur_event_handle_t event = nullptr;
71126
ASSERT_EQ_RESULT(

unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,30 @@ TEST_P(urEventGetProfilingInfoTest, Success) {
128128
UR_PROFILING_INFO_COMMAND_COMPLETE);
129129
}
130130

131+
TEST_P(urEventGetProfilingInfoTest, ReleaseEventAfterQueueRelease) {
132+
void *ptr;
133+
ASSERT_SUCCESS(
134+
urUSMSharedAlloc(context, device, nullptr, nullptr, 1024 * 1024, &ptr));
135+
136+
// Enqueue an operation to keep the device busy
137+
uint8_t pattern = 0xFF;
138+
ur_event_handle_t event1;
139+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, sizeof(uint8_t), &pattern,
140+
1024 * 1024, 0, nullptr, &event1));
141+
142+
ASSERT_SUCCESS(urQueueRelease(queue));
143+
queue = nullptr;
144+
145+
uint64_t queuedTime = 0;
146+
auto ret = urEventGetProfilingInfo(event1, UR_PROFILING_INFO_COMMAND_QUEUED,
147+
sizeof(uint64_t), &queuedTime, nullptr);
148+
ASSERT_TRUE(ret == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION ||
149+
ret == UR_RESULT_SUCCESS);
150+
151+
ASSERT_SUCCESS(urEventRelease(event1));
152+
ASSERT_SUCCESS(urUSMFree(context, ptr));
153+
}
154+
131155
TEST_P(urEventGetProfilingInfoTest, InvalidNullHandle) {
132156
UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});
133157

0 commit comments

Comments
 (0)