@@ -491,26 +491,28 @@ Examples:
491
491
i32 %byte_offset,
492
492
i32 0)
493
493
494
- Texture and Typed Buffer Stores
495
- -------------------------------
494
+ Stores
495
+ ------
496
496
497
- *relevant types: Textures and TypedBuffer *
497
+ *relevant types: Textures and Buffer*
498
498
499
- The `TextureStore `_ and `BufferStore `_ DXIL operations always write all four
500
- 32-bit components to a texture or a typed buffer. While both operations include
501
- a mask parameter, it is specified that the mask must cover all components when
502
- used with these types .
499
+ The `TextureStore`_, `BufferStore`_, and `RawBufferStore`_ DXIL operations
500
+ write four components to a texture or a buffer. These include a mask argument
501
+ that is used when fewer than 4 components are written, but notably this only
502
+ takes on the contiguous x, xy, xyz, and xyzw values.
503
503
504
- The store operations that we define as intrinsics behave similarly, and will
505
- only accept writes to the whole of the contained type. This differs from the
506
- loads above, but this makes sense to do from a semantics preserving point of
507
- view. Thus, texture and buffer stores may only operate on 4-element vectors of
508
- types that are 32-bits or fewer, such as ``<4 x i32> ``, ``<4 x float> ``, and
509
- ``<4 x half> ``, and 2 element vectors of 64-bit types like ``<2 x double> `` and
510
- ``<2 x i64> ``.
504
+ We define the LLVM store intrinsics to accept vectors when storing multiple
505
+ components rather than using `undef` and a mask, but otherwise match the DXIL
506
+ ops fairly closely.
511
507
512
- .. _BufferStore : https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
513
508
.. _TextureStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore
509
+ .. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
510
+ .. _RawBufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferstore
511
+
512
+ For TypedBuffer, we only need one coordinate, and we must always write a vector
513
+ since partial writes aren't possible. Similarly to the load operations
514
+ described above, we handle 64-bit types specially and only handle 2-element
515
+ vectors rather than 4.
514
516
515
517
Examples:
516
518
@@ -548,3 +550,85 @@ Examples:
548
550
target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
549
551
call void @llvm.dx.resource.store.typedbuffer.tdx.Buffer_v2f64_1_0_0t(
550
552
target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
553
+
554
+ For RawBuffer, we need two indices and we accept scalars and vectors of 4 or
555
+ fewer elements. Note that we do allow vectors of 4 64-bit elements here.
556
+
557
+ Arguments:
558
+
559
+ .. list-table:: ``@llvm.dx.resource.store.rawbuffer``
560
+ :header-rows: 1
561
+
562
+ * - Argument
563
+ -
564
+ - Type
565
+ - Description
566
+ * - Return value
567
+ -
568
+ - ``void``
569
+ -
570
+ * - ``%buffer``
571
+ - 0
572
+ - ``target("dx.RawBuffer", ...)``
573
+ - The buffer to store into
574
+ * - ``%index``
575
+ - 1
576
+ - ``i32``
577
+ - Index into the buffer
578
+ * - ``%offset``
579
+ - 2
580
+ - ``i32``
581
+ - Byte offset into structured buffer elements
582
+ * - ``%data``
583
+ - 3
584
+ - Scalar or vector
585
+ - The data to store
586
+
587
+ Examples:
588
+
589
+ .. code-block:: llvm
590
+
591
+ ; float
592
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f32_1_0_0t.f32(
593
+ target("dx.RawBuffer", float, 1, 0, 0) %buffer,
594
+ i32 %index, i32 0, float %data)
595
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.f32(
596
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
597
+ i32 %index, i32 0, float %data)
598
+
599
+ ; float4
600
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f32_1_0_0t.v4f32(
601
+ target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
602
+ i32 %index, i32 0, <4 x float> %data)
603
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f32(
604
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
605
+ i32 %index, i32 0, <4 x float> %data)
606
+
607
+ ; struct S0 { float4 f; int4 i; }
608
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
609
+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
610
+ i32 %index, i32 0, <4 x float> %data0)
611
+ call void @llvm.dx.resource.store.rawbuffer.v4i32(
612
+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
613
+ i32 %index, i32 16, <4 x i32> %data1)
614
+
615
+ ; struct Q { float4 f; half3 h; }
616
+ ; struct R { int z; Q x; }
617
+ call void @llvm.dx.resource.store.rawbuffer.i32(
618
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
619
+ %buffer,
620
+ i32 %index, i32 0, i32 %data0)
621
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
622
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
623
+ %buffer,
624
+ i32 %index, i32 4, <4 x float> %data1)
625
+ call void @llvm.dx.resource.store.rawbuffer.v3f16(
626
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
627
+ %buffer,
628
+ i32 %index, i32 20, <3 x half> %data2)
629
+
630
+ ; byteaddressbuf.Store<double4>
631
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f64(
632
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
633
+ i32 %index, i32 0, <4 x double> %data)
634
+
0 commit comments