Skip to content

Commit f93f925

Browse files
lenary and svs-quic authored
[RISCV][MC] Support Assembling 48- and 64-bit Instructions (#110022)
This adds `.insn` support for assembling instructions of 48- and 64-bits (only when giving an explicit length). Disassembly already knows to bunch up the instruction bits for these instructions. This changes some error messages so they are a little clearer. Co-authored-by: Sudharsan Veeravalli <[email protected]>
1 parent 2d666de commit f93f925

File tree

9 files changed

+162
-17
lines changed

9 files changed

+162
-17
lines changed

llvm/docs/RISCVUsage.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,20 @@ line. This currently applies to the following extensions:
426426

427427
No extensions have experimental intrinsics.
428428

429+
Long (>32-bit) Instruction Support
430+
==================================
431+
432+
RISC-V is a variable-length ISA, but the standard currently only defines 16- and 32-bit instructions. The specification describes longer instruction encodings, but these are not ratified.
433+
434+
The LLVM disassembler, ``llvm-objdump``, uses the longer instruction encodings described in the specification to guess the instruction length (up to 176 bits) and will group the disassembly view of encoding bytes correspondingly.
435+
436+
The LLVM integrated assembler for RISC-V supports two different kinds of ``.insn`` directive, for assembling instructions that LLVM does not yet support:
437+
438+
* ``.insn type, args*`` which takes a known instruction type, and a list of fields. It is strongly recommended that you use this variant of the directive if your instruction fits an existing instruction type.
439+
* ``.insn [ length , ] encoding`` which takes an (optional) explicit length (in bytes) and a raw encoding for the instruction. When given an explicit length, this variant can encode instructions up to 64 bits long. The encoding part of the directive must specify every bit of the instruction; none are filled in for the user. When used without the optional length, this variant of the directive uses the LSBs of the raw encoding to determine whether the instruction is 16 or 32 bits long. LLVM does not infer that an instruction might be longer than 32 bits; in that case, the user must give the length explicitly.
440+
441+
It is strongly recommended to use the ``.insn`` directive for assembling unsupported instructions instead of ``.word`` or ``.hword``, because it will produce the correct mapping symbols to mark the word as an instruction, not data.
442+
429443
Global Pointer (GP) Relaxation and the Small Data Limit
430444
=======================================================
431445

llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/TargetParser/RISCVISAInfo.h"
4242

4343
#include <limits>
44+
#include <optional>
4445

4546
using namespace llvm;
4647

@@ -719,6 +720,8 @@ struct RISCVOperand final : public MCParsedAsmOperand {
719720
bool isUImm16() const { return IsUImm<16>(); }
720721
bool isUImm20() const { return IsUImm<20>(); }
721722
bool isUImm32() const { return IsUImm<32>(); }
723+
bool isUImm48() const { return IsUImm<48>(); }
724+
bool isUImm64() const { return IsUImm<64>(); }
722725

723726
bool isUImm8GE32() const {
724727
int64_t Imm;
@@ -3166,8 +3169,8 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
31663169
StringRef Format;
31673170
SMLoc ErrorLoc = Parser.getTok().getLoc();
31683171
if (Parser.parseIdentifier(Format)) {
3169-
// Try parsing .insn [length], value
3170-
int64_t Length = 0;
3172+
// Try parsing .insn [ length , ] value
3173+
std::optional<int64_t> Length;
31713174
int64_t Value = 0;
31723175
if (Parser.parseIntToken(
31733176
Value, "expected instruction format or an integer constant"))
@@ -3176,25 +3179,66 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
31763179
Length = Value;
31773180
if (Parser.parseIntToken(Value, "expected an integer constant"))
31783181
return true;
3182+
3183+
if (*Length == 0 || (*Length % 2) != 0)
3184+
return Error(ErrorLoc,
3185+
"instruction lengths must be a non-zero multiple of two");
3186+
3187+
// TODO: Support Instructions > 64 bits.
3188+
if (*Length > 8)
3189+
return Error(ErrorLoc,
3190+
"instruction lengths over 64 bits are not supported");
3191+
}
3192+
3193+
// We only derive a length from the encoding for 16- and 32-bit
3194+
// instructions, as the encodings for longer instructions are not frozen in
3195+
// the spec.
3196+
int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2;
3197+
3198+
if (Length) {
3199+
// Only check the length against the encoding if the length is present and
3200+
// could match
3201+
if ((*Length <= 4) && (*Length != EncodingDerivedLength))
3202+
return Error(ErrorLoc,
3203+
"instruction length does not match the encoding");
3204+
3205+
if (!isUIntN(*Length * 8, Value))
3206+
return Error(ErrorLoc, "encoding value does not fit into instruction");
3207+
} else {
3208+
if (!isUIntN(EncodingDerivedLength * 8, Value))
3209+
return Error(ErrorLoc, "encoding value does not fit into instruction");
31793210
}
31803211

3181-
// TODO: Add support for long instructions
3182-
int64_t RealLength = (Value & 3) == 3 ? 4 : 2;
3183-
if (!isUIntN(RealLength * 8, Value))
3184-
return Error(ErrorLoc, "invalid operand for instruction");
3185-
if (RealLength == 2 && !AllowC)
3212+
if (!AllowC && (EncodingDerivedLength == 2))
31863213
return Error(ErrorLoc, "compressed instructions are not allowed");
3187-
if (Length != 0 && Length != RealLength)
3188-
return Error(ErrorLoc, "instruction length mismatch");
31893214

31903215
if (getParser().parseEOL("invalid operand for instruction")) {
31913216
getParser().eatToEndOfStatement();
31923217
return true;
31933218
}
31943219

3195-
emitToStreamer(getStreamer(), MCInstBuilder(RealLength == 2 ? RISCV::Insn16
3196-
: RISCV::Insn32)
3197-
.addImm(Value));
3220+
unsigned Opcode;
3221+
if (Length) {
3222+
switch (*Length) {
3223+
case 2:
3224+
Opcode = RISCV::Insn16;
3225+
break;
3226+
case 4:
3227+
Opcode = RISCV::Insn32;
3228+
break;
3229+
case 6:
3230+
Opcode = RISCV::Insn48;
3231+
break;
3232+
case 8:
3233+
Opcode = RISCV::Insn64;
3234+
break;
3235+
default:
3236+
llvm_unreachable("Error should have already been emitted");
3237+
}
3238+
} else
3239+
Opcode = (EncodingDerivedLength == 2) ? RISCV::Insn16 : RISCV::Insn32;
3240+
3241+
emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value));
31983242
return false;
31993243
}
32003244

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ enum OperandType : unsigned {
309309
OPERAND_UIMM12,
310310
OPERAND_UIMM16,
311311
OPERAND_UIMM32,
312+
OPERAND_UIMM48,
313+
OPERAND_UIMM64,
312314
OPERAND_ZERO,
313315
OPERAND_SIMM5,
314316
OPERAND_SIMM5_PLUS1,

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,21 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
355355
support::endian::write(CB, Bits, llvm::endianness::little);
356356
break;
357357
}
358+
case 6: {
359+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI) & 0xffff'ffff'ffffu;
360+
SmallVector<char, 8> Encoding;
361+
support::endian::write(Encoding, Bits, llvm::endianness::little);
362+
assert(Encoding[6] == 0 && Encoding[7] == 0 &&
363+
"Unexpected encoding for 48-bit instruction");
364+
Encoding.truncate(6);
365+
CB.append(Encoding);
366+
break;
367+
}
368+
case 8: {
369+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
370+
support::endian::write(CB, Bits, llvm::endianness::little);
371+
break;
372+
}
358373
}
359374

360375
++MCNumEmitted; // Keep track of the # of mi's emitted.

llvm/lib/Target/RISCV/RISCVInstrFormats.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,22 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
266266
let Size = 4;
267267
}
268268

269+
// Base class for 48-bit instruction definitions. It forwards all of its
// arguments to RVInstCommon, declares 48-bit wide Inst and SoftFail fields,
// and sets Size to 6 (presumably bytes, i.e. 48 bits -- confirm against
// RVInstCommon's use of Size).
class RVInst48<dag outs, dag ins, string opcodestr, string argstr,
270+
list<dag> pattern, InstFormat format>
271+
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
272+
field bits<48> Inst;
273+
field bits<48> SoftFail = 0;
274+
let Size = 6;
275+
}
276+
277+
// Base class for 64-bit instruction definitions. It forwards all of its
// arguments to RVInstCommon, declares 64-bit wide Inst and SoftFail fields,
// and sets Size to 8 (presumably bytes, i.e. 64 bits -- confirm against
// RVInstCommon's use of Size).
class RVInst64<dag outs, dag ins, string opcodestr, string argstr,
278+
list<dag> pattern, InstFormat format>
279+
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
280+
field bits<64> Inst;
281+
field bits<64> SoftFail = 0;
282+
let Size = 8;
283+
}
284+
269285
// Pseudo instructions
270286
class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
271287
: RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ def uimm7 : RISCVUImmOp<7>;
241241
def uimm8 : RISCVUImmOp<8>;
242242
def uimm16 : RISCVUImmOp<16>;
243243
def uimm32 : RISCVUImmOp<32>;
244+
def uimm48 : RISCVUImmOp<48>;
245+
def uimm64 : RISCVUImmOp<64>;
244246
def simm12 : RISCVSImmLeafOp<12> {
245247
let MCOperandPredicate = [{
246248
int64_t Imm;
@@ -1155,6 +1157,16 @@ def Insn32 : RVInst<(outs), (ins uimm32:$value), "", "", [], InstFormatOther> {
11551157
let Inst{31-0} = value;
11561158
let AsmString = ".insn 0x4, $value";
11571159
}
1160+
// Raw 48-bit instruction: the single uimm48 operand supplies every bit of the
// encoding (Inst{47-0}), and it is printed as `.insn 0x6, $value`.
def Insn48 : RVInst48<(outs), (ins uimm48:$value), "", "", [], InstFormatOther> {
1161+
bits<48> value;
1162+
let Inst{47-0} = value;
1163+
let AsmString = ".insn 0x6, $value";
1164+
}
1165+
// Raw 64-bit instruction: the single uimm64 operand supplies every bit of the
// encoding (Inst{63-0}), and it is printed as `.insn 0x8, $value`.
def Insn64 : RVInst64<(outs), (ins uimm64:$value), "", "", [], InstFormatOther> {
1166+
bits<64> value;
1167+
let Inst{63-0} = value;
1168+
let AsmString = ".insn 0x8, $value";
1169+
}
11581170
}
11591171

11601172
// Use InstAliases to match these so that we can combine the insn and format

llvm/test/MC/RISCV/insn-invalid.s

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,30 @@
2626

2727
.insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant
2828
.insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant
29-
.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
30-
.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
31-
.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
32-
.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
29+
3330
.insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
31+
32+
.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
33+
.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
34+
.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
35+
36+
.insn 0x0, 0x0 # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two
37+
.insn 0x1, 0xff # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two
38+
.insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
39+
40+
.insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
41+
.insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
42+
.insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
43+
44+
.insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
45+
46+
.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
47+
.insn 0x8, 0x0000000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
48+
49+
.insn 0x2, 0x10001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
50+
.insn 0x4, 0x100000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
51+
.insn 0x6, 0x100000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
52+
.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: expected an integer constant
53+
54+
.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
55+
.insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed

llvm/test/MC/RISCV/insn.s

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,23 @@ target:
164164
# CHECK-ASM: encoding: [0x13,0x00,0x00,0x00]
165165
# CHECK-OBJ: addi zero, zero, 0x0
166166
.insn 0x4, 0x13
167+
168+
# CHECK-ASM: .insn 0x6, 31
169+
# CHECK-ASM: encoding: [0x1f,0x00,0x00,0x00,0x00,0x00]
170+
# CHECK-OBJ: <unknown>
171+
.insn 6, 0x1f
172+
173+
# CHECK-ASM: .insn 0x4, 65503
174+
# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00]
175+
# CHECK-OBJ: <unknown>
176+
.insn 0xffdf
177+
178+
# CHECK-ASM: .insn 0x8, 63
179+
# CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
180+
# CHECK-OBJ: <unknown>
181+
.insn 8, 0x3f
182+
183+
# CHECK-ASM: .insn 0x4, 65471
184+
# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00]
185+
# CHECK-OBJ: <unknown>
186+
.insn 0xffbf

llvm/test/MC/RISCV/insn_c-invalid.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
## Make sure the fake mnemonics we use to match these in the tablegened asm match table aren't exposed.
2525
.insn_cr 2, 9, a0, a1 # CHECK: :[[#@LINE]]:1: error: unknown directive
2626

27-
.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
27+
.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction

0 commit comments

Comments
 (0)