Skip to content

Commit fb559fb

Browse files
committed
Refactor legacy assemble to also use RAII instruction location construction
1 parent c07944d commit fb559fb

File tree

1 file changed

+57
-101
lines changed

1 file changed

+57
-101
lines changed

libevmasm/Assembly.cpp

+57-101
Original file line numberDiff line numberDiff line change
@@ -1341,163 +1341,119 @@ LinkerObject const& Assembly::assembleLegacy() const
13411341
uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
13421342

13431343
LinkerObject::CodeSectionLocation codeSectionLocation;
1344+
codeSectionLocation.instructionLocations.reserve(items.size());
13441345
codeSectionLocation.start = 0;
1345-
size_t assemblyItemIndex = 0;
1346-
auto assembleInstruction = [&](auto&& _addInstruction) {
1347-
size_t start = ret.bytecode.size();
1348-
_addInstruction();
1349-
size_t end = ret.bytecode.size();
1350-
codeSectionLocation.instructionLocations.emplace_back(
1351-
LinkerObject::InstructionLocation{
1352-
.start = start,
1353-
.end = end,
1354-
.assemblyItemIndex = assemblyItemIndex
1355-
}
1356-
);
1357-
};
1358-
for (AssemblyItem const& item: items)
1346+
for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate)
13591347
{
1348+
// Record this item's instruction location(s) into codeSectionLocation.instructionLocations as a side effect of the helper object below.
1349+
AddInstructionLocation addInstructionLocation(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex);
13601350
// store position of the invalid jump destination
13611351
if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits<size_t>::max())
13621352
m_tagPositionsInBytecode[0] = ret.bytecode.size();
13631353

13641354
switch (item.type())
13651355
{
13661356
case Operation:
1367-
assembleInstruction([&](){
1368-
ret.bytecode += assembleOperation(item);
1369-
});
1357+
ret.bytecode += assembleOperation(item);
13701358
break;
13711359
case Push:
1372-
assembleInstruction([&](){
1373-
ret.bytecode += assemblePush(item);
1374-
});
1360+
ret.bytecode += assemblePush(item);
13751361
break;
13761362
case PushTag:
1377-
{
1378-
assembleInstruction([&](){
1379-
ret.bytecode.push_back(tagPush);
1380-
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1381-
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1382-
});
1363+
ret.bytecode.push_back(tagPush);
1364+
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1365+
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
13831366
break;
1384-
}
13851367
case PushData:
1386-
assembleInstruction([&]() {
1387-
ret.bytecode.push_back(dataRefPush);
1388-
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1389-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1390-
});
1368+
ret.bytecode.push_back(dataRefPush);
1369+
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1370+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13911371
break;
13921372
case PushSub:
1393-
assembleInstruction([&]() {
1394-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1395-
ret.bytecode.push_back(dataRefPush);
1396-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1397-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1398-
});
1373+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1374+
ret.bytecode.push_back(dataRefPush);
1375+
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1376+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13991377
break;
14001378
case PushSubSize:
14011379
{
1402-
assembleInstruction([&](){
1403-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1404-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1405-
item.setPushedValue(u256(s));
1406-
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1407-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1408-
ret.bytecode.resize(ret.bytecode.size() + b);
1409-
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1410-
toBigEndian(s, byr);
1411-
});
1380+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1381+
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1382+
item.setPushedValue(u256(s));
1383+
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1384+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1385+
ret.bytecode.resize(ret.bytecode.size() + b);
1386+
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1387+
toBigEndian(s, byr);
14121388
break;
14131389
}
14141390
case PushProgramSize:
1415-
{
1416-
assembleInstruction([&](){
1417-
ret.bytecode.push_back(dataRefPush);
1418-
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1419-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1420-
});
1391+
ret.bytecode.push_back(dataRefPush);
1392+
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1393+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
14211394
break;
1422-
}
14231395
case PushLibraryAddress:
14241396
{
1425-
assembleInstruction([&]() {
1426-
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1427-
ret.bytecode += bytecode;
1428-
ret.linkReferences.insert(linkRef);
1429-
});
1397+
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1398+
ret.bytecode += bytecode;
1399+
ret.linkReferences.insert(linkRef);
14301400
break;
14311401
}
14321402
case PushImmutable:
1433-
assembleInstruction([&]() {
1434-
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1435-
// Maps keccak back to the "identifier" std::string of that immutable.
1436-
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1437-
// Record the bytecode offset of the PUSH32 argument.
1438-
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1439-
// Advance bytecode by 32 bytes (default initialized).
1440-
ret.bytecode.resize(ret.bytecode.size() + 32);
1441-
});
1403+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1404+
// Maps keccak back to the "identifier" std::string of that immutable.
1405+
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1406+
// Record the bytecode offset of the PUSH32 argument.
1407+
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1408+
// Advance bytecode by 32 bytes (default initialized).
1409+
ret.bytecode.resize(ret.bytecode.size() + 32);
14421410
break;
14431411
case VerbatimBytecode:
14441412
ret.bytecode += assembleVerbatimBytecode(item);
14451413
break;
14461414
case AssignImmutable:
14471415
{
1416+
// This item decomposes into multiple EVM instructions, so we manually call `addInstructionLocation.emit()` between them.
14481417
// Expect 2 elements on stack (source, dest_base)
14491418
auto const& offsets = immutableReferencesBySub[item.data()].second;
14501419
for (size_t i = 0; i < offsets.size(); ++i)
14511420
{
14521421
if (i != offsets.size() - 1)
14531422
{
1454-
assembleInstruction([&]() {
1455-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1456-
});
1457-
assembleInstruction([&]() {
1458-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1459-
});
1423+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1424+
addInstructionLocation.emit();
1425+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::DUP2));
1426+
addInstructionLocation.emit();
14601427
}
1461-
assembleInstruction([&]() {
1462-
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1463-
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1464-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1465-
ret.bytecode += offsetBytes;
1466-
});
1467-
assembleInstruction([&]() {
1468-
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1469-
});
1470-
assembleInstruction([&]() {
1471-
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1472-
});
1428+
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1429+
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1430+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1431+
ret.bytecode += offsetBytes;
1432+
addInstructionLocation.emit();
1433+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::ADD));
1434+
addInstructionLocation.emit();
1435+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::MSTORE));
1436+
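// No emit needed on the final iteration: it's taken care of by the destructor of addInstructionLocation.
// NOTE(review): on earlier iterations nothing is emitted before the next instruction is pushed, so this MSTORE looks like it is merged into the following location (the old code recorded every MSTORE separately) - confirm against AddInstructionLocation::emit().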
14731437
}
14741438
if (offsets.empty())
14751439
{
1476-
assembleInstruction([&]() {
1477-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1478-
});
1479-
assembleInstruction([&]() {
1480-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1481-
});
1440+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1441+
addInstructionLocation.emit();
1442+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::POP));
1443+
// no emit needed here, it's taken care of by the destructor of addInstructionLocation
14821444
}
14831445
immutableReferencesBySub.erase(item.data());
14841446
break;
14851447
}
14861448
case PushDeployTimeAddress:
1487-
assembleInstruction([&]() {
1488-
ret.bytecode += assemblePushDeployTimeAddress();
1489-
});
1449+
ret.bytecode += assemblePushDeployTimeAddress();
14901450
break;
14911451
case Tag:
1492-
assembleInstruction([&](){
1493-
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1494-
});
1452+
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
14951453
break;
14961454
default:
14971455
solAssert(false, "Unexpected opcode while assembling.");
14981456
}
1499-
1500-
++assemblyItemIndex;
15011457
}
15021458

15031459
codeSectionLocation.end = ret.bytecode.size();

0 commit comments

Comments
 (0)