|
| 1 | +//===- StringTable.h - Table of strings tracked by offset -------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#ifndef LLVM_ADT_STRING_TABLE_H |
| 10 | +#define LLVM_ADT_STRING_TABLE_H |
| 11 | + |
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstddef>
#include <limits>
| 13 | + |
| 14 | +namespace llvm { |
| 15 | + |
| 16 | +/// A table of densely packed, null-terminated strings indexed by offset. |
| 17 | +/// |
| 18 | +/// This table abstracts a densely concatenated list of null-terminated strings, |
| 19 | +/// each of which can be referenced using an offset into the table. |
| 20 | +/// |
| 21 | +/// This requires and ensures that the string at offset 0 is also the empty |
| 22 | +/// string. This helps allow zero-initialized offsets form empty strings and |
| 23 | +/// avoids non-zero initialization when using a string literal pointer would |
| 24 | +/// allow a null pointer. |
| 25 | +/// |
| 26 | +/// The primary use case is having a single global string literal for the table |
| 27 | +/// contents, and offsets into it in other global data structures to avoid |
| 28 | +/// dynamic relocations of individual string literal pointers in those global |
| 29 | +/// data structures. |
| 30 | +class StringTable { |
| 31 | + StringRef Table; |
| 32 | + |
| 33 | +public: |
| 34 | + // An offset into one of these packed string tables, used to select a string |
| 35 | + // within the table. |
| 36 | + // |
| 37 | + // Typically these are created by TableGen or other code generator from |
| 38 | + // computed offsets, and it just wraps that integer into a type until it is |
| 39 | + // used with the relevant table. |
| 40 | + // |
| 41 | + // We also ensure that the empty string is at offset zero and default |
| 42 | + // constructing this class gives you an offset of zero. This makes default |
| 43 | + // constructing this type work similarly (after indexing the table) to default |
| 44 | + // constructing a `StringRef`. |
| 45 | + class Offset { |
| 46 | + // Note that we ensure the empty string is at offset zero. |
| 47 | + unsigned Value = 0; |
| 48 | + |
| 49 | + public: |
| 50 | + Offset() = default; |
| 51 | + Offset(unsigned Value) : Value(Value) {} |
| 52 | + |
| 53 | + unsigned value() const { return Value; } |
| 54 | + }; |
| 55 | + |
| 56 | + // We directly handle string literals with a templated converting constructor |
| 57 | + // because we *don't* want to do `strlen` on them -- we fully expect null |
| 58 | + // bytes in this input. This is somewhat the opposite of how `StringLiteral` |
| 59 | + // works. |
| 60 | + template <size_t N> |
| 61 | + constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) { |
| 62 | + assert(!Table.empty() && "Requires at least a valid empty string."); |
| 63 | + assert(Table[0] == '\0' && "Offset zero must be the empty string."); |
| 64 | + // Ensure that `strlen` from any offset cannot overflow the end of the table |
| 65 | + // by insisting on a null byte at the end. |
| 66 | + assert(Table.back() == '\0' && "Last byte must be a null byte."); |
| 67 | + } |
| 68 | + |
| 69 | + // Get a string from the table starting with the provided offset. The returned |
| 70 | + // `StringRef` is in fact null terminated, and so can be converted safely to a |
| 71 | + // C-string if necessary for a system API. |
| 72 | + StringRef operator[](Offset O) const { |
| 73 | + assert(O.value() < Table.size() && "Out of bounds offset!"); |
| 74 | + return Table.data() + O.value(); |
| 75 | + } |
| 76 | + |
| 77 | + /// Returns the byte size of the table. |
| 78 | + size_t size() const { return Table.size(); } |
| 79 | +}; |
| 80 | + |
| 81 | +} // namespace llvm |
| 82 | + |
| 83 | +#endif // LLVM_ADT_STRING_TABLE_H |
0 commit comments