-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[lldb] Use LLVM's helper for Unicode conversion (NFC) #112582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Jonas Devlieghere (JDevlieghere) ChangesThe codecvt header has been deprecated in C++17. Use locale to convert between std::string and std::wstring in Editline. Full diff: https://github.com/llvm/llvm-project/pull/112582.diff 2 Files Affected:
diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h
index 9049b106f02a34..57e2c831e3499d 100644
--- a/lldb/include/lldb/Host/Editline.h
+++ b/lldb/include/lldb/Host/Editline.h
@@ -30,9 +30,6 @@
#include "lldb/Host/Config.h"
-#if LLDB_EDITLINE_USE_WCHAR
-#include <codecvt>
-#endif
#include <locale>
#include <sstream>
#include <vector>
@@ -366,9 +363,6 @@ class Editline {
void SetEditLinePromptCallback(EditlinePromptCallbackType callbackFn);
void SetGetCharacterFunction(EditlineGetCharCallbackType callbackFn);
-#if LLDB_EDITLINE_USE_WCHAR
- std::wstring_convert<std::codecvt_utf8<wchar_t>> m_utf8conv;
-#endif
::EditLine *m_editline = nullptr;
EditlineHistorySP m_history_sp;
bool m_in_history = false;
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 561ec228cdb23f..99a5003e7a83db 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -76,6 +76,46 @@ using namespace lldb_private::line_editor;
#endif // #if LLDB_EDITLINE_USE_WCHAR
+#if LLDB_EDITLINE_USE_WCHAR
+std::string ToBytes(const std::wstring &in) {
+ static std::locale locale("C.UTF-8");
+ static const auto &cvt =
+ std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
+
+ const size_t length = in.length();
+ std::string output(length + 1, 0x0);
+
+ std::mbstate_t mbs{};
+ const wchar_t *in_next;
+ char *out_next;
+
+ if (cvt.out(mbs, in.data(), in.data() + length + 1, in_next, output.data(),
+ output.data() + output.length() + 1,
+ out_next) == std::codecvt_base::ok)
+ return output;
+ return {};
+}
+
+std::wstring FromBytes(const std::string &in) {
+ static std::locale locale("C.UTF-8");
+ static const auto &cvt =
+ std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
+
+ const size_t length = in.length();
+ std::wstring output(length + 1, 0x0);
+
+ std::mbstate_t mbs{};
+ const char *in_next;
+ wchar_t *out_next;
+
+ if (cvt.in(mbs, in.data(), in.data() + length + 1, in_next, output.data(),
+ output.data() + output.length() + 1,
+ out_next) == std::codecvt_base::ok)
+ return output;
+ return {};
+}
+#endif
+
bool IsOnlySpaces(const EditLineStringType &content) {
for (wchar_t ch : content) {
if (ch != EditLineCharType(' '))
@@ -444,7 +484,7 @@ StringList Editline::GetInputAsStringList(int line_count) {
if (line_count == 0)
break;
#if LLDB_EDITLINE_USE_WCHAR
- lines.AppendString(m_utf8conv.to_bytes(line));
+ lines.AppendString(ToBytes(line));
#else
lines.AppendString(line);
#endif
@@ -636,7 +676,7 @@ unsigned char Editline::BreakLineCommand(int ch) {
if (m_fix_indentation_callback) {
StringList lines = GetInputAsStringList(m_current_line_index + 1);
#if LLDB_EDITLINE_USE_WCHAR
- lines.AppendString(m_utf8conv.to_bytes(new_line_fragment));
+ lines.AppendString(ToBytes(new_line_fragment));
#else
lines.AppendString(new_line_fragment);
#endif
@@ -685,7 +725,7 @@ unsigned char Editline::EndOrAddLineCommand(int ch) {
for (unsigned index = 0; index < lines.GetSize(); index++) {
#if LLDB_EDITLINE_USE_WCHAR
m_input_lines.insert(m_input_lines.end(),
- m_utf8conv.from_bytes(lines[index]));
+ FromBytes(lines[index]));
#else
m_input_lines.insert(m_input_lines.end(), lines[index]);
#endif
@@ -869,7 +909,7 @@ unsigned char Editline::FixIndentationCommand(int ch) {
currentLine = currentLine.erase(0, -indent_correction);
}
#if LLDB_EDITLINE_USE_WCHAR
- m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine);
+ m_input_lines[m_current_line_index] = FromBytes(currentLine);
#else
m_input_lines[m_current_line_index] = currentLine;
#endif
@@ -1502,7 +1542,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) {
} else {
m_history_sp->Enter(input);
#if LLDB_EDITLINE_USE_WCHAR
- line = m_utf8conv.to_bytes(SplitLines(input)[0]);
+ line = ToBytes(SplitLines(input)[0]);
#else
line = SplitLines(input)[0];
#endif
@@ -1574,7 +1614,8 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) {
out = (unsigned char)ch;
return true;
#else
- std::codecvt_utf8<wchar_t> cvt;
+ std::locale locale("C.UTF-8");
+ const auto &cvt = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(locale);
llvm::SmallString<4> input;
for (;;) {
const char *from_next;
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you can't reproduce the buildbot errors, I can take a look, but I think the failures are likely due to the string length problems.
Actually (and, in retrospect, obviously) llvm already has functions for wstring<->string conversions, so it would be best to use those. |
The codecvt header has been deprecated in C++17. Use LLVM's unicode helpers to convert between UTF-8 and UTF-16.
1b39121
to
de19f43
Compare
@labath FYI this is ready for review. |
Friendly ping |
lldb/source/Host/common/Editline.cpp
Outdated
@@ -12,6 +12,8 @@ | |||
|
|||
#include "lldb/Host/Editline.h" | |||
|
|||
#include <codecvt> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this is not needed now (?)
lldb/source/Host/common/Editline.cpp
Outdated
const char *cur_ptr = input.begin(); | ||
const char *end_ptr = input.end(); | ||
llvm::UTF32 code_point = 0; | ||
llvm::ConversionResult cr = llvm::convertUTF8Sequence( | ||
(const llvm::UTF8 **)&cur_ptr, (const llvm::UTF8 *)end_ptr, &code_point, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const char *cur_ptr = input.begin(); | |
const char *end_ptr = input.end(); | |
llvm::UTF32 code_point = 0; | |
llvm::ConversionResult cr = llvm::convertUTF8Sequence( | |
(const llvm::UTF8 **)&cur_ptr, (const llvm::UTF8 *)end_ptr, &code_point, | |
auto *cur_ptr = reinterpret_cast<const llvm::UTF8 *>(input.begin()); | |
auto *end_ptr = reinterpret_cast<const llvm::UTF8 *>(input.end()); | |
llvm::UTF32 code_point = 0; | |
llvm::ConversionResult cr = llvm::convertUTF8Sequence( | |
&cur_ptr, end_ptr, &code_point, |
I think this is "less undefined" :P
The codecvt header has been deprecated in C++17. Use LLVM's unicode helpers to convert between UTF-8 and UTF-16.
The codecvt header has been deprecated in C++17. Use LLVM's unicode helpers to convert between UTF-8 and UTF-16. (cherry picked from commit 4927725)
…10109f1db2e53c60d35881 [lldb] Use LLVM's helper for Unicode conversion (NFC) (llvm#112582)
The codecvt header has been deprecated in C++17. Use LLVM's unicode
helpers to convert between UTF-8 and UTF-16.