Skip to content

Commit 3f8f974

Browse files
rwmjones authored and amodra committed
PR 31283 windmc: Parse input correctly on big endian hosts
On big endian hosts (e.g. s390x) the windmc tool fails to parse even trivial files:

  $ cat test.mc
  ;
  $ ./binutils/windmc ./test.mc
  In test.mc at line 1: parser: syntax error.
  In test.mc at line 1: fatal: syntax error.

The tool starts by reading the input as Windows CP1252 and then converting it internally into an array of UTF-16LE, which it then processes as an array of unsigned short (typedef unichar). There are lots of ways this is wrong, but in the specific case of big endian machines the little endian pairs of bytes are byte-swapped.

For example, the ';' character in the input above is first converted to the UTF-16LE byte sequence { 0x3b, 0x00 }, which is then cast to unsigned short. On a big endian machine the first unichar appears to be 0x3b00. The lexer is unable to recognize this as the comment character ((unichar)';') and so parsing fails.

The simple fix is to convert the input to UTF-16BE on big endian machines (and do the reverse conversion when writing the output).

Fixes: https://sourceware.org/bugzilla/show_bug.cgi?id=31283
Signed-off-by: Richard W.M. Jones <[email protected]>
1 parent 1668838 commit 3f8f974

File tree

4 files changed

+259
-4
lines changed

4 files changed

+259
-4
lines changed

binutils/config.in

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#endif
88
#define __CONFIG_H__ 1
99

10+
/* Define if building universal (internal helper macro) */
11+
#undef AC_APPLE_UNIVERSAL_BUILD
12+
1013
/* Should ar and ranlib use -D behavior by default? */
1114
#undef DEFAULT_AR_DETERMINISTIC
1215

@@ -256,6 +259,18 @@
256259
/* Version number of package */
257260
#undef VERSION
258261

262+
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
263+
significant byte first (like Motorola and SPARC, unlike Intel). */
264+
#if defined AC_APPLE_UNIVERSAL_BUILD
265+
# if defined __BIG_ENDIAN__
266+
# define WORDS_BIGENDIAN 1
267+
# endif
268+
#else
269+
# ifndef WORDS_BIGENDIAN
270+
# undef WORDS_BIGENDIAN
271+
# endif
272+
#endif
273+
259274
/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
260275
`char[]'. */
261276
#undef YYTEXT_POINTER

binutils/configure

Lines changed: 228 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4725,6 +4725,231 @@ $as_echo "$ac_cv_safe_to_define___extensions__" >&6; }
47254725

47264726

47274727

4728+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
4729+
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
4730+
if ${ac_cv_c_bigendian+:} false; then :
4731+
$as_echo_n "(cached) " >&6
4732+
else
4733+
ac_cv_c_bigendian=unknown
4734+
# See if we're dealing with a universal compiler.
4735+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4736+
/* end confdefs.h. */
4737+
#ifndef __APPLE_CC__
4738+
not a universal capable compiler
4739+
#endif
4740+
typedef int dummy;
4741+
4742+
_ACEOF
4743+
if ac_fn_c_try_compile "$LINENO"; then :
4744+
4745+
# Check for potential -arch flags. It is not universal unless
4746+
# there are at least two -arch flags with different values.
4747+
ac_arch=
4748+
ac_prev=
4749+
for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
4750+
if test -n "$ac_prev"; then
4751+
case $ac_word in
4752+
i?86 | x86_64 | ppc | ppc64)
4753+
if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
4754+
ac_arch=$ac_word
4755+
else
4756+
ac_cv_c_bigendian=universal
4757+
break
4758+
fi
4759+
;;
4760+
esac
4761+
ac_prev=
4762+
elif test "x$ac_word" = "x-arch"; then
4763+
ac_prev=arch
4764+
fi
4765+
done
4766+
fi
4767+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4768+
if test $ac_cv_c_bigendian = unknown; then
4769+
# See if sys/param.h defines the BYTE_ORDER macro.
4770+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4771+
/* end confdefs.h. */
4772+
#include <sys/types.h>
4773+
#include <sys/param.h>
4774+
4775+
int
4776+
main ()
4777+
{
4778+
#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
4779+
&& defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
4780+
&& LITTLE_ENDIAN)
4781+
bogus endian macros
4782+
#endif
4783+
4784+
;
4785+
return 0;
4786+
}
4787+
_ACEOF
4788+
if ac_fn_c_try_compile "$LINENO"; then :
4789+
# It does; now see whether it defined to BIG_ENDIAN or not.
4790+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4791+
/* end confdefs.h. */
4792+
#include <sys/types.h>
4793+
#include <sys/param.h>
4794+
4795+
int
4796+
main ()
4797+
{
4798+
#if BYTE_ORDER != BIG_ENDIAN
4799+
not big endian
4800+
#endif
4801+
4802+
;
4803+
return 0;
4804+
}
4805+
_ACEOF
4806+
if ac_fn_c_try_compile "$LINENO"; then :
4807+
ac_cv_c_bigendian=yes
4808+
else
4809+
ac_cv_c_bigendian=no
4810+
fi
4811+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4812+
fi
4813+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4814+
fi
4815+
if test $ac_cv_c_bigendian = unknown; then
4816+
# See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
4817+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4818+
/* end confdefs.h. */
4819+
#include <limits.h>
4820+
4821+
int
4822+
main ()
4823+
{
4824+
#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
4825+
bogus endian macros
4826+
#endif
4827+
4828+
;
4829+
return 0;
4830+
}
4831+
_ACEOF
4832+
if ac_fn_c_try_compile "$LINENO"; then :
4833+
# It does; now see whether it defined to _BIG_ENDIAN or not.
4834+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4835+
/* end confdefs.h. */
4836+
#include <limits.h>
4837+
4838+
int
4839+
main ()
4840+
{
4841+
#ifndef _BIG_ENDIAN
4842+
not big endian
4843+
#endif
4844+
4845+
;
4846+
return 0;
4847+
}
4848+
_ACEOF
4849+
if ac_fn_c_try_compile "$LINENO"; then :
4850+
ac_cv_c_bigendian=yes
4851+
else
4852+
ac_cv_c_bigendian=no
4853+
fi
4854+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4855+
fi
4856+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4857+
fi
4858+
if test $ac_cv_c_bigendian = unknown; then
4859+
# Compile a test program.
4860+
if test "$cross_compiling" = yes; then :
4861+
# Try to guess by grepping values from an object file.
4862+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4863+
/* end confdefs.h. */
4864+
short int ascii_mm[] =
4865+
{ 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
4866+
short int ascii_ii[] =
4867+
{ 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
4868+
int use_ascii (int i) {
4869+
return ascii_mm[i] + ascii_ii[i];
4870+
}
4871+
short int ebcdic_ii[] =
4872+
{ 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
4873+
short int ebcdic_mm[] =
4874+
{ 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
4875+
int use_ebcdic (int i) {
4876+
return ebcdic_mm[i] + ebcdic_ii[i];
4877+
}
4878+
extern int foo;
4879+
4880+
int
4881+
main ()
4882+
{
4883+
return use_ascii (foo) == use_ebcdic (foo);
4884+
;
4885+
return 0;
4886+
}
4887+
_ACEOF
4888+
if ac_fn_c_try_compile "$LINENO"; then :
4889+
if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
4890+
ac_cv_c_bigendian=yes
4891+
fi
4892+
if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
4893+
if test "$ac_cv_c_bigendian" = unknown; then
4894+
ac_cv_c_bigendian=no
4895+
else
4896+
# finding both strings is unlikely to happen, but who knows?
4897+
ac_cv_c_bigendian=unknown
4898+
fi
4899+
fi
4900+
fi
4901+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
4902+
else
4903+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4904+
/* end confdefs.h. */
4905+
$ac_includes_default
4906+
int
4907+
main ()
4908+
{
4909+
4910+
/* Are we little or big endian? From Harbison&Steele. */
4911+
union
4912+
{
4913+
long int l;
4914+
char c[sizeof (long int)];
4915+
} u;
4916+
u.l = 1;
4917+
return u.c[sizeof (long int) - 1] == 1;
4918+
4919+
;
4920+
return 0;
4921+
}
4922+
_ACEOF
4923+
if ac_fn_c_try_run "$LINENO"; then :
4924+
ac_cv_c_bigendian=no
4925+
else
4926+
ac_cv_c_bigendian=yes
4927+
fi
4928+
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
4929+
conftest.$ac_objext conftest.beam conftest.$ac_ext
4930+
fi
4931+
4932+
fi
4933+
fi
4934+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
4935+
$as_echo "$ac_cv_c_bigendian" >&6; }
4936+
case $ac_cv_c_bigendian in #(
4937+
yes)
4938+
$as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
4939+
;; #(
4940+
no)
4941+
;; #(
4942+
universal)
4943+
4944+
$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
4945+
4946+
;; #(
4947+
*)
4948+
as_fn_error $? "unknown endianness
4949+
presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
4950+
esac
4951+
4952+
47284953
case `pwd` in
47294954
*\ * | *\ *)
47304955
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
@@ -10858,7 +11083,7 @@ else
1085811083
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1085911084
lt_status=$lt_dlunknown
1086011085
cat > conftest.$ac_ext <<_LT_EOF
10861-
#line 10861 "configure"
11086+
#line 11086 "configure"
1086211087
#include "confdefs.h"
1086311088

1086411089
#if HAVE_DLFCN_H
@@ -10964,7 +11189,7 @@ else
1096411189
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1096511190
lt_status=$lt_dlunknown
1096611191
cat > conftest.$ac_ext <<_LT_EOF
10967-
#line 10967 "configure"
11192+
#line 11192 "configure"
1096811193
#include "confdefs.h"
1096911194

1097011195
#if HAVE_DLFCN_H
@@ -16532,6 +16757,7 @@ if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
1653216757
as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
1653316758
Usually this means the macro was only invoked conditionally." "$LINENO" 5
1653416759
fi
16760+
1653516761
if test -z "${ENABLE_LIBCTF_TRUE}" && test -z "${ENABLE_LIBCTF_FALSE}"; then
1653616762
as_fn_error $? "conditional \"ENABLE_LIBCTF\" was never defined.
1653716763
Usually this means the macro was only invoked conditionally." "$LINENO" 5

binutils/configure.ac

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ AC_PROG_CC
3131
AC_GNU_SOURCE
3232
AC_USE_SYSTEM_EXTENSIONS
3333

34+
AC_C_BIGENDIAN
35+
3436
LT_INIT
3537
ACX_LARGEFILE
3638

binutils/winduni.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,13 @@ wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
771771

772772
if (!mb || !iconv_name)
773773
return 0;
774-
iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
774+
iconv_t cd = iconv_open (
775+
#if WORDS_BIGENDIAN
776+
"UTF-16BE",
777+
#else
778+
"UTF-16LE",
779+
#endif
780+
iconv_name);
775781

776782
while (1)
777783
{
@@ -844,7 +850,13 @@ wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_t
844850

845851
if (!u || !iconv_name)
846852
return 0;
847-
iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
853+
iconv_t cd = iconv_open (iconv_name,
854+
#if WORDS_BIGENDIAN
855+
"UTF-16BE"
856+
#else
857+
"UTF-16LE"
858+
#endif
859+
);
848860

849861
while (1)
850862
{

0 commit comments

Comments
 (0)