Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions main/sal/qa/rtl/textenc/rtl_textcvt.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
#include "sal/types.h"
#include "gtest/gtest.h"

/* The ISCII Devanagari converter was removed for licensing reasons (its table
was derived from LGPL code; see #i119141), so RTL_TEXTENCODING_ISCII_DEVANAGARI
currently has no converter. The test data that exercises it is kept below but
compiled out. Uncomment the TEST_ISCII_DEVANAGARI definition below to re-enable
those checks once an Apache-licensed ISCII converter is reintroduced. */
// #define TEST_ISCII_DEVANAGARI

namespace {

struct SingleByteCharSet {
Expand Down Expand Up @@ -1062,6 +1069,7 @@ TEST_F(Test, testSingleByte) {
0x0425,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,
0x041F,0x042F,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
0x042C,0x042B,0x0417,0x0428,0x042D,0x0429,0x0427,0x042A } },
#ifdef TEST_ISCII_DEVANAGARI
{ RTL_TEXTENCODING_ISCII_DEVANAGARI,
{ 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
Expand Down Expand Up @@ -1095,6 +1103,7 @@ TEST_F(Test, testSingleByte) {
0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF } },
#endif
{ RTL_TEXTENCODING_ADOBE_STANDARD,
{ 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
Expand Down Expand Up @@ -1178,7 +1187,7 @@ TEST_F(Test, testSingleByte) {
0x2738,0x2739,0x273A,0x273B,0x273C,0x273D,0x273E,0x273F,
// 60
0x2740,0x2741,0x2742,0x2743,0x2744,0x2745,0x2746,0x2747,
0x2748,0x2749,0x274A,0x274B,0x27CF,0x274D,0x25A0,0x274F,
0x2748,0x2749,0x274A,0x274B,0x25CF,0x274D,0x25A0,0x274F,
0x2750,0x2751,0x2752,0x25B2,0x25BC,0x25C6,0x2756,0x25D7,
0x2758,0x2759,0x275A,0x275B,0x275C,0x275D,0x275E,0xFFFF,
// 80
Expand All @@ -1194,7 +1203,7 @@ TEST_F(Test, testSingleByte) {
// C0
0x2780,0x2781,0x2782,0x2783,0x2784,0x2785,0x2786,0x2787,
0x2788,0x2789,0x278A,0x278B,0x278C,0x278D,0x278E,0x278F,
0x2790,0x2791,0x2792,0x2793,0x2794,0x2795,0x2796,0x2797,
0x2790,0x2791,0x2792,0x2793,0x2794,0x2192,0x2194,0x2195,
0x2798,0x2799,0x279A,0x279B,0x279C,0x279D,0x279E,0x279F,
// E0
0x27A0,0x27A1,0x27A2,0x27A3,0x27A4,0x27A5,0x27A6,0x27A7,
Expand Down Expand Up @@ -2775,7 +2784,10 @@ TEST_F(Test, testWindows) {
{ 0, RTL_TEXTENCODING_DONTKNOW, true },
{ 0, RTL_TEXTENCODING_UCS4, true },
{ 0, RTL_TEXTENCODING_UCS2, true },
{ 0, RTL_TEXTENCODING_ISCII_DEVANAGARI, true }
// 57002 is the genuine Windows code page for ISCII Devanagari; the
// codepage<->encoding mapping is a fixed table independent of whether
// a converter is implemented (the converter was removed, see #i119141).
{ 57002, RTL_TEXTENCODING_ISCII_DEVANAGARI, true }
};
for (std::size_t i = 0; i < sizeof data / sizeof data[0]; ++i) {
OSL_ASSERT(data[i].codePage != 0 || data[i].reverse);
Expand Down Expand Up @@ -2832,8 +2844,10 @@ TEST_F(Test, testInfo) {
{ RTL_TEXTENCODING_IBM_861, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_IBM_863, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_IBM_865, RTL_TEXTENCODING_INFO_MIME, true },
#ifdef TEST_ISCII_DEVANAGARI
{ RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_ASCII, true },
{ RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_MIME, false },
#endif
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_ASCII, false },
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_SYMBOL, false },
Expand Down
6 changes: 3 additions & 3 deletions main/sal/textenc/convertadobe.tab
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ adobeSymbolEncodingConverterData = {
rtl::textenc::BmpUnicodeToSingleByteRange const unicodeToAdobeDingbatsEncoding[]
= { { 0x0020, 0x0020 - 0x0020, 0x20 }, { 0x00A0, 0x00A0 - 0x00A0, 0x20 },
{ 0x2192, 0x2192 - 0x2192, 0xD5 }, { 0x2194, 0x2195 - 0x2194, 0xD6 },
{ 0x2460, 0x2469 - 0x2460, 0xAB },
{ 0x2460, 0x2469 - 0x2460, 0xAC },
{ 0x25A0, 0x25A0 - 0x25A0, 0x6E },
{ 0x25B2, 0x25B2 - 0x25B2, 0x73 }, { 0x25BC, 0x25BC - 0x25BC, 0x74 },
{ 0x25C6, 0x25C6 - 0x25C6, 0x75 }, { 0x25CF, 0x25CF - 0x25CF, 0x6C },
Expand Down Expand Up @@ -266,7 +266,7 @@ adobeDingbatsEncodingConverterData = {
0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, // 0x50
0x2738, 0x2739, 0x273A, 0x273B, 0x273C, 0x273D, 0x273E, 0x273F,
0x2740, 0x2741, 0x2742, 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, // 0x60
0x2748, 0x2749, 0x274A, 0x274B, 0x27CF, 0x274D, 0x25A0, 0x274F,
0x2748, 0x2749, 0x274A, 0x274B, 0x25CF, 0x274D, 0x25A0, 0x274F,
0x2750, 0x2751, 0x2752, 0x25B2, 0x25BC, 0x25C6, 0x2756, 0x25D7, // 0x70
0x2758, 0x2759, 0x275A, 0x275B, 0x275C, 0x275D, 0x275E, 0xFFFF,
0xF8D7, 0xF8D8, 0xF8D9, 0xF8DA, 0xF8DB, 0xF8DC, 0xF8DD, 0xF8DE, // 0x80
Expand All @@ -279,7 +279,7 @@ adobeDingbatsEncodingConverterData = {
0x2778, 0x2779, 0x277A, 0x277B, 0x277C, 0x277D, 0x277E, 0x277F,
0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 0x2787, // 0xC0
0x2788, 0x2789, 0x278A, 0x278B, 0x278C, 0x278D, 0x278E, 0x278F,
0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2795, 0x2796, 0x2797, // 0xD0
0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2192, 0x2194, 0x2195, // 0xD0
0x2798, 0x2799, 0x279A, 0x279B, 0x279C, 0x279D, 0x279E, 0x279F,
0x27A0, 0x27A1, 0x27A2, 0x27A3, 0x27A4, 0x27A5, 0x27A6, 0x27A7, // 0xE0
0x27A8, 0x27A9, 0x27AA, 0x27AB, 0x27AC, 0x27AD, 0x27AE, 0x27AF,
Expand Down
14 changes: 14 additions & 0 deletions main/sal/textenc/textenc.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,25 @@ Impl_getTextEncodingData(rtl_TextEncoding nEncoding) SAL_THROW_EXTERN_C()
&aImplBig5HkscsTextEncodingData, /* BIG5_HKSCS */
&aImplTis620TextEncodingData, /* TIS_620 */
&aImplKoi8UTextEncodingData, /* KOI8_U */
NULL, /* ISCII_DEVANAGARI: converter removed (#i119141), but the
slot MUST remain. This array is indexed positionally by
the (ABI-frozen) rtl_TextEncoding enum value, so a missing
slot shifts every following encoding onto the wrong
converter. */
&aImplJavaUtf8TextEncodingData, /* JAVA_UTF8 */
&adobeStandardEncodingData, /* ADOBE_STANDARD */
&adobeSymbolEncodingData, /* ADOBE_SYMBOL */
&aImplPT154TextEncodingData, /* PT154 */
&adobeDingbatsEncodingData }; /* ADOBE_DINGBATS */
/* aData[] is indexed positionally by the (ABI-frozen) rtl_TextEncoding
enum, so its length must equal the highest enum value plus one. Assert
this at compile time: dropping a slot (as happened to ISCII_DEVANAGARI,
#i119141) would otherwise silently shift every following encoding onto
the wrong converter, undetectable at run time because the bounds check
below just maps the now-out-of-range top encoding to NULL. */
typedef int Impl_textEncodingDataSizeCheck[
sizeof aData / sizeof aData[0]
== RTL_TEXTENCODING_ADOBE_DINGBATS + 1 ? 1 : -1];
return
nEncoding < sizeof aData / sizeof aData[0] ? aData[nEncoding] : NULL;
}