const char *str;
size_t len;
size_t offset;
+ bool posix;
bool init;
bool prev_alnum;
};
{
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
- bool curr_alnum = pg_u_isalnum(u, true);
+ bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
{
.str = src,
.len = srclen,
.offset = 0,
+ .posix = !locale->info.builtin.casemap_full,
.init = false,
.prev_alnum = false,
};
const char *str;
size_t len;
size_t offset;
+ bool posix;
bool init;
bool prev_alnum;
};
{
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
- bool curr_alnum = pg_u_isalnum(u, true);
+ bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
{
char icu_upper[BUFSZ];
char icu_fold[BUFSZ];
UErrorCode status;
+
+ /* full case mapping doesn't use posix semantics */
struct WordBoundaryState wbstate = {
.str = str,
.len = strlen(str),
.offset = 0,
+ .posix = false,
.init = false,
.prev_alnum = false,
};
test_convert(tfunc_lower, "σς'Σ' ΣΣ'Σ'", "σς'ς' σσ'ς'");
test_convert(tfunc_title, "σςΣ ΣΣΣ", "Σςς Σσς");
test_convert(tfunc_fold, "σςΣ ΣΣΣ", "σσσ σσσ");
+ /* test that alphanumerics are word characters */
+ test_convert(tfunc_title, "λλ", "Λλ");
+ test_convert(tfunc_title, "1a", "1a");
+ /* U+FF11 FULLWIDTH ONE is alphanumeric for full case mapping */
+ test_convert(tfunc_title, "\uFF11a", "\uFF11a");
+
#ifdef USE_ICU
icu_test_full("");
icu_test_full("abc 123xyz");
icu_test_full("σςΣ ΣΣΣ");
icu_test_full("ıiIİ");
+ icu_test_full("\uFF11a");
/* test <alpha><iota_subscript><acute> */
icu_test_full("\u0391\u0345\u0301");
#endif
('abc DEF 123abc'),
('ábc sßs ßss DÉF'),
('DŽxxDŽ džxxDž Džxxdž'),
+ (U&'Λλ 1a \FF11a'),
('ȺȺȺ'),
('ⱥⱥⱥ'),
('ⱥȺ');
abc DEF 123abc | abc def 123abc | Abc Def 123abc | ABC DEF 123ABC | 14 | 14 | 14 | 14
ábc sßs ßss DÉF | ábc sßs ßss déf | Ábc Sßs ßss Déf | ÁBC SßS ßSS DÉF | 19 | 19 | 19 | 19
DŽxxDŽ džxxDž Džxxdž | džxxdž džxxdž džxxdž | DŽxxdž DŽxxdž DŽxxdž | DŽXXDŽ DŽXXDŽ DŽXXDŽ | 20 | 20 | 20 | 20
+ Λλ 1a １a | λλ 1a １a | Λλ 1a １A | ΛΛ 1A １A | 12 | 12 | 12 | 12
ȺȺȺ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 6 | 9 | 8 | 6
ⱥⱥⱥ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 9 | 9 | 8 | 6
ⱥȺ | ⱥⱥ | Ⱥⱥ | ȺȺ | 5 | 6 | 5 | 4
-(6 rows)
+(7 rows)
DROP TABLE test_pg_c_utf8;
-- negative test: Final_Sigma not used for builtin locale C.UTF-8
('abc DEF 123abc'),
('ábc sßs ßss DÉF'),
('DŽxxDŽ džxxDž Džxxdž'),
+ (U&'Λλ 1a \FF11a'),
('ȺȺȺ'),
('ⱥⱥⱥ'),
('ⱥȺ');
abc DEF 123abc | abc def 123abc | Abc Def 123abc | ABC DEF 123ABC | 14 | 14 | 14 | 14
ábc sßs ßss DÉF | ábc sßs ßss déf | Ábc Sßs Ssss Déf | ÁBC SSSS SSSS DÉF | 19 | 19 | 19 | 19
DŽxxDŽ džxxDž Džxxdž | džxxdž džxxdž džxxdž | Džxxdž Džxxdž Džxxdž | DŽXXDŽ DŽXXDŽ DŽXXDŽ | 20 | 20 | 20 | 20
+ Λλ 1a １a | λλ 1a １a | Λλ 1a １a | ΛΛ 1A １A | 12 | 12 | 12 | 12
ȺȺȺ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 6 | 9 | 8 | 6
ⱥⱥⱥ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 9 | 9 | 8 | 6
ⱥȺ | ⱥⱥ | Ⱥⱥ | ȺȺ | 5 | 6 | 5 | 4
-(6 rows)
+(7 rows)
DROP TABLE test_pg_unicode_fast;
-- test Final_Sigma