diff options
author | Ivan Enderlin <ivan.enderlin@hoa-project.net> | 2015-01-07 11:00:06 +0100 |
---|---|---|
committer | Ivan Enderlin <ivan.enderlin@hoa-project.net> | 2015-01-07 11:00:06 +0100 |
commit | c3e510b9aaf685e9bba2dcddd7ee23850faddfe4 (patch) | |
tree | 50d26f9ff978816d83370f96b350cdc67510702d | |
parent | d8d6efcfe9a5e6ce6d384e7ab0f3ed917f43e055 (diff) | |
parent | 369111c0f904e25ea4156fdb50775448af1a9785 (diff) | |
download | Ustring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.zip Ustring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.tar.gz Ustring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.tar.bz2 |
Merge branch 'getCharWidth' into incoming
-rw-r--r-- | String.php | 62 | ||||
-rw-r--r-- | Test/Unit/String.php | 72 |
2 files changed, 134 insertions, 0 deletions
@@ -862,6 +862,68 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { } /** + * Get the number of column positions of a wide-character. + * + * This is a PHP implementation of wcwidth() and wcswidth() (defined in IEEE + * Std 1003.1-2001) for Unicode, by Markus Kuhn. Please see + * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c. + * + * The wcwidth(wc) function shall either return 0 (if wc is a null + * wide-character code), or return the number of column positions to be + * occupied by the wide-character code wc, or return -1 (if wc does not + * correspond to a printable wide-character code). + * + * @access public + * @param string $char Character. + * @return int 0, -1, 1 or 2. + */ + public static function getCharWidth ( $char ) { + + $char = (string) $char; + $c = static::toCode($char); + + // Test for 8-bit control characters. + if(0x0 === $c) + return 0; + + if(0x20 > $c || (0x7f <= $c && $c < 0xa0)) + return -1; + + // Non-spacing characters: Mn, Me and Cf (except U+00AD), U+1160 to U+11FF and U+200B. + if( 0xad !== $c + && 0 !== preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char)) + return 0; + + // If we arrive here, $c is not a combining or C0/C1 control character. + return 1 + + (0x1100 <= $c && + (0x115f >= $c || // Hangul Jamo initial consonants + 0x2329 === $c || 0x232a === $c || + (0x2e80 <= $c && 0xa4cf >= $c && + 0x303f !== $c) || // CJK…Yi + (0xac00 <= $c && 0xd7a3 >= $c) || // Hangul Syllables + (0xf900 <= $c && 0xfaff >= $c) || // CJK Compatibility Ideographs + (0xfe10 <= $c && 0xfe19 >= $c) || // Vertical forms + (0xfe30 <= $c && 0xfe6f >= $c) || // CJK Compatibility Forms + (0xff00 <= $c && 0xff60 >= $c) || // Fullwidth Forms + (0xffe0 <= $c && 0xffe6 >= $c) || + (0x20000 <= $c && 0x2fffd >= $c) || + (0x30000 <= $c && 0x3fffd >= $c))); + } + + /** + * Check whether the character is printable or not. + * + * @access public + * @param string $char Character. 
+ * @return bool + */ + public static function isCharPrintable ( $char ) { + + return 1 <= static::getCharWidth($char); + } + + /** * Get a UTF-8 character from its decimal code representation. * * @access public diff --git a/Test/Unit/String.php b/Test/Unit/String.php index f3863b9..32d9001 100644 --- a/Test/Unit/String.php +++ b/Test/Unit/String.php @@ -744,6 +744,78 @@ class String extends Test\Unit\Suite { ->isEqualTo(LUT::RTL); } + public function case_get_char_width ( ) { + + $this + ->given( + $data = [ + // 8-bit control characters. + [0x0, 0], + [0x19, -1], + [0x7f, -1], + [0x9f, -1], + + // Regular. + [0xa0, 1], + + // Non-spacing characters. + [0x300, 0], // in Mn + [0x488, 0], // in Me + [0x600, 0], // in Cf + [0xad, 1], // in Cf, but it is the only exception + [0x1160, 0], + [0x11ff, 0], + [0x200b, 0], + + // To test the last return statement. + [0x1100, 2], + [0x2160, 1], + [0x3f60, 2], + [0x303f, 1], + [0x2329, 2], + [0xaed0, 2], + [0x232a, 2], + [0xffa4, 1], + [0xfe10, 2], + [0xfe30, 2], + [0xff00, 2], + [0xf900, 2] + ] + ) + ->when(function ( ) use ( $data ) { + + foreach($data as $datum) { + + list($code, $width) = $datum; + + $this + ->when($result = LUT::getCharWidth(LUT::fromCode($code))) + ->then + ->integer($result) + ->isEqualTo($width); + } + }); + } + + public function case_is_char_printable ( ) { + + $this + ->when($result = LUT::isCharPrintable(LUT::fromCode(0x7f))) + ->then + ->boolean($result) + ->isFalse() + + ->when($result = LUT::isCharPrintable(LUT::fromCode(0xa0))) + ->then + ->boolean($result) + ->isTrue() + + ->when($result = LUT::isCharPrintable(LUT::fromCode(0x1100))) + ->then + ->boolean($result) + ->isTrue(); + } + public function case_from_code ( ) { $this |