aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIvan Enderlin <ivan.enderlin@hoa-project.net>2015-01-07 11:00:06 +0100
committerIvan Enderlin <ivan.enderlin@hoa-project.net>2015-01-07 11:00:06 +0100
commitc3e510b9aaf685e9bba2dcddd7ee23850faddfe4 (patch)
tree50d26f9ff978816d83370f96b350cdc67510702d
parentd8d6efcfe9a5e6ce6d384e7ab0f3ed917f43e055 (diff)
parent369111c0f904e25ea4156fdb50775448af1a9785 (diff)
downloadUstring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.zip
Ustring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.tar.gz
Ustring-c3e510b9aaf685e9bba2dcddd7ee23850faddfe4.tar.bz2
Merge branch 'getCharWidth' into incoming
-rw-r--r--String.php62
-rw-r--r--Test/Unit/String.php72
2 files changed, 134 insertions, 0 deletions
diff --git a/String.php b/String.php
index 6fefa20..be80ad0 100644
--- a/String.php
+++ b/String.php
@@ -862,6 +862,68 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate {
}
/**
+ * Get the number of column positions of a wide-character.
+ *
+ * This is a PHP implementation of wcwidth() and wcswidth() (defined in IEEE
+ * Std 1003.1-2001) for Unicode, by Markus Kuhn. Please, see
+ * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
+ *
+ * The wcwidth(wc) function shall either return 0 (if wc is a null
+ * wide-character code), or return the number of column positions to be
+ * occupied by the wide-character code wc, or return -1 (if wc does not
+ * correspond to a printable wide-character code).
+ *
+ * @access public
+ * @param string $char Character.
+ * @return int
+ */
+ public static function getCharWidth ( $char ) {
+
+     $char = (string) $char;
+     $c    = static::toCode($char);
+
+     // Null is zero-width; C0 controls, DEL and C1 controls are not printable.
+     if(0x0 === $c)
+         return 0;
+     if(0x20 > $c || (0x7f <= $c && $c < 0xa0))
+         return -1;
+
+     // Non-spacing test, skipped for SOFT HYPHEN (0xad): it is in Cf but is 1 column wide.
+     // preg_match() returns false on failure (e.g. malformed UTF-8 with /u): not printable.
+     $matched = 0xad === $c ? 0 : preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char);
+     if(false === $matched)
+         return -1;
+     if(1 === $matched)
+         return 0;
+
+     // Here $c is neither a combining nor a C0/C1 control character: 2 columns if wide, else 1.
+     return 1 +
+         (0x1100 <= $c &&
+             (0x115f >= $c || // Hangul Jamo init. consonants
+              0x2329 === $c || 0x232a === $c ||
+              (0x2e80 <= $c && 0xa4cf >= $c && 0x303f !== $c) || // CJK…Yi
+              (0xac00 <= $c && 0xd7a3 >= $c) || // Hangul Syllables
+              (0xf900 <= $c && 0xfaff >= $c) || // CJK Compatibility Ideographs
+              (0xfe10 <= $c && 0xfe19 >= $c) || // Vertical forms
+              (0xfe30 <= $c && 0xfe6f >= $c) || // CJK Compatibility Forms
+              (0xff00 <= $c && 0xff60 >= $c) || // Fullwidth Forms
+              (0xffe0 <= $c && 0xffe6 >= $c) ||
+              (0x20000 <= $c && 0x2fffd >= $c) || (0x30000 <= $c && 0x3fffd >= $c)));
+ }
+
+ /**
+ * Check whether the character is printable or not.
+ *
+ * @access public
+ * @param string $char Character.
+ * @return bool
+ */
+ public static function isCharPrintable ( $char ) {
+     // A width of -1 (not printable) or 0 (occupies no column) is rejected.
+     return static::getCharWidth($char) >= 1;
+ }
+
+ /**
* Get a UTF-8 character from its decimal code representation.
*
* @access public
diff --git a/Test/Unit/String.php b/Test/Unit/String.php
index f3863b9..32d9001 100644
--- a/Test/Unit/String.php
+++ b/Test/Unit/String.php
@@ -744,6 +744,78 @@ class String extends Test\Unit\Suite {
->isEqualTo(LUT::RTL);
}
+ public function case_get_char_width ( ) {
+
+     $this
+         ->given(
+             $widths = [
+                 // Code point => expected width. Null, then 8-bit controls.
+                 0x0 => 0,
+                 0x19 => -1,
+                 0x7f => -1,
+                 0x9f => -1,
+
+                 // First code point past the 8-bit control range.
+                 0xa0 => 1,
+
+                 // Code points expected to occupy no column.
+                 0x300 => 0, // category Mn
+                 0x488 => 0, // category Me
+                 0x600 => 0, // category Cf
+                 0xad => 1, // category Cf too, but kept at one column
+                 0x1160 => 0,
+                 0x11ff => 0,
+                 0x200b => 0,
+
+                 // Exercise the final range test (2 = wide, 1 = regular).
+                 0x1100 => 2,
+                 0x2160 => 1,
+                 0x3f60 => 2,
+                 0x303f => 1,
+                 0x2329 => 2,
+                 0xaed0 => 2,
+                 0x232a => 2,
+                 0xffa4 => 1,
+                 0xfe10 => 2,
+                 0xfe30 => 2,
+                 0xff00 => 2,
+                 0xf900 => 2
+             ]
+         )
+         ->when(function ( ) use ( $widths ) {
+
+             foreach($widths as $code => $width) {
+
+                 $actual = LUT::getCharWidth(LUT::fromCode($code));
+
+                 $this
+                     ->when($actual)
+                     ->then
+                         ->integer($actual)
+                         ->isEqualTo($width);
+             }
+         });
+ }
+
+ public function case_is_char_printable ( ) {
+
+     // One rejected code point, then two accepted ones, each asserted on its own.
+     // 0x7f must be reported as not printable.
+     $del = LUT::isCharPrintable(LUT::fromCode(0x7f));
+     $this->when($del)->then
+         ->boolean($del)->isFalse();
+
+     // 0xa0 must be reported as printable.
+     $nbsp = LUT::isCharPrintable(LUT::fromCode(0xa0));
+     $this->when($nbsp)->then
+         ->boolean($nbsp)->isTrue();
+
+     // 0x1100 must be reported as printable.
+     $jamo = LUT::isCharPrintable(LUT::fromCode(0x1100));
+     $this->when($jamo)->then
+         ->boolean($jamo)->isTrue();
+ }
+
public function case_from_code ( ) {
$this