comparison cake/libs/multibyte.php @ 0:261e66bd5a0c

hg init
author Shoshi TAMAKI <shoshi@cr.ie.u-ryukyu.ac.jp>
date Sun, 24 Jul 2011 21:08:31 +0900
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:261e66bd5a0c
1 <?php
2 /**
3 * Multibyte handling methods.
4 *
5 *
6 * PHP versions 4 and 5
7 *
8 * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
9 * Copyright 2005-2010, Cake Software Foundation, Inc. (http://cakefoundation.org)
10 *
11 * Licensed under The MIT License
12 * Redistributions of files must retain the above copyright notice.
13 *
14 * @copyright Copyright 2005-2010, Cake Software Foundation, Inc. (http://cakefoundation.org)
15 * @link http://cakephp.org CakePHP(tm) Project
16 * @package cake
17 * @subpackage cake.cake.libs
18 * @since CakePHP(tm) v 1.2.0.6833
19 * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
20 */
21 if (function_exists('mb_internal_encoding')) {
22 $encoding = Configure::read('App.encoding');
23 if (!empty($encoding)) {
24 mb_internal_encoding($encoding);
25 }
26 }
27
28 /**
29 * Find position of first occurrence of a case-insensitive string.
30 *
31 * @param string $haystack The string from which to get the position of the first occurrence of $needle.
32 * @param string $needle The string to find in $haystack.
33 * @param integer $offset The position in $haystack to start searching.
34 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
35 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string, or false
36 * if $needle is not found.
37 */
if (!function_exists('mb_stripos')) {
	// Defined only when no mb_stripos() exists yet; delegates to Multibyte::stripos().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::stripos($haystack, $needle, $offset);
		return $position;
	}
}
43
44 /**
45 * Finds first occurrence of a string within another, case insensitive.
46 *
47 * @param string $haystack The string from which to get the first occurrence of $needle.
48 * @param string $needle The string to find in $haystack.
49 * @param boolean $part Determines which portion of $haystack this function returns.
50 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
51 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
52 * Default value is false.
53 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
54 * @return string|boolean The portion of $haystack, or false if $needle is not found.
55 */
if (!function_exists('mb_stristr')) {
	// Defined only when no mb_stristr() exists yet; delegates to Multibyte::stristr().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::stristr($haystack, $needle, $part);
		return $portion;
	}
}
61
62 /**
63 * Get string length.
64 *
65 * @param string $string The string being checked for length.
66 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
67 * @return integer The number of characters in string $string having character encoding encoding.
68 * A multi-byte character is counted as 1.
69 */
if (!function_exists('mb_strlen')) {
	// Defined only when no mb_strlen() exists yet; delegates to Multibyte::strlen().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strlen($string, $encoding = null) {
		$length = Multibyte::strlen($string);
		return $length;
	}
}
75
76 /**
77 * Find position of first occurrence of a string.
78 *
79 * @param string $haystack The string being checked.
80 * @param string $needle The string to find in $haystack.
81 * @param integer $offset The search offset. If it is not specified, 0 is used.
82 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
83 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string.
84 * If $needle is not found, it returns false.
85 */
if (!function_exists('mb_strpos')) {
	// Defined only when no mb_strpos() exists yet; delegates to Multibyte::strpos().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strpos($haystack, $needle, $offset);
		return $position;
	}
}
91
92 /**
93 * Finds the last occurrence of a character in a string within another.
94 *
95 * @param string $haystack The string from which to get the last occurrence of $needle.
96 * @param string $needle The string to find in $haystack.
97 * @param boolean $part Determines which portion of $haystack this function returns.
98 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
99 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
100 * Default value is false.
101 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
102 * @return string|boolean The portion of $haystack. or false if $needle is not found.
103 */
if (!function_exists('mb_strrchr')) {
	// Defined only when no mb_strrchr() exists yet; delegates to Multibyte::strrchr().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrchr($haystack, $needle, $part);
		return $portion;
	}
}
109
110 /**
111 * Finds the last occurrence of a character in a string within another, case insensitive.
112 *
113 * @param string $haystack The string from which to get the last occurrence of $needle.
114 * @param string $needle The string to find in $haystack.
115 * @param boolean $part Determines which portion of $haystack this function returns.
116 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
117 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
118 * Default value is false.
119 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
120 * @return string|boolean The portion of $haystack. or false if $needle is not found.
121 */
if (!function_exists('mb_strrichr')) {
	// Defined only when no mb_strrichr() exists yet; delegates to Multibyte::strrichr().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strrichr($haystack, $needle, $part);
		return $portion;
	}
}
127
128 /**
129 * Finds position of last occurrence of a string within another, case insensitive
130 *
131 * @param string $haystack The string from which to get the position of the last occurrence of $needle.
132 * @param string $needle The string to find in $haystack.
133 * @param integer $offset The position in $haystack to start searching.
134 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
135 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
136 * or false if $needle is not found.
137 */
if (!function_exists('mb_strripos')) {
	// Defined only when no mb_strripos() exists yet; delegates to Multibyte::strripos().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strripos($haystack, $needle, $offset);
		return $position;
	}
}
143
144 /**
145 * Find position of last occurrence of a string in a string.
146 *
147 * @param string $haystack The string being checked, for the last occurrence of $needle.
148 * @param string $needle The string to find in $haystack.
149 * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
150 * Negative values will stop searching at an arbitrary point prior to the end of the string.
151 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
152 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
153 * If $needle is not found, it returns false.
154 */
if (!function_exists('mb_strrpos')) {
	// Defined only when no mb_strrpos() exists yet; delegates to Multibyte::strrpos().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
		$position = Multibyte::strrpos($haystack, $needle, $offset);
		return $position;
	}
}
160
161 /**
162 * Finds first occurrence of a string within another
163 *
164 * @param string $haystack The string from which to get the first occurrence of $needle.
165 * @param string $needle The string to find in $haystack
166 * @param boolean $part Determines which portion of $haystack this function returns.
167 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
168 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
169 * Default value is FALSE.
170 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
171 * @return string|boolean The portion of $haystack, or false if $needle is not found.
172 */
if (!function_exists('mb_strstr')) {
	// Defined only when no mb_strstr() exists yet; delegates to Multibyte::strstr().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
		$portion = Multibyte::strstr($haystack, $needle, $part);
		return $portion;
	}
}
178
179 /**
180 * Make a string lowercase
181 *
182 * @param string $string The string being lowercased.
183 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
184 * @return string with all alphabetic characters converted to lowercase.
185 */
if (!function_exists('mb_strtolower')) {
	// Defined only when no mb_strtolower() exists yet; delegates to Multibyte::strtolower().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strtolower($string, $encoding = null) {
		$lowered = Multibyte::strtolower($string);
		return $lowered;
	}
}
191
192 /**
193 * Make a string uppercase
194 *
195 * @param string $string The string being uppercased.
196 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
197 * @return string with all alphabetic characters converted to uppercase.
198 */
if (!function_exists('mb_strtoupper')) {
	// Defined only when no mb_strtoupper() exists yet; delegates to Multibyte::strtoupper().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_strtoupper($string, $encoding = null) {
		$uppered = Multibyte::strtoupper($string);
		return $uppered;
	}
}
204
205 /**
206 * Count the number of substring occurrences
207 *
208 * @param string $haystack The string being checked.
209 * @param string $needle The string being found.
210 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
211 * @return integer The number of times the $needle substring occurs in the $haystack string.
212 */
if (!function_exists('mb_substr_count')) {
	// Defined only when no mb_substr_count() exists yet; delegates to Multibyte::substrCount().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_substr_count($haystack, $needle, $encoding = null) {
		$occurrences = Multibyte::substrCount($haystack, $needle);
		return $occurrences;
	}
}
218
219 /**
220 * Get part of string
221 *
222 * @param string $string The string being checked.
223 * @param integer $start The first position used in $string.
224 * @param integer $length The maximum length of the returned string.
225 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
226 * @return string The portion of $string specified by the $string and $length parameters.
227 */
if (!function_exists('mb_substr')) {
	// Defined only when no mb_substr() exists yet; delegates to Multibyte::substr().
	// $encoding is accepted for signature compatibility but is not forwarded.
	function mb_substr($string, $start, $length = null, $encoding = null) {
		$portion = Multibyte::substr($string, $start, $length);
		return $portion;
	}
}
233
234 /**
235 * Encode string for MIME header
236 *
237 * @param string $str The string being encoded
238 * @param string $charset specifies the name of the character set in which str is represented in.
239 * The default value is determined by the current NLS setting (mbstring.language).
240 * @param string $transfer_encoding specifies the scheme of MIME encoding.
241 * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given.
242 * @param string $linefeed specifies the EOL (end-of-line) marker with which
243 * mb_encode_mimeheader() performs line-folding
244 * (an RFC term: the act of breaking a line longer than a certain length into multiple lines.
245 * The length is currently hard-coded to 74 characters). Falls back to "\r\n" (CRLF) if not given.
246 * @param integer $indent [definition unknown and appears to have no effect]
247 * @return string A converted version of the string represented in ASCII.
248 */
if (!function_exists('mb_encode_mimeheader')) {
	// Defined only when no mb_encode_mimeheader() exists yet; delegates to Multibyte::mimeEncode().
	// Only $str, $charset and $linefeed are forwarded; $transfer_encoding and $indent are
	// accepted for signature compatibility and otherwise unused here.
	function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
		$encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
		return $encoded;
	}
}
254
255 /**
256 * Multibyte handling methods.
257 *
258 *
259 * @package cake
260 * @subpackage cake.cake.libs
261 */
262 class Multibyte extends Object {
263
264 /**
265 * Holds the case folding values
266 *
267 * @var array
268 * @access private
269 */
270 var $__caseFold = array();
271
272 /**
273 * Holds an array of Unicode code point ranges
274 *
275 * @var array
276 * @access private
277 */
278 var $__codeRange = array();
279
280 /**
281 * Holds the current code point range
282 *
283 * @var string
284 * @access private
285 */
286 var $__table = null;
287
288 /**
289 * Gets a reference to the Multibyte object instance
290 *
291 * @return object Multibyte instance
292 * @access public
293 * @static
294 */
function &getInstance() {
	// The static array survives between calls; slot 0 holds the single shared
	// Multibyte object, created on first use and returned by reference after that.
	static $instance = array();

	if (empty($instance)) {
		$instance[0] =& new Multibyte();
	}
	return $instance[0];
}
303
304 /**
305 * Converts a multibyte character string
306 * to the decimal value of the character
307 *
308 * @param multibyte string $string
309 * @return array
310 * @access public
311 * @static
312 */
function utf8($string) {
	// Decodes a UTF-8 byte string into a list of integer code points.
	// Handles 1-, 2-, 3- and 4-byte sequences. Input is not validated: a lead
	// byte only decides how many bytes are collected, and an incomplete
	// trailing sequence is dropped.
	$map = array();

	$pending = array();  // bytes collected so far for the current multi-byte sequence
	$expected = 1;       // total number of bytes in the current multi-byte sequence
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		$byte = ord($string[$i]);

		if ($byte < 128) {
			// Plain ASCII maps to itself.
			$map[] = $byte;
			continue;
		}

		if (empty($pending)) {
			// The lead byte determines the sequence length:
			// 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes, 11110xxx -> 4 bytes.
			if ($byte < 224) {
				$expected = 2;
			} elseif ($byte < 240) {
				$expected = 3;
			} else {
				$expected = 4;
			}
		}
		$pending[] = $byte;

		if (count($pending) === $expected) {
			if ($expected === 4) {
				$map[] = (($pending[0] % 8) * 262144) + (($pending[1] % 64) * 4096)
					+ (($pending[2] % 64) * 64) + ($pending[3] % 64);
			} elseif ($expected === 3) {
				$map[] = (($pending[0] % 16) * 4096) + (($pending[1] % 64) * 64) + ($pending[2] % 64);
			} else {
				$map[] = (($pending[0] % 32) * 64) + ($pending[1] % 64);
			}
			$pending = array();
			$expected = 1;
		}
	}
	return $map;
}
344
345 /**
346 * Converts the decimal value of a multibyte character string
347 * to a string
348 *
349 * @param array $array
350 * @return string
351 * @access public
352 * @static
353 */
function ascii($array) {
	// Encodes a list of integer code points as a UTF-8 byte string.
	// Code points below 128 become one byte, below 2048 two bytes,
	// below 65536 three bytes, and anything larger four bytes.
	$ascii = '';

	foreach ($array as $utf8) {
		if ($utf8 < 128) {
			$ascii .= chr($utf8);
		} elseif ($utf8 < 2048) {
			$ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
			$ascii .= chr(128 + ($utf8 % 64));
		} elseif ($utf8 < 65536) {
			$ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
			$ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
			$ascii .= chr(128 + ($utf8 % 64));
		} else {
			// Supplementary planes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
			$ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
			$ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
			$ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
			$ascii .= chr(128 + ($utf8 % 64));
		}
	}
	return $ascii;
}
371
372 /**
373 * Find position of first occurrence of a case-insensitive string.
374 *
375 * @param multi-byte string $haystack The string from which to get the position of the first occurrence of $needle.
376 * @param multi-byte string $needle The string to find in $haystack.
377 * @param integer $offset The position in $haystack to start searching.
378 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string,
379 * or false if $needle is not found.
380 * @access public
381 * @static
382 */
function stripos($haystack, $needle, $offset = 0) {
	// On PHP5 with a haystack that checkMultibyte() does not flag, the native
	// stripos() is used directly.
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return stripos($haystack, $needle, $offset);
	}

	// Otherwise both strings are upper-cased so that a case-sensitive
	// Multibyte::strpos() gives a case-insensitive result.
	$upperHaystack = Multibyte::strtoupper($haystack);
	$upperNeedle = Multibyte::strtoupper($needle);
	return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
391
392 /**
393 * Finds first occurrence of a string within another, case insensitive.
394 *
395 * @param string $haystack The string from which to get the first occurrence of $needle.
396 * @param string $needle The string to find in $haystack.
397 * @param boolean $part Determines which portion of $haystack this function returns.
398 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
399 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
400 * Default value is false.
401 * @return string|boolean The portion of $haystack, or false if $needle is not found.
402 * @access public
403 * @static
404 */
function stristr($haystack, $needle, $part = false) {
	// Case-insensitive search for the first occurrence of $needle.
	// Returns the part of $haystack before the match when $part is true,
	// otherwise the part from the match to the end; false when not found.
	//
	// The native stristr() only accepts its third argument from PHP 5.3 on.
	$php = version_compare(PHP_VERSION, '5.3', '<');

	if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
		// Matching is done on upper-cased code points; the returned portion is
		// cut from the original code points at the same index.
		// NOTE(review): this assumes upper-casing keeps the number of code
		// points unchanged — confirm against the case-folding tables.
		$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
		$haystack = Multibyte::utf8($haystack);
		$needle = Multibyte::utf8(Multibyte::strtoupper($needle));

		$needleCount = count($needle);
		if ($needleCount === 0) {
			return false;
		}

		// Last index at which the whole needle still fits; this keeps every
		// comparison inside the array bounds.
		$last = min(count($haystack), count($check)) - $needleCount;

		for ($position = 0; $position <= $last; $position++) {
			if (array_slice($check, $position, $needleCount) === $needle) {
				if ($part) {
					// A match at position 0 gives an empty "before" portion.
					return Multibyte::ascii(array_slice($haystack, 0, $position));
				}
				return Multibyte::ascii(array_slice($haystack, $position));
			}
		}
		return false;
	}

	if (!$php) {
		return stristr($haystack, $needle, $part);
	}
	return stristr($haystack, $needle);
}
454
455 /**
456 * Get string length.
457 *
458 * @param string $string The string being checked for length.
459 * @return integer The number of characters in string $string
460 * @access public
461 * @static
462 */
function strlen($string) {
	// Strings that checkMultibyte() does not flag are measured by the native strlen().
	if (!Multibyte::checkMultibyte($string)) {
		return strlen($string);
	}
	// Otherwise the length is the number of decoded code points.
	$codePoints = Multibyte::utf8($string);
	return count($codePoints);
}
470
471 /**
472 * Find position of first occurrence of a string.
473 *
474 * @param string $haystack The string being checked.
475 * @param string $needle The string to find in $haystack.
476 * @param integer $offset The search offset. If it is not specified, 0 is used.
477 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string.
478 * If $needle is not found, it returns false.
479 * @access public
480 * @static
481 */
function strpos($haystack, $needle, $offset = 0) {
	// Finds the first occurrence of $needle at or after $offset.
	// Haystacks that checkMultibyte() does not flag go to the native strpos();
	// otherwise positions and the offset are counted in code points.
	if (!Multibyte::checkMultibyte($haystack)) {
		return strpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);

	$needleCount = count($needle);
	if ($needleCount === 0) {
		return false;
	}

	// Last index at which the whole needle still fits; this keeps every
	// comparison inside the array bounds.
	$last = count($haystack) - $needleCount;

	// A negative offset starts the scan at the beginning of the haystack.
	for ($position = max(0, (int)$offset); $position <= $last; $position++) {
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
515
516 /**
517 * Finds the last occurrence of a character in a string within another.
518 *
519 * @param string $haystack The string from which to get the last occurrence of $needle.
520 * @param string $needle The string to find in $haystack.
521 * @param boolean $part Determines which portion of $haystack this function returns.
522 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
523 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
524 * Default value is false.
525 * @return string|boolean The portion of $haystack. or false if $needle is not found.
526 * @access public
527 * @static
528 */
function strrchr($haystack, $needle, $part = false) {
	// Finds the last occurrence of $needle in $haystack.
	// Returns the part of $haystack before that occurrence when $part is true,
	// otherwise the part from that occurrence to the end; false when not found.
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);

	$needleCount = count($needle);
	if ($needleCount === 0) {
		return false;
	}

	// Scan backwards from the last index at which the whole needle still fits,
	// so the first hit is the last occurrence.
	for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			if ($part) {
				// A match at position 0 gives an empty "before" portion.
				return Multibyte::ascii(array_slice($haystack, 0, $position));
			}
			return Multibyte::ascii(array_slice($haystack, $position));
		}
	}
	return false;
}
577
578 /**
579 * Finds the last occurrence of a character in a string within another, case insensitive.
580 *
581 * @param string $haystack The string from which to get the last occurrence of $needle.
582 * @param string $needle The string to find in $haystack.
583 * @param boolean $part Determines which portion of $haystack this function returns.
584 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
585 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
586 * Default value is false.
587 * @return string|boolean The portion of $haystack. or false if $needle is not found.
588 * @access public
589 * @static
590 */
function strrichr($haystack, $needle, $part = false) {
	// Case-insensitive search for the last occurrence of $needle in $haystack.
	// Returns the part of $haystack before that occurrence when $part is true,
	// otherwise the part from that occurrence to the end; false when not found.
	//
	// Matching is done on upper-cased code points; the returned portion is cut
	// from the original code points at the same index.
	// NOTE(review): this assumes upper-casing keeps the number of code points
	// unchanged — confirm against the case-folding tables.
	$check = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));

	$needleCount = count($needle);
	if ($needleCount === 0) {
		return false;
	}

	// Scan backwards from the last index at which the whole needle still fits,
	// so the first hit is the last occurrence.
	$start = min(count($haystack), count($check)) - $needleCount;

	for ($position = $start; $position >= 0; $position--) {
		if (array_slice($check, $position, $needleCount) === $needle) {
			if ($part) {
				// A match at position 0 gives an empty "before" portion.
				return Multibyte::ascii(array_slice($haystack, 0, $position));
			}
			return Multibyte::ascii(array_slice($haystack, $position));
		}
	}
	return false;
}
641
642 /**
643 * Finds position of last occurrence of a string within another, case insensitive
644 *
645 * @param string $haystack The string from which to get the position of the last occurrence of $needle.
646 * @param string $needle The string to find in $haystack.
647 * @param integer $offset The position in $haystack to start searching.
648 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
649 * or false if $needle is not found.
650 * @access public
651 * @static
652 */
function strripos($haystack, $needle, $offset = 0) {
	// Case-insensitive search for the position of the last occurrence of $needle.
	// On PHP5 with a haystack that checkMultibyte() does not flag, the native
	// strripos() is used; otherwise positions are counted in code points.
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strripos($haystack, $needle, $offset);
	}

	// Upper-case both sides so a plain comparison is case-insensitive.
	$haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
	$needle = Multibyte::utf8(Multibyte::strtoupper($needle));

	$haystackCount = count($haystack);
	$needleCount = count($needle);
	if ($needleCount === 0) {
		return false;
	}

	// Candidate start positions run from $first to $last inclusive.
	// A positive offset raises $first; a negative offset lowers $last so that
	// the search stops that many characters before the end of the string.
	$offset = (int)$offset;
	$first = 0;
	$last = $haystackCount - $needleCount;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0) {
		$last = min($last, $haystackCount + $offset);
	}

	// Scanning backwards makes the first hit the last occurrence.
	for ($position = $last; $position >= $first; $position--) {
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
693
694 /**
695 * Find position of last occurrence of a string in a string.
696 *
697 * @param string $haystack The string being checked, for the last occurrence of $needle.
698 * @param string $needle The string to find in $haystack.
699 * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
700 * Negative values will stop searching at an arbitrary point prior to the end of the string.
701 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
702 * If $needle is not found, it returns false.
703 * @access public
704 * @static
705 */
function strrpos($haystack, $needle, $offset = 0) {
	// Finds the position of the last occurrence of $needle in $haystack.
	// On PHP5 with a haystack that checkMultibyte() does not flag, the native
	// strrpos() is used; otherwise positions are counted in code points.
	if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
		return strrpos($haystack, $needle, $offset);
	}

	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);

	$haystackCount = count($haystack);
	$needleCount = count($needle);
	if ($needleCount === 0) {
		return false;
	}

	// Candidate start positions run from $first to $last inclusive.
	// A positive offset raises $first; a negative offset lowers $last so that
	// the search stops that many characters before the end of the string.
	$offset = (int)$offset;
	$first = 0;
	$last = $haystackCount - $needleCount;
	if ($offset > 0) {
		$first = $offset;
	} elseif ($offset < 0) {
		$last = min($last, $haystackCount + $offset);
	}

	// Scanning backwards makes the first hit the last occurrence.
	for ($position = $last; $position >= $first; $position--) {
		if (array_slice($haystack, $position, $needleCount) === $needle) {
			return $position;
		}
	}
	return false;
}
745
746 /**
747 * Finds first occurrence of a string within another
748 *
749 * @param string $haystack The string from which to get the first occurrence of $needle.
750 * @param string $needle The string to find in $haystack
751 * @param boolean $part Determines which portion of $haystack this function returns.
752 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
753 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
754 * Default value is FALSE.
755 * @return string|boolean The portion of $haystack, or false if $needle is not found.
756 * @access public
757 * @static
758 */
function strstr($haystack, $needle, $part = false) {
	// Finds the first occurrence of $needle in $haystack.
	// Returns the part of $haystack before the match when $part is true,
	// otherwise the part from the match to the end; false when not found.
	//
	// The native strstr() only accepts its third argument from PHP 5.3 on.
	$php = version_compare(PHP_VERSION, '5.3', '<');

	if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
		$haystack = Multibyte::utf8($haystack);
		$needle = Multibyte::utf8($needle);

		$needleCount = count($needle);
		if ($needleCount === 0) {
			return false;
		}

		// Last index at which the whole needle still fits; this keeps every
		// comparison inside the array bounds.
		$last = count($haystack) - $needleCount;

		for ($position = 0; $position <= $last; $position++) {
			if (array_slice($haystack, $position, $needleCount) === $needle) {
				if ($part) {
					// A match at position 0 gives an empty "before" portion.
					return Multibyte::ascii(array_slice($haystack, 0, $position));
				}
				return Multibyte::ascii(array_slice($haystack, $position));
			}
		}
		return false;
	}

	if (!$php) {
		return strstr($haystack, $needle, $part);
	}
	return strstr($haystack, $needle);
}
806
807 /**
808 * Make a string lowercase
809 *
810 * @param string $string The string being lowercased.
811 * @return string with all alphabetic characters converted to lowercase.
812 * @access public
813 * @static
814 */
function strtolower($string) {
	// Lower-cases $string one code point at a time and re-encodes the result.
	$_this =& Multibyte::getInstance();
	$codePoints = Multibyte::utf8($string);
	$lowerCase = array();

	foreach ($codePoints as $char) {
		if ($char < 128) {
			// ASCII: chr() yields exactly one byte, so the native strtolower()
			// result is read back with a single ord().
			$lowerCase[] = ord(strtolower(chr($char)));
			continue;
		}

		// Non-ASCII: look up case-folding entries keyed on the upper-case form.
		$matched = false;
		$keys = $_this->__find($char, 'upper');

		if (!empty($keys)) {
			foreach ($keys as $entry) {
				// NOTE(review): count() is applied to the first 'lower' element
				// itself rather than to the 'lower' list — confirm this is intended.
				if ($entry['upper'] == $char && count($entry['lower'][0]) === 1) {
					$lowerCase[] = $entry['lower'][0];
					$matched = true;
					break;
				}
			}
		}

		// No mapping found: keep the code point as it is.
		if (!$matched) {
			$lowerCase[] = $char;
		}
	}
	return Multibyte::ascii($lowerCase);
}
854
855 /**
856 * Make a string uppercase
857 *
858 * @param string $string The string being uppercased.
859 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
860 * @return string with all alphabetic characters converted to uppercase.
861 * @access public
862 * @static
863 */
function strtoupper($string) {
	// Upper-cases $string one code point at a time and re-encodes the result.
	// A case-folding entry whose 'lower' side has several code points can
	// replace a run of input code points with a single 'upper' code point;
	// the consumed code points are remembered in $replaced so they are not
	// emitted again.
	$_this =& Multibyte::getInstance();
	$utf8Map = Multibyte::utf8($string);

	$length = count($utf8Map);
	$matched = false;
	$replaced = array();
	$upperCase = array();

	for ($i = 0; $i < $length; $i++) {
		$char = $utf8Map[$i];

		if ($char < 128) {
			// ASCII: chr() yields exactly one byte, so the native strtoupper()
			// result is read back with a single ord().
			$upperCase[] = ord(strtoupper(chr($char)));
			$matched = true;
		} else {
			$matched = false;
			$keys = $_this->__find($char);
			$keyCount = count($keys);

			if (!empty($keys)) {
				foreach ($keys as $key => $value) {
					$matched = false;
					$replace = 0;
					if ($length > 1 && count($keys[$key]['lower']) > 1) {
						// Multi-code-point 'lower' sequence: compare it against the
						// input starting at $i, treating positions past the end of
						// the input as "no match".
						for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
							$nextChar = isset($utf8Map[$i + $ii]) ? $utf8Map[$i + $ii] : null;

							if ($nextChar !== null && ($nextChar == $keys[$key]['lower'][$ii])) {
								$replace++;
							}
						}
						if ($replace == $count) {
							$upperCase[] = $keys[$key]['upper'];
							$replaced = array_merge($replaced, array_values($keys[$key]['lower']));
							$matched = true;
							break 1;
						}
					} elseif ($length > 1 && $keyCount > 1) {
						// Several candidate entries: try the later ones, again
						// guarding every read of the input against its end.
						for ($ii = 1; $ii < $keyCount; $ii++) {
							$nextChar = isset($utf8Map[$i + $ii - 1]) ? $utf8Map[$i + $ii - 1] : null;

							if (in_array($nextChar, $keys[$ii]['lower'])) {

								for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
									$nextChar = isset($utf8Map[$i + $jj]) ? $utf8Map[$i + $jj] : null;

									if ($nextChar !== null && ($nextChar == $keys[$ii]['lower'][$jj])) {
										$replace++;
									}
								}
								if ($replace == $count) {
									$upperCase[] = $keys[$ii]['upper'];
									$replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
									$matched = true;
									// Leaves both the candidate loop and the entry loop.
									break 2;
								}
							}
						}
					}
					// Simple one-to-one mapping on the first 'lower' code point.
					if ($keys[$key]['lower'][0] == $char) {
						$upperCase[] = $keys[$key]['upper'];
						$matched = true;
						break 1;
					}
				}
			}
		}
		// Unmapped code points pass through unless an earlier multi-code-point
		// replacement already consumed them.
		if ($matched === false && !in_array($char, $replaced, true)) {
			$upperCase[] = $char;
		}
	}
	return Multibyte::ascii($upperCase);
}
947
/**
 * Count the number of substring occurrences
 *
 * Both strings are converted to code point arrays and compared element by element.
 * Occurrences are counted without overlap, as PHP's substr_count() and
 * mb_substr_count() do: 'aa' occurs once in 'aaa'.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string being found.
 * @return integer The number of times the $needle substring occurs in the $haystack string.
 *    Zero when $needle is empty or longer than $haystack.
 * @access public
 * @static
 */
function substrCount($haystack, $needle) {
	$haystack = Multibyte::utf8($haystack);
	$needle = Multibyte::utf8($needle);
	$haystackCount = count($haystack);
	$needleCount = count($needle);

	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return 0;
	}

	$count = 0;
	// Last position at which a complete needle still fits into the haystack.
	$last = $haystackCount - $needleCount;
	$i = 0;

	while ($i <= $last) {
		$ii = 0;
		while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
			$ii++;
		}
		if ($ii === $needleCount) {
			$count++;
			// Skip the matched characters so occurrences never overlap.
			$i += $needleCount;
		} else {
			$i++;
		}
	}
	return $count;
}
984
/**
 * Get part of string
 *
 * The string is split into code points, sliced, and converted back. Negative values
 * follow array_slice() semantics, which mirror substr(): a negative $start counts
 * from the end of the string and a negative $length stops that many characters
 * before the end.
 *
 * @param string $string The string being checked.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string.
 * @return string The portion of $string specified by the $start and $length parameters.
 * @access public
 * @static
 */
function substr($string, $start, $length = null) {
	if ($start === 0 && $length === null) {
		return $string;
	}

	$string = Multibyte::utf8($string);

	// The length argument is only passed when given, so "no length" always means
	// "up to the end" regardless of how the running PHP version treats a null length.
	if ($length === null) {
		$value = array_slice($string, (int)$start);
	} else {
		$value = array_slice($string, (int)$start, (int)$length);
	}
	return Multibyte::ascii($value);
}
1018
/**
 * Prepare a string for mail transport, using the provided encoding
 *
 * Strings shorter than 75 bytes without any multibyte character are returned untouched.
 * Everything else is base64 encoded and split into '=?CHARSET?B?...?=' encoded words
 * separated by $newline followed by a space. For UTF-8 the split points are moved
 * backwards so that a multibyte character is never cut in half.
 *
 * @param string $string value to encode
 * @param string $charset charset to use for encoding. Defaults to the 'App.encoding' configuration value
 * @param string $newline line separator placed between encoded words
 * @return string
 * @access public
 * @static
 * @TODO: add support for 'Q'('Quoted Printable') encoding
 */
function mimeEncode($string, $charset = null, $newline = "\r\n") {
	if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
		return $string;
	}

	if (empty($charset)) {
		$charset = Configure::read('App.encoding');
	}
	$charset = strtoupper($charset);

	$start = '=?' . $charset . '?B?';
	$end = '?=';
	$spacer = $end . $newline . ' ' . $start;

	// Encoded payload length per word: what is left of 75 characters, rounded down
	// to a multiple of 4 so every word holds complete base64 groups.
	$length = 75 - strlen($start) - strlen($end);
	$length = $length - ($length % 4);
	if ($charset === 'UTF-8') {
		$parts = array();
		// Raw bytes that fit into $length base64 characters. Cast to integer because
		// the value is used as a string offset below.
		$maxchars = (int)floor(($length * 3) / 4);
		while (strlen($string) > $maxchars) {
			$i = $maxchars;
			$test = ord($string[$i]);
			// Bytes 128-191 are UTF-8 continuation bytes: step back to the lead byte.
			while ($i > 0 && $test >= 128 && $test <= 191) {
				$i--;
				$test = ord($string[$i]);
			}
			if ($i === 0) {
				// Malformed input without a usable boundary: cut at the byte limit
				// instead of producing empty chunks forever.
				$i = $maxchars;
			}
			$parts[] = base64_encode(substr($string, 0, $i));
			$string = substr($string, $i);
		}
		$parts[] = base64_encode($string);
		$string = implode($spacer, $parts);
	} else {
		$string = chunk_split(base64_encode($string), $length, $spacer);
		// chunk_split() also appends the separator after the last chunk: remove it.
		$string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
	}
	return $start . $string . $end;
}
1067
/**
 * Return the Code points range for Unicode characters
 *
 * Looks up the Unicode block containing $decimal and returns the name of the matching
 * case folding table. The result is also stored in $this->__codeRange[$decimal].
 * Code points outside every listed block yield false.
 *
 * @param integer $decimal decimal value of the code point
 * @return mixed name of the case folding table (string), or false when no table covers $decimal
 * @access private
 */
function __codepoint($decimal) {
	// First code point, last code point, table name.
	$blocks = array(
		array(128, 255, '0080_00ff'), // Latin-1 Supplement
		array(256, 383, '0100_017f'), // Latin Extended-A
		array(384, 591, '0180_024F'), // Latin Extended-B
		array(592, 687, '0250_02af'), // IPA Extensions
		array(880, 1023, '0370_03ff'), // Greek and Coptic
		array(1024, 1279, '0400_04ff'), // Cyrillic
		array(1280, 1327, '0500_052f'), // Cyrillic Supplement
		array(1328, 1423, '0530_058f'), // Armenian
		array(7680, 7935, '1e00_1eff'), // Latin Extended Additional
		array(7936, 8191, '1f00_1fff'), // Greek Extended
		array(8448, 8527, '2100_214f'), // Letterlike Symbols
		array(8528, 8591, '2150_218f'), // Number Forms
		array(9312, 9471, '2460_24ff'), // Enclosed Alphanumerics
		array(11264, 11359, '2c00_2c5f'), // Glagolitic
		array(11360, 11391, '2c60_2c7f'), // Latin Extended-C
		array(11392, 11519, '2c80_2cff'), // Coptic
		array(65280, 65519, 'ff00_ffef') // Halfwidth and Fullwidth Forms
	);

	$return = false;
	foreach ($blocks as $block) {
		// Both bounds are checked so code points in the gaps between blocks are
		// not attributed to the next listed block.
		if ($decimal >= $block[0] && $decimal <= $block[1]) {
			$return = $block[2];
			break;
		}
	}
	$this->__codeRange[$decimal] = $return;
	return $return;
}
1116
/**
 * Find the related code folding values for $char
 *
 * Resolves the case folding table for $char, loading it through Configure the first
 * time its range is needed, and collects every entry of that table matching $char.
 * The name of the table used is left in $this->__table.
 *
 * @param integer $char decimal value of character
 * @param string $type 'lower' matches $char against the first code point of an entry's
 *    lower case sequence, 'upper' matches it against the entry's upper case code point
 * @return array matching case folding entries, or null when no table covers $char
 * @access private
 */
function __find($char, $type = 'lower') {
	$found = array();
	if (!isset($this->__codeRange[$char])) {
		$range = $this->__codepoint($char);
		if ($range === false) {
			return null;
		}
		// Load each table only once; further characters of the same range reuse it.
		if (!isset($this->__caseFold[$range])) {
			Configure::load('unicode' . DS . 'casefolding' . DS . $range);
			$this->__caseFold[$range] = Configure::read($range);
			Configure::delete($range);
		}
	}

	if (!$this->__codeRange[$char]) {
		return null;
	}
	$this->__table = $this->__codeRange[$char];

	// Nothing to search when the table is missing or empty.
	if (empty($this->__caseFold[$this->__table])) {
		return $found;
	}

	foreach ($this->__caseFold[$this->__table] as $fold) {
		if ($type === 'lower' && $fold[$type][0] === $char) {
			$found[] = $fold;
		} elseif ($type === 'upper' && $fold[$type] === $char) {
			$found[] = $fold;
		}
	}
	return $found;
}
1153
/**
 * Check the $string for multibyte characters
 *
 * Scans the string byte by byte and reports whether any byte lies outside the
 * 7-bit ASCII range (0-127).
 *
 * @param string $string value to test
 * @return boolean true if at least one non-ASCII byte is present, false otherwise
 * @access public
 * @static
 */
function checkMultibyte($string) {
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		// 127 is the highest ASCII value; byte 128 (0x80) is already non-ASCII.
		if (ord($string[$i]) > 127) {
			return true;
		}
	}
	return false;
}
1172 }