Mercurial > hg > Members > shoshi > webvirt
comparison cake/libs/multibyte.php @ 0:261e66bd5a0c
hg init
author | Shoshi TAMAKI <shoshi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 24 Jul 2011 21:08:31 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:261e66bd5a0c |
---|---|
1 <?php | |
2 /** | |
3 * Multibyte handling methods. | |
4 * | |
5 * | |
6 * PHP versions 4 and 5 | |
7 * | |
8 * CakePHP(tm) : Rapid Development Framework (http://cakephp.org) | |
9 * Copyright 2005-2010, Cake Software Foundation, Inc. (http://cakefoundation.org) | |
10 * | |
11 * Licensed under The MIT License | |
12 * Redistributions of files must retain the above copyright notice. | |
13 * | |
14 * @copyright Copyright 2005-2010, Cake Software Foundation, Inc. (http://cakefoundation.org) | |
15 * @link http://cakephp.org CakePHP(tm) Project | |
16 * @package cake | |
17 * @subpackage cake.cake.libs | |
18 * @since CakePHP(tm) v 1.2.0.6833 | |
19 * @license MIT License (http://www.opensource.org/licenses/mit-license.php) | |
20 */ | |
if (function_exists('mb_internal_encoding')) {
    // When mb_internal_encoding() is available, align its internal encoding
    // with the application's configured encoding, if one is set.
    $encoding = Configure::read('App.encoding');
    if ($encoding) {
        mb_internal_encoding($encoding);
    }
}
27 | |
28 /** | |
29 * Find position of first occurrence of a case-insensitive string. | |
30 * | |
31 * @param string $haystack The string from which to get the position of the first occurrence of $needle. | |
32 * @param string $needle The string to find in $haystack. | |
33 * @param integer $offset The position in $haystack to start searching. | |
34 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
35 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string, or false | |
36 * if $needle is not found. | |
37 */ | |
if (!function_exists('mb_stripos')) {
    function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
        // Defined only when no mb_stripos() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $position = Multibyte::stripos($haystack, $needle, $offset);
        return $position;
    }
}
43 | |
44 /** | |
45 * Finds first occurrence of a string within another, case insensitive. | |
46 * | |
47 * @param string $haystack The string from which to get the first occurrence of $needle. | |
48 * @param string $needle The string to find in $haystack. | |
49 * @param boolean $part Determines which portion of $haystack this function returns. | |
50 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle. | |
51 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end, | |
52 * Default value is false. | |
53 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
54 * @return string|boolean The portion of $haystack, or false if $needle is not found. | |
55 */ | |
if (!function_exists('mb_stristr')) {
    function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
        // Defined only when no mb_stristr() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $portion = Multibyte::stristr($haystack, $needle, $part);
        return $portion;
    }
}
61 | |
62 /** | |
63 * Get string length. | |
64 * | |
65 * @param string $string The string being checked for length. | |
66 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
67 * @return integer The number of characters in string $string having character encoding encoding. | |
68 * A multi-byte character is counted as 1. | |
69 */ | |
if (!function_exists('mb_strlen')) {
    function mb_strlen($string, $encoding = null) {
        // Defined only when no mb_strlen() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $characters = Multibyte::strlen($string);
        return $characters;
    }
}
75 | |
76 /** | |
77 * Find position of first occurrence of a string. | |
78 * | |
79 * @param string $haystack The string being checked. | |
80 * @param string $needle The string to find in $haystack. | |
81 * @param integer $offset The search offset. If it is not specified, 0 is used. | |
82 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
83 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string. | |
84 * If $needle is not found, it returns false. | |
85 */ | |
if (!function_exists('mb_strpos')) {
    function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
        // Defined only when no mb_strpos() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $position = Multibyte::strpos($haystack, $needle, $offset);
        return $position;
    }
}
91 | |
92 /** | |
93 * Finds the last occurrence of a character in a string within another. | |
94 * | |
95 * @param string $haystack The string from which to get the last occurrence of $needle. | |
96 * @param string $needle The string to find in $haystack. | |
97 * @param boolean $part Determines which portion of $haystack this function returns. | |
98 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle. | |
99 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end, | |
100 * Default value is false. | |
101 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
102 * @return string|boolean The portion of $haystack. or false if $needle is not found. | |
103 */ | |
if (!function_exists('mb_strrchr')) {
    function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
        // Defined only when no mb_strrchr() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $portion = Multibyte::strrchr($haystack, $needle, $part);
        return $portion;
    }
}
109 | |
110 /** | |
111 * Finds the last occurrence of a character in a string within another, case insensitive. | |
112 * | |
113 * @param string $haystack The string from which to get the last occurrence of $needle. | |
114 * @param string $needle The string to find in $haystack. | |
115 * @param boolean $part Determines which portion of $haystack this function returns. | |
116 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle. | |
117 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end, | |
118 * Default value is false. | |
119 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
120 * @return string|boolean The portion of $haystack. or false if $needle is not found. | |
121 */ | |
if (!function_exists('mb_strrichr')) {
    function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
        // Defined only when no mb_strrichr() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $portion = Multibyte::strrichr($haystack, $needle, $part);
        return $portion;
    }
}
127 | |
128 /** | |
129 * Finds position of last occurrence of a string within another, case insensitive | |
130 * | |
131 * @param string $haystack The string from which to get the position of the last occurrence of $needle. | |
132 * @param string $needle The string to find in $haystack. | |
133 * @param integer $offset The position in $haystack to start searching. | |
134 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
135 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string, | |
136 * or false if $needle is not found. | |
137 */ | |
if (!function_exists('mb_strripos')) {
    function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
        // Defined only when no mb_strripos() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $position = Multibyte::strripos($haystack, $needle, $offset);
        return $position;
    }
}
143 | |
144 /** | |
145 * Find position of last occurrence of a string in a string. | |
146 * | |
147 * @param string $haystack The string being checked, for the last occurrence of $needle. | |
148 * @param string $needle The string to find in $haystack. | |
149 * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string. | |
150 * Negative values will stop searching at an arbitrary point prior to the end of the string. | |
151 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
152 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string. | |
153 * If $needle is not found, it returns false. | |
154 */ | |
if (!function_exists('mb_strrpos')) {
    function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
        // Defined only when no mb_strrpos() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $position = Multibyte::strrpos($haystack, $needle, $offset);
        return $position;
    }
}
160 | |
161 /** | |
162 * Finds first occurrence of a string within another | |
163 * | |
164 * @param string $haystack The string from which to get the first occurrence of $needle. | |
165 * @param string $needle The string to find in $haystack | |
166 * @param boolean $part Determines which portion of $haystack this function returns. | |
167 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle. | |
168 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end, | |
169 * Default value is FALSE. | |
170 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
171 * @return string|boolean The portion of $haystack, or false if $needle is not found. | |
172 */ | |
if (!function_exists('mb_strstr')) {
    function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
        // Defined only when no mb_strstr() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $portion = Multibyte::strstr($haystack, $needle, $part);
        return $portion;
    }
}
178 | |
179 /** | |
180 * Make a string lowercase | |
181 * | |
182 * @param string $string The string being lowercased. | |
183 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
184 * @return string with all alphabetic characters converted to lowercase. | |
185 */ | |
if (!function_exists('mb_strtolower')) {
    function mb_strtolower($string, $encoding = null) {
        // Defined only when no mb_strtolower() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $lowered = Multibyte::strtolower($string);
        return $lowered;
    }
}
191 | |
192 /** | |
193 * Make a string uppercase | |
194 * | |
195 * @param string $string The string being uppercased. | |
196 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
197 * @return string with all alphabetic characters converted to uppercase. | |
198 */ | |
if (!function_exists('mb_strtoupper')) {
    function mb_strtoupper($string, $encoding = null) {
        // Defined only when no mb_strtoupper() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $raised = Multibyte::strtoupper($string);
        return $raised;
    }
}
204 | |
205 /** | |
206 * Count the number of substring occurrences | |
207 * | |
208 * @param string $haystack The string being checked. | |
209 * @param string $needle The string being found. | |
210 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
211 * @return integer The number of times the $needle substring occurs in the $haystack string. | |
212 */ | |
if (!function_exists('mb_substr_count')) {
    function mb_substr_count($haystack, $needle, $encoding = null) {
        // Defined only when no mb_substr_count() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $occurrences = Multibyte::substrCount($haystack, $needle);
        return $occurrences;
    }
}
218 | |
219 /** | |
220 * Get part of string | |
221 * | |
222 * @param string $string The string being checked. | |
223 * @param integer $start The first position used in $string. | |
224 * @param integer $length The maximum length of the returned string. | |
225 * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used. | |
226 * @return string The portion of $string specified by the $string and $length parameters. | |
227 */ | |
if (!function_exists('mb_substr')) {
    function mb_substr($string, $start, $length = null, $encoding = null) {
        // Defined only when no mb_substr() already exists.
        // $encoding is accepted for signature compatibility but is not forwarded.
        $portion = Multibyte::substr($string, $start, $length);
        return $portion;
    }
}
233 | |
234 /** | |
235 * Encode string for MIME header | |
236 * | |
237 * @param string $str The string being encoded | |
238 * @param string $charset specifies the name of the character set in which str is represented in. | |
239 * The default value is determined by the current NLS setting (mbstring.language). | |
240 * @param string $transfer_encoding specifies the scheme of MIME encoding. | |
241 * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given. | |
242 * @param string $linefeed specifies the EOL (end-of-line) marker with which | |
243 * mb_encode_mimeheader() performs line-folding | |
244 * (an RFC term: the act of breaking a line longer than a certain length into multiple lines. | |
245 * The length is currently hard-coded to 74 characters). Falls back to "\r\n" (CRLF) if not given. | |
246 * @param integer $indent [definition unknown and appears to have no effect] | |
247 * @return string A converted version of the string represented in ASCII. | |
248 */ | |
if (!function_exists('mb_encode_mimeheader')) {
    function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
        // Defined only when no mb_encode_mimeheader() already exists.
        // Only $str, $charset and $linefeed are forwarded; $transfer_encoding
        // and $indent are accepted but not passed on.
        $encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
        return $encoded;
    }
}
254 | |
255 /** | |
256 * Multibyte handling methods. | |
257 * | |
258 * | |
259 * @package cake | |
260 * @subpackage cake.cake.libs | |
261 */ | |
262 class Multibyte extends Object { | |
263 | |
264 /** | |
265 * Holds the case folding values | |
266 * | |
267 * @var array | |
268 * @access private | |
269 */ | |
270 var $__caseFold = array(); | |
271 | |
272 /** | |
273 * Holds an array of Unicode code point ranges | |
274 * | |
275 * @var array | |
276 * @access private | |
277 */ | |
278 var $__codeRange = array(); | |
279 | |
280 /** | |
281 * Holds the current code point range | |
282 * | |
283 * @var string | |
284 * @access private | |
285 */ | |
286 var $__table = null; | |
287 | |
288 /** | |
289 * Gets a reference to the Multibyte object instance | |
290 * | |
291 * @return object Multibyte instance | |
292 * @access public | |
293 * @static | |
294 */ | |
function &getInstance() {
    // The single shared instance lives in a function-static array so that a
    // reference to the same element can be returned on every call.
    static $instance = array();

    if (!$instance) {
        // Plain assignment: "=& new" is deprecated since PHP 5.3 and is a
        // parse error from PHP 7 onwards.
        $instance[0] = new Multibyte();
    }
    return $instance[0];
}
303 | |
304 /** | |
305 * Converts a multibyte character string | |
306 * to the decimal value of the character | |
307 * | |
308 * @param multibyte string $string | |
309 * @return array | |
310 * @access public | |
311 * @static | |
312 */ | |
function utf8($string) {
    // Decodes a UTF-8 byte string into a list of integer code points.
    // Handles 1-, 2-, 3- and 4-byte sequences; continuation bytes are not
    // validated, and a trailing incomplete sequence is dropped.
    $map = array();

    $values = array();
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            // Plain ASCII byte: the code point is the byte itself.
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // First byte of a multi-byte sequence decides its total length.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) === $find) {
            if ($find === 4) {
                $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($find === 3) {
                $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $values = array();
            $find = 1;
        }
    }
    return $map;
}
344 | |
345 /** | |
346 * Converts the decimal value of a multibyte character string | |
347 * to a string | |
348 * | |
349 * @param array $array | |
350 * @return string | |
351 * @access public | |
352 * @static | |
353 */ | |
function ascii($array) {
    // Encodes a list of integer code points as a UTF-8 byte string.
    // Code points below 128 become one byte, below 2048 two bytes, below
    // 65536 three bytes, and anything larger four bytes.
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            $ascii .= chr(192 + ($utf8 >> 6));
            $ascii .= chr(128 + ($utf8 & 63));
        } elseif ($utf8 < 65536) {
            $ascii .= chr(224 + ($utf8 >> 12));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        } else {
            // Supplementary planes need a 4-byte sequence; the 3-byte form
            // would overflow its lead byte.
            $ascii .= chr(240 + ($utf8 >> 18));
            $ascii .= chr(128 + (($utf8 >> 12) & 63));
            $ascii .= chr(128 + (($utf8 >> 6) & 63));
            $ascii .= chr(128 + ($utf8 & 63));
        }
    }
    return $ascii;
}
371 | |
372 /** | |
373 * Find position of first occurrence of a case-insensitive string. | |
374 * | |
375 * @param multi-byte string $haystack The string from which to get the position of the first occurrence of $needle. | |
376 * @param multi-byte string $needle The string to find in $haystack. | |
377 * @param integer $offset The position in $haystack to start searching. | |
378 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string, | |
379 * or false if $needle is not found. | |
380 * @access public | |
381 * @static | |
382 */ | |
function stripos($haystack, $needle, $offset = 0) {
    // The native stripos() is used only on PHP 5 and only when
    // checkMultibyte() does not flag the haystack.
    if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    // Otherwise upper-case both strings and do a case-sensitive search.
    $upperHaystack = Multibyte::strtoupper($haystack);
    $upperNeedle = Multibyte::strtoupper($needle);
    return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
391 | |
392 /** | |
393 * Finds first occurrence of a string within another, case insensitive. | |
394 * | |
395 * @param string $haystack The string from which to get the first occurrence of $needle. | |
396 * @param string $needle The string to find in $haystack. | |
397 * @param boolean $part Determines which portion of $haystack this function returns. | |
398 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle. | |
399 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end, | |
400 * Default value is false. | |
401 * @return string|boolean The portion of $haystack, or false if $needle is not found. | |
402 * @access public | |
403 * @static | |
404 */ | |
function stristr($haystack, $needle, $part = false) {
    // True when the native stristr() lacks the third ($part) argument.
    // version_compare() is used because comparing the PHP_VERSION string
    // against a float is unreliable.
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        // Matching is done on upper-cased code points ($check) while the
        // returned text is built from the original code points ($haystack).
        // NOTE(review): assumes upper-casing keeps code points aligned
        // one-to-one with the original string - confirm.
        $check = Multibyte::strtoupper($haystack);
        $check = Multibyte::utf8($check);

        $haystack = Multibyte::utf8($haystack);
        $haystackCount = count($haystack);

        $needle = Multibyte::strtoupper($needle);
        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        $parts = array();
        $found = false;
        $position = 0;

        while (!$found && $position < $haystackCount) {
            // Count how many needle code points match from this position,
            // never reading past the end of $check.
            $i = 0;
            while ($i < $needleCount
                && isset($check[$position + $i])
                && $needle[$i] === $check[$position + $i]
            ) {
                $i++;
            }

            if ($i === $needleCount) {
                $found = true;
            } else {
                // Not a match here: move this code point to the "before" part.
                $parts[] = $haystack[$position];
                unset($haystack[$position]);
                $position++;
            }
        }

        if (!$found) {
            return false;
        }
        // With $part set, a needle at position 0 yields an empty string,
        // as the native functions do.
        return Multibyte::ascii($part ? $parts : $haystack);
    }

    if (!$php) {
        return stristr($haystack, $needle, $part);
    }
    return stristr($haystack, $needle);
}
454 | |
455 /** | |
456 * Get string length. | |
457 * | |
458 * @param string $string The string being checked for length. | |
459 * @return integer The number of characters in string $string | |
460 * @access public | |
461 * @static | |
462 */ | |
function strlen($string) {
    // Strings not flagged by checkMultibyte() are measured in bytes.
    if (!Multibyte::checkMultibyte($string)) {
        return strlen($string);
    }
    // Otherwise count decoded code points, so each multibyte character is 1.
    $codePoints = Multibyte::utf8($string);
    return count($codePoints);
}
470 | |
471 /** | |
472 * Find position of first occurrence of a string. | |
473 * | |
474 * @param string $haystack The string being checked. | |
475 * @param string $needle The string to find in $haystack. | |
476 * @param integer $offset The search offset. If it is not specified, 0 is used. | |
477 * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string. | |
478 * If $needle is not found, it returns false. | |
479 * @access public | |
480 * @static | |
481 */ | |
function strpos($haystack, $needle, $offset = 0) {
    // Haystacks not flagged by checkMultibyte() go to the native strpos().
    if (!Multibyte::checkMultibyte($haystack)) {
        return strpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    // An empty needle never matches.
    if ($needleCount === 0) {
        return false;
    }

    // Scan forward from $offset, in code points. Every index is checked with
    // isset(), so a needle running past the end of the haystack or a start
    // position below 0 does not read undefined offsets.
    for ($position = (int)$offset; $position < $haystackCount; $position++) {
        $i = 0;
        while ($i < $needleCount
            && isset($haystack[$position + $i])
            && $needle[$i] === $haystack[$position + $i]
        ) {
            $i++;
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
515 | |
516 /** | |
517 * Finds the last occurrence of a character in a string within another. | |
518 * | |
519 * @param string $haystack The string from which to get the last occurrence of $needle. | |
520 * @param string $needle The string to find in $haystack. | |
521 * @param boolean $part Determines which portion of $haystack this function returns. | |
522 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle. | |
523 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end, | |
524 * Default value is false. | |
525 * @return string|boolean The portion of $haystack. or false if $needle is not found. | |
526 * @access public | |
527 * @static | |
528 */ | |
function strrchr($haystack, $needle, $part = false) {
    // Works on code points so multibyte characters are never split.
    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0 || $needleCount > $haystackCount) {
        return false;
    }

    // Search backwards so the first full match found is the last occurrence.
    // The whole needle is compared, not just its first code point.
    for ($position = $haystackCount - $needleCount; $position >= 0; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($haystack[$position + $i] !== $needle[$i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            if ($part) {
                // Everything before the last occurrence.
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            // From the last occurrence to the end.
            return Multibyte::ascii(array_slice($haystack, $position));
        }
    }
    return false;
}
577 | |
578 /** | |
579 * Finds the last occurrence of a character in a string within another, case insensitive. | |
580 * | |
581 * @param string $haystack The string from which to get the last occurrence of $needle. | |
582 * @param string $needle The string to find in $haystack. | |
583 * @param boolean $part Determines which portion of $haystack this function returns. | |
584 * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle. | |
585 * If set to false, it returns all of $haystack from the last occurrence of $needle to the end, | |
586 * Default value is false. | |
587 * @return string|boolean The portion of $haystack. or false if $needle is not found. | |
588 * @access public | |
589 * @static | |
590 */ | |
function strrichr($haystack, $needle, $part = false) {
    // Matching is done on upper-cased code points ($check); the returned
    // text is cut from the original code points ($haystack).
    // NOTE(review): assumes upper-casing keeps code points aligned
    // one-to-one with the original string - confirm.
    $check = Multibyte::strtoupper($haystack);
    $check = Multibyte::utf8($check);
    $checkCount = count($check);

    $haystack = Multibyte::utf8($haystack);

    $needle = Multibyte::strtoupper($needle);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0 || $needleCount > $checkCount) {
        return false;
    }

    // Search backwards so the first full match found is the last occurrence.
    for ($position = $checkCount - $needleCount; $position >= 0; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($check[$position + $i] !== $needle[$i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            if ($part) {
                // Everything before the last occurrence.
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            // From the last occurrence to the end.
            return Multibyte::ascii(array_slice($haystack, $position));
        }
    }
    return false;
}
641 | |
642 /** | |
643 * Finds position of last occurrence of a string within another, case insensitive | |
644 * | |
645 * @param string $haystack The string from which to get the position of the last occurrence of $needle. | |
646 * @param string $needle The string to find in $haystack. | |
647 * @param integer $offset The position in $haystack to start searching. | |
648 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string, | |
649 * or false if $needle is not found. | |
650 * @access public | |
651 * @static | |
652 */ | |
function strripos($haystack, $needle, $offset = 0) {
    // The native strripos() is used only on PHP 5 and only when
    // checkMultibyte() does not flag the haystack.
    if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    // Case-insensitivity is achieved by upper-casing both strings first.
    $haystack = Multibyte::strtoupper($haystack);
    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::strtoupper($needle);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0 || $needleCount > $haystackCount) {
        return false;
    }

    // Allowed range of match start positions, in code points:
    // offset >= 0 - matches must start at or after $offset;
    // offset <  0 - matches must start at or before (length + $offset).
    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset >= 0) {
        $first = $offset;
    } else {
        $last = min($last, $haystackCount + $offset);
    }

    // Search backwards so the first full match found is the last occurrence.
    for ($position = $last; $position >= $first; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($haystack[$position + $i] !== $needle[$i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
693 | |
694 /** | |
695 * Find position of last occurrence of a string in a string. | |
696 * | |
697 * @param string $haystack The string being checked, for the last occurrence of $needle. | |
698 * @param string $needle The string to find in $haystack. | |
699 * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string. | |
700 * Negative values will stop searching at an arbitrary point prior to the end of the string. | |
701 * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string. | |
702 * If $needle is not found, it returns false. | |
703 * @access public | |
704 * @static | |
705 */ | |
function strrpos($haystack, $needle, $offset = 0) {
    // The native strrpos() is used only on PHP 5 and only when
    // checkMultibyte() does not flag the haystack.
    if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
        return strrpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $haystackCount = count($haystack);

    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);

    if ($needleCount === 0 || $needleCount > $haystackCount) {
        return false;
    }

    // Allowed range of match start positions, in code points:
    // offset >= 0 - matches must start at or after $offset;
    // offset <  0 - matches must start at or before (length + $offset).
    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset >= 0) {
        $first = $offset;
    } else {
        $last = min($last, $haystackCount + $offset);
    }

    // Search backwards so the first full match found is the last occurrence.
    for ($position = $last; $position >= $first; $position--) {
        for ($i = 0; $i < $needleCount; $i++) {
            if ($haystack[$position + $i] !== $needle[$i]) {
                break;
            }
        }
        if ($i === $needleCount) {
            return $position;
        }
    }
    return false;
}
745 | |
746 /** | |
747 * Finds first occurrence of a string within another | |
748 * | |
749 * @param string $haystack The string from which to get the first occurrence of $needle. | |
750 * @param string $needle The string to find in $haystack | |
751 * @param boolean $part Determines which portion of $haystack this function returns. | |
752 * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle. | |
753 * If set to false, it returns all of $haystack from the first occurrence of $needle to the end, | |
754 * Default value is FALSE. | |
755 * @return string|boolean The portion of $haystack, or false if $needle is not found. | |
756 * @access public | |
757 * @static | |
758 */ | |
function strstr($haystack, $needle, $part = false) {
    // True when the native strstr() lacks the third ($part) argument.
    // version_compare() is used because comparing the PHP_VERSION string
    // against a float is unreliable.
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        // Work on code points so multibyte characters are never split.
        $haystack = Multibyte::utf8($haystack);
        $haystackCount = count($haystack);

        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        $parts = array();
        $found = false;
        $position = 0;

        while (!$found && $position < $haystackCount) {
            // Count how many needle code points match from this position,
            // never reading past the end of the haystack.
            $i = 0;
            while ($i < $needleCount
                && isset($haystack[$position + $i])
                && $needle[$i] === $haystack[$position + $i]
            ) {
                $i++;
            }

            if ($i === $needleCount) {
                $found = true;
            } else {
                // Not a match here: move this code point to the "before" part.
                $parts[] = $haystack[$position];
                unset($haystack[$position]);
                $position++;
            }
        }

        if (!$found) {
            return false;
        }
        // With $part set, a needle at position 0 yields an empty string,
        // as the native functions do.
        return Multibyte::ascii($part ? $parts : $haystack);
    }

    if (!$php) {
        return strstr($haystack, $needle, $part);
    }
    return strstr($haystack, $needle);
}
806 | |
/**
 * Make a string lowercase
 *
 * Characters below code point 128 are lowercased with the native strtolower().
 * Any other character is looked up in the case folding table for its range and
 * replaced by the first code point of the matching 'lower' entry. Characters
 * without a matching entry are copied unchanged.
 *
 * @param string $string The string being lowercased.
 * @return string with all alphabetic characters converted to lowercase.
 * @access public
 * @static
 */
function strtolower($string) {
	$_this =& Multibyte::getInstance();
	$utf8Map = Multibyte::utf8($string);

	$length = count($utf8Map);
	$lowerCase = array();

	for ($i = 0; $i < $length; $i++) {
		$char = $utf8Map[$i];

		if ($char < 128) {
			// A single ASCII byte always lowercases to a single ASCII byte.
			$lowerCase[] = ord(strtolower(chr($char)));
			continue;
		}

		// Default to the character itself when no folding entry applies.
		$lower = $char;
		$keys = $_this->__find($char, 'upper');

		if (!empty($keys)) {
			foreach ($keys as $fold) {
				// 'lower' holds a list of code points; only its first element is
				// used. isset() replaces the former count() call on that integer.
				if ($fold['upper'] == $char && isset($fold['lower'][0])) {
					$lower = $fold['lower'][0];
					break;
				}
			}
		}
		$lowerCase[] = $lower;
	}
	return Multibyte::ascii($lowerCase);
}
854 | |
/**
 * Make a string uppercase
 *
 * Characters below code point 128 are uppercased with the native strtoupper().
 * Any other character is looked up in the case folding table for its range.
 * Entries whose 'lower' value is a sequence of several code points are matched
 * against the following characters of the string; when the whole sequence
 * matches, the single 'upper' code point is emitted and the members of the
 * sequence are remembered so they are not copied again on their own.
 *
 * @param string $string The string being uppercased.
 * @return string with all alphabetic characters converted to uppercase.
 * @access public
 * @static
 */
function strtoupper($string) {
	$_this =& Multibyte::getInstance();
	$utf8Map = Multibyte::utf8($string);

	$length = count($utf8Map);
	$replaced = array();
	$upperCase = array();

	for ($i = 0; $i < $length; $i++) {
		$char = $utf8Map[$i];

		if ($char < 128) {
			// A single ASCII byte always uppercases to a single ASCII byte.
			$upperCase[] = ord(strtoupper(chr($char)));
			continue;
		}

		$matched = false;
		$keys = $_this->__find($char);

		// __find() returns null for characters outside the known ranges, so the
		// entries are only counted once the result is known to be non-empty.
		if (!empty($keys)) {
			$keyCount = count($keys);

			foreach ($keys as $key => $value) {
				$matched = false;
				$replace = 0;

				if ($length > 1 && count($keys[$key]['lower']) > 1) {
					// Multi code point 'lower' sequence: every member has to match
					// the string starting at the current position.
					for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
						// isset() guards the look-ahead at the end of the string.
						if (isset($utf8Map[$i + $ii]) && ($utf8Map[$i + $ii] == $keys[$key]['lower'][$ii])) {
							$replace++;
						}
					}
					if ($replace == $count) {
						$upperCase[] = $keys[$key]['upper'];
						$replaced = array_merge($replaced, array_values($keys[$key]['lower']));
						$matched = true;
						break 1;
					}
				} elseif ($length > 1 && $keyCount > 1) {
					// Several candidate entries: test the later candidates' sequences.
					for ($ii = 1; $ii < $keyCount; $ii++) {
						$offset = $i + $ii - 1;
						if (!isset($utf8Map[$offset]) || !in_array($utf8Map[$offset], $keys[$ii]['lower'])) {
							continue;
						}

						for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
							if (isset($utf8Map[$i + $jj]) && ($utf8Map[$i + $jj] == $keys[$ii]['lower'][$jj])) {
								$replace++;
							}
						}
						if ($replace == $count) {
							$upperCase[] = $keys[$ii]['upper'];
							$replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
							$matched = true;
							break 2;
						}
					}
				}

				// Plain one-to-one mapping for the current entry.
				if ($keys[$key]['lower'][0] == $char) {
					$upperCase[] = $keys[$key]['upper'];
					$matched = true;
					break 1;
				}
			}
		}

		// Copy the character unchanged unless it was consumed as part of a sequence.
		if ($matched === false && !in_array($char, $replaced, true)) {
			$upperCase[] = $char;
		}
	}
	return Multibyte::ascii($upperCase);
}
947 | |
/**
 * Count the number of substring occurrences
 *
 * Both strings are compared code point by code point. Occurrences are counted
 * without overlap: after a match the search resumes behind the matched part,
 * so 'aa' occurs once in 'aaa'. An empty $needle yields 0.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string being found.
 * @return integer The number of times the $needle substring occurs in the $haystack string.
 * @access public
 * @static
 */
function substrCount($haystack, $needle) {
	$haystack = Multibyte::utf8($haystack);
	$haystackCount = count($haystack);
	$needle = Multibyte::utf8($needle);
	$needleCount = count($needle);

	if ($needleCount === 0 || $needleCount > $haystackCount) {
		return 0;
	}

	$count = 0;
	// Last offset at which the complete needle still fits into the haystack.
	$last = $haystackCount - $needleCount;
	$i = 0;

	while ($i <= $last) {
		$ii = 0;
		while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
			$ii++;
		}
		if ($ii === $needleCount) {
			$count++;
			// Skip the matched characters so occurrences never overlap.
			$i += $needleCount;
		} else {
			$i++;
		}
	}
	return $count;
}
984 | |
/**
 * Get part of string
 *
 * Positions and lengths are counted in characters, not bytes. Negative values
 * follow the rules of array_slice(): a negative $start counts from the end of
 * the string and a negative $length stops that many characters before the end.
 *
 * @param string $string The string being checked.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string. When null, everything
 *    from $start to the end of $string is returned.
 * @return string The portion of $string specified by the $start and $length parameters.
 * @access public
 * @static
 */
function substr($string, $start, $length = null) {
	if ($start === 0 && $length === null) {
		return $string;
	}

	$chars = Multibyte::utf8($string);

	// The length is only passed when one was given, because array_slice()
	// treated an explicit null as a zero length before PHP 5.2.4.
	if ($length === null) {
		return Multibyte::ascii(array_slice($chars, (int)$start));
	}
	return Multibyte::ascii(array_slice($chars, (int)$start, (int)$length));
}
1018 | |
/**
 * Prepare a string for mail transport, using the provided encoding
 *
 * Strings shorter than 75 bytes that contain no multibyte characters are returned
 * unchanged. Everything else is base64 encoded and wrapped in one or more
 * '=?CHARSET?B?...?=' words of at most 75 characters each, joined by $newline
 * followed by a space. For UTF-8 the input is only split between characters.
 *
 * @param string $string value to encode
 * @param string $charset charset to use for encoding. When empty, the 'App.encoding'
 *    configuration value is used.
 * @param string $newline line break placed between two encoded words
 * @return string
 * @access public
 * @static
 * @TODO: add support for 'Q'('Quoted Printable') encoding
 */
function mimeEncode($string, $charset = null, $newline = "\r\n") {
	if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
		return $string;
	}

	if (empty($charset)) {
		$charset = Configure::read('App.encoding');
	}
	$charset = strtoupper($charset);

	$start = '=?' . $charset . '?B?';
	$end = '?=';
	$spacer = $end . $newline . ' ' . $start;

	// Base64 characters available per encoded word, rounded down to whole
	// groups of four so that every word decodes on its own.
	$length = 75 - strlen($start) - strlen($end);
	$length = $length - ($length % 4);
	if ($length < 4) {
		// An oversized charset name must not lead to empty chunks, which would
		// keep the loop below from ever consuming the string.
		$length = 4;
	}

	if ($charset == 'UTF-8') {
		$parts = array();
		// Raw bytes that fit into $length base64 characters. $length is a multiple
		// of four, so the result is integral; the cast keeps it usable as an offset.
		$maxchars = (int)(($length * 3) / 4);

		while (strlen($string) > $maxchars) {
			$i = $maxchars;
			// Step back over continuation bytes (10xxxxxx) so that the split
			// happens in front of a character, never inside one.
			while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
				$i--;
			}
			if ($i === 0) {
				// Only continuation bytes were found (malformed input): split at
				// the byte limit so the loop still makes progress.
				$i = $maxchars;
			}
			$parts[] = base64_encode(substr($string, 0, $i));
			$string = substr($string, $i);
		}
		$parts[] = base64_encode($string);
		$string = implode($spacer, $parts);
	} else {
		// chunk_split() also appends the separator after the final chunk; remove it.
		$string = chunk_split(base64_encode($string), $length, $spacer);
		$string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
	}
	return $start . $string . $end;
}
1067 | |
/**
 * Return the Code points range for Unicode characters
 *
 * Maps a decimal code point to the name of the case folding table covering it
 * and stores the result in $this->__codeRange[$decimal]. Code points outside
 * all listed blocks, including those in the gaps between them, yield false.
 *
 * @param integer $decimal decimal value of the character
 * @return string|boolean Name of the matching range, or false when no range matches.
 * @access private
 */
function __codepoint($decimal) {
	// First code point, last code point and table name of every supported block.
	$ranges = array(
		array(128, 255, '0080_00ff'), // Latin-1 Supplement
		array(256, 383, '0100_017f'), // Latin Extended-A
		array(384, 591, '0180_024F'), // Latin Extended-B
		array(592, 687, '0250_02af'), // IPA Extensions
		array(880, 1023, '0370_03ff'), // Greek and Coptic
		array(1024, 1279, '0400_04ff'), // Cyrillic
		array(1280, 1327, '0500_052f'), // Cyrillic Supplement
		array(1328, 1423, '0530_058f'), // Armenian
		array(7680, 7935, '1e00_1eff'), // Latin Extended Additional
		array(7936, 8191, '1f00_1fff'), // Greek Extended
		array(8448, 8527, '2100_214f'), // Letterlike Symbols
		array(8528, 8591, '2150_218f'), // Number Forms
		array(9312, 9471, '2460_24ff'), // Enclosed Alphanumerics
		array(11264, 11359, '2c00_2c5f'), // Glagolitic
		array(11360, 11391, '2c60_2c7f'), // Latin Extended-C
		array(11392, 11519, '2c80_2cff'), // Coptic
		array(65280, 65519, 'ff00_ffef') // Halfwidth and Fullwidth Forms
	);

	$return = false;
	foreach ($ranges as $range) {
		// Both bounds are checked so a code point lying between two blocks is
		// never attributed to the block that follows the gap.
		if ($decimal >= $range[0] && $decimal <= $range[1]) {
			$return = $range[2];
			break;
		}
	}
	$this->__codeRange[$decimal] = $return;
	return $return;
}
1116 | |
/**
 * Find the related code folding values for $char
 *
 * Resolves the range of $char, loads the case folding table of that range
 * through Configure the first time it is needed, and collects every table
 * entry that refers to $char.
 *
 * @param integer $char decimal value of character
 * @param string $type 'lower' compares $char with the first code point of an entry's
 *    'lower' list, 'upper' compares it with the entry's 'upper' value.
 * @return array Matching entries (possibly empty), or null when no table covers $char.
 * @access private
 */
function __find($char, $type = 'lower') {
	$found = array();
	if (!isset($this->__codeRange[$char])) {
		$range = $this->__codepoint($char);
		if ($range === false) {
			return null;
		}
		// Other characters of the same range may already have loaded this table.
		if (!isset($this->__caseFold[$range])) {
			Configure::load('unicode' . DS . 'casefolding' . DS . $range);
			$this->__caseFold[$range] = Configure::read($range);
			Configure::delete($range);
		}
	}

	// A stored false marks a character that is known to have no table.
	if (!$this->__codeRange[$char]) {
		return null;
	}
	$this->__table = $this->__codeRange[$char];
	$count = count($this->__caseFold[$this->__table]);

	for ($i = 0; $i < $count; $i++) {
		if ($type === 'lower' && $this->__caseFold[$this->__table][$i][$type][0] === $char) {
			$found[] = $this->__caseFold[$this->__table][$i];
		} elseif ($type === 'upper' && $this->__caseFold[$this->__table][$i][$type] === $char) {
			$found[] = $this->__caseFold[$this->__table][$i];
		}
	}
	return $found;
}
1153 | |
/**
 * Check the $string for multibyte characters
 *
 * A string counts as multibyte as soon as one of its bytes lies outside the
 * 7-bit ASCII range (0-127).
 *
 * @param string $string value to test
 * @return boolean true when at least one byte is greater than 127, false otherwise
 * @access public
 * @static
 */
function checkMultibyte($string) {
	$string = (string)$string;
	$length = strlen($string);

	for ($i = 0; $i < $length; $i++) {
		// 127 is the highest ASCII value; byte 128 (0x80) is already outside it.
		if (ord($string[$i]) > 127) {
			return true;
		}
	}
	return false;
}
1172 } |