/**
 * Convert a UTF-8 byte string into a Windows-1251 byte string, falling back
 * to numeric character entities for everything outside the Russian alphabet.
 *
 * Per decoded code point:
 *  - U+0401 (1025) becomes byte 168, U+0451 (1105) becomes byte 184;
 *  - U+0410..U+044F (1040..1103) become the byte `code point - 848`;
 *  - any other multi-byte character becomes the entity `&#<code point>;`.
 *
 * Bytes that are not the start of a multi-byte sequence (ASCII, stray
 * continuation bytes, 0xF8 and above) and a lead byte sitting at the very end
 * of the input are copied through unchanged. A lead byte whose continuation
 * bytes are missing or malformed is emitted as the entity of its own byte
 * value, and only that single byte is consumed.
 *
 * @param mixed $utf UTF-8 encoded text; non-strings and '' are returned as is.
 *
 * @return mixed The converted byte string, or the untouched argument when it
 *               is empty or not a string.
 */
function UTF8toWIN1251_entities($utf = '')
{
    if (!is_string($utf) || $utf === '') {
        return $utf;
    }

    $length = strlen($utf);
    $html = '';

    for ($pos = 0; $pos < $length; $pos++) {
        $lead = ord($utf[$pos]);

        // Work out the sequence length announced by the lead byte:
        // 110xxxxx = 2 bytes, 1110xxxx = 3 bytes, 11110xxx = 4 bytes.
        if (($lead & 0xE0) === 0xC0) {
            $seqLen = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
            $seqLen = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
            $seqLen = 4;
        } else {
            $seqLen = 1;
        }

        // Not a lead byte, or a lead byte with nothing after it: copy verbatim.
        if ($seqLen === 1 || $pos + 1 >= $length) {
            $html .= $utf[$pos];
            continue;
        }

        // Payload bits of the lead byte: 5, 4 or 3 bits for 2-, 3-, 4-byte forms.
        $codePoint = $lead & (0x7F >> $seqLen);
        $consumed = 0;

        for ($i = 1; $i < $seqLen; $i++) {
            // Validate the byte that is actually about to be consumed; a byte
            // beyond the end of the input counts as "not a continuation".
            $next = ($pos + $i < $length) ? ord($utf[$pos + $i]) : 0;

            if (($next & 0xC0) !== 0x80) {
                // Malformed or truncated sequence: report the lead byte on its
                // own and leave the following bytes to be processed normally.
                $codePoint = $lead;
                $consumed = 0;
                break;
            }

            // Each continuation byte contributes its low six bits.
            $codePoint = ($codePoint << 6) | ($next & 0x3F);
            $consumed = $i;
        }

        if ($codePoint === 1025) {
            $html .= chr(168);
        } elseif ($codePoint === 1105) {
            $html .= chr(184);
        } elseif ($codePoint >= 1040 && $codePoint <= 1103) {
            $html .= chr($codePoint - 848);
        } else {
            // Numeric character entity for anything without a direct mapping.
            $html .= '&#' . $codePoint . ';';
        }

        // Skip the continuation bytes that were folded into this character.
        $pos += $consumed;
    }

    return $html;
}

function ya_pages($key){ $k=urlencode($key); $str=file_get_contents("http://yandex.ru/yandsearch?text={$k}"); $str=UTF8toWIN1251_entities($str); $GrabStart = "